// blob: 73fbb514513e8d0d239cdffd1439b9e7e4ac2fb2 [file] [log] [blame]
// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// String generator: generates all possible strings of up to
// maxlen letters using the set of letters in alphabet.
// Fetch strings using a Java-like Next()/HasNext() interface.
#include <stdint.h>
#include <random>
#include <string>
#include <vector>
#include "util/util.h"
#include "re2/stringpiece.h"
namespace re2 {
// Generates strings of up to maxlen letters drawn from a caller-supplied
// alphabet. Strings are fetched one at a time:
//
//   while (gen.HasNext()) {
//     const StringPiece& s = gen.Next();
//     ...
//   }
//
// The generator is neither copyable nor assignable.
class StringGenerator {
 public:
  // maxlen is the maximum length of a generated string, counted in letters;
  // alphabet holds one string per letter.
  StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
  ~StringGenerator() {}

  // Returns the next string. The generator retains the returned StringPiece
  // and its backing string data as members.
  // NOTE(review): presumably the result is only valid until the following
  // call to Next() -- confirm against the implementation.
  const StringPiece& Next();

  // Reports whether Next() can be called again.
  bool HasNext() const { return hasnext_; }

  // Resets generator to start sequence over.
  void Reset();

  // Causes generator to emit random strings for next n calls to Next().
  void Random(int32_t seed, int n);

  // Causes generator to emit a NULL as the next call.
  void GenerateNULL();

 private:
  // Internal helpers operating on digits_.
  // NOTE(review): the meaning of the bool results is not visible in this
  // header -- see the implementation file.
  bool IncrementDigits();
  bool RandomDigits();

  // Global state.
  int maxlen_;                         // Maximum length string to generate.
  std::vector<std::string> alphabet_;  // Alphabet, one string per letter.

  // Iteration state.
  StringPiece sp_;           // Last StringPiece returned by Next().
  std::string s_;            // String data in last StringPiece returned by Next().
  bool hasnext_;             // Whether Next() can be called again.
  std::vector<int> digits_;  // Alphabet indices for next string.
  bool generate_null_;       // Whether to generate a NULL StringPiece next.
  bool random_;              // Whether generated strings are random.
  int nrandom_;              // Number of random strings left to generate.
  std::minstd_rand0 rng_;    // Random number generator.

  StringGenerator(const StringGenerator&) = delete;
  StringGenerator& operator=(const StringGenerator&) = delete;
};
// Generates and returns a string over the binary alphabet {0,1} that
// contains every possible binary sequence of length n as a contiguous
// subsequence (substring). The obvious brute force method would generate
// a string of length n * 2^n, but this generates a string of length
// n-1 + 2^n, called a De Bruijn cycle.
// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
//
// Such a string is useful for testing a DFA. If you have a DFA where
// distinct last n bytes implies distinct states, then running on a
// De Bruijn string causes the DFA to need to create a new state at every
// position in the input, never reusing any states until it gets to the
// end of the string. This is the worst possible case for DFA execution.
std::string DeBruijnString(int n);
} // namespace re2