blob: a16101ebae76d3dca67eddf352da27e12d9c4519 [file] [log] [blame]
#include <unistd.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include "base/command_line.h"
#include "base/files/file.h"
#include "base/files/file_path.h"
#include "base/no_destructor.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/time/default_clock.h"
// TODO(mpdenton): Is it okay to include this? Otherwise it has to be copied
// into this file. Technically the std random number engines are banned in
// Chrome, but if this used base::Rand* it would turn milliseconds into hours.
#include "third_party/libFuzzer/src/FuzzerRandom.h"
#include "third_party/sqlite/fuzz/sql_query_grammar.pb.h"
#include "third_party/sqlite/fuzz/sql_query_proto_to_string.h"
#include "third_party/sqlite/fuzz/sql_run_queries.h"
using namespace sql_query_grammar;
// TODO(mpdenton):
// 2. Add functionality to start with a specific database so that the
// fuzzer doesn't waste so much time getting a sufficiently complicated
// database.
// 3. FTS3 Corpus
namespace {
// Inclusive [min, max] bounds that GenCorpusEntry() hands to GenQueries() for
// each kind of statement. GenQueries() applies them per table, using
// RandInt(min, max) to decide how many statements of that kind to emit.
// INSERT statements.
constexpr int kMinNumInsertions = 15;
constexpr int kMaxNumInsertions = 20;
// CREATE INDEX statements.
constexpr int kMinNumIndexes = 5;
constexpr int kMaxNumIndexes = 8;
// Single-table SELECT statements.
constexpr int kMinNumSelects = 3;
constexpr int kMaxNumSelects = 6;
// SELECT statements with joins (only generated when there are 2+ tables).
constexpr int kMinNumJoins = 3;
constexpr int kMaxNumJoins = 3;
// UPDATE statements.
constexpr int kMinNumUpdates = 15;
constexpr int kMaxNumUpdates = 20;
// DELETE statements.
constexpr int kMinNumDeletes = 5;
constexpr int kMaxNumDeletes = 5;
// Randomly chosen INSERT/DELETE/UPDATE/SELECT statements.
constexpr int kMinNumOthers = 10;
constexpr int kMaxNumOthers = 10;
} // namespace
// Returns the shared random number generator used by every helper in this
// file. It is built on first use, seeded with the current time (microseconds
// since the Windows epoch) plus the process id, and is never destroyed.
fuzzer::Random& GetRandom() {
  static base::NoDestructor<fuzzer::Random> generator([] {
    const auto micros_since_epoch = base::DefaultClock::GetInstance()
                                        ->Now()
                                        .ToDeltaSinceWindowsEpoch()
                                        .InMicroseconds();
    // Mixing in the pid keeps processes started at the same instant apart.
    const unsigned seed = micros_since_epoch + getpid();
    return fuzzer::Random(seed);
  }());
  return *generator;
}
// Returns a pseudo-random integer from the inclusive range [min, max], drawn
// from the generator returned by GetRandom().
int RandInt(int min, int max) {
  const int span = max - min + 1;
  return GetRandom()(span) + min;
}
// Fills |output| with |output_len| pseudo-random bytes from GetRandom().
//
// One generator value is drawn for every sizeof(size_t) output bytes and is
// consumed starting with its least-significant byte. No value is drawn when
// there are no more bytes left to fill.
void RandBytes(void* output, size_t output_len) {
  uint8_t* out = static_cast<uint8_t*>(output);
  size_t rand_num = 0;
  for (size_t i = 0; i < output_len; i++) {
    // Refill at the start of every word-sized group of output bytes.
    if (i % sizeof(size_t) == 0)
      rand_num = GetRandom()();
    // Mask off the low byte instead of reading the first byte in memory, so
    // the bytes produced do not depend on the host's endianness.
    out[i] = static_cast<uint8_t>(rand_num & 0xFF);
    rand_num >>= 8;
  }
}
// Returns a string filled with |length| pseudo-random bytes (see RandBytes()).
std::string RandBytesAsString(size_t length) {
  std::string bytes;
  // NOTE(review): the extra byte passed to WriteInto() presumably accounts for
  // a terminating NUL -- confirm against base/strings/string_util.h.
  void* const buffer = base::WriteInto(&bytes, length + 1);
  RandBytes(buffer, length);
  return bytes;
}
// Returns a pseudo-random 64-bit value. When size_t is 64 bits wide a single
// generator value is returned as-is; otherwise size_t must be 32 bits wide and
// two generator values are combined (first draw in the high half).
uint64_t RandUint64() {
  if (sizeof(size_t) != sizeof(uint64_t)) {
    CHECK(sizeof(size_t) == sizeof(uint32_t));
    uint64_t combined = GetRandom()();
    combined <<= 32;
    combined |= GetRandom()();
    return combined;
  }
  return GetRandom()();
}
// Internal bookkeeping for the schema being generated. These are plain C++
// structs, kept in their own namespace so they do not clash with the protobuf
// message types pulled in by `using namespace sql_query_grammar` (e.g. Table).
namespace i {
// Describes one generated table.
struct Table {
// Number written into the protobuf Table message (see CreateTableFromUint32).
uint32_t table_num;
// Number of columns; columns are referred to by the numbers
// 0 .. num_columns - 1.
int num_columns;
// Declared type of each column, indexed by column number.
std::vector<CastTypeName::CastTypeNameEnum> col_types;
// Copies of the expressions returned by GenerateCreateIndex() for this table.
// They are reused in WHERE and ORDER BY clauses.
std::vector<std::unique_ptr<Expr>> index_exprs;
};
// Describes one schema: the tables it contains.
struct Schema {
// Number of tables that will be generated for this schema.
int num_tables;
// The tables, indexed by table number.
std::vector<i::Table> tables;
};
} // namespace i
// Draws integers from the inclusive range [min, max] until one is accepted by
// |is_valid_fn|, and returns it. Used through RANDOM_ENUM to pick a random
// valid value of a protobuf enum whose valid numbers may have gaps.
template <typename T>
int GetRandomEnum(T is_valid_fn, int min, int max) {
  for (;;) {
    const int candidate = RandInt(min, max);
    if (is_valid_fn(candidate))
      return candidate;
  }
}
// Expands to a random valid value of the nested enum CLASS_NAME::ENUM_NAME.
// It token-pastes the enum name onto the _IsValid, _MIN and _MAX suffixes,
// forwards those three members of CLASS_NAME to GetRandomEnum(), and casts the
// chosen integer back to the enum type.
#define RANDOM_ENUM(CLASS_NAME, ENUM_NAME) \
static_cast<CLASS_NAME::ENUM_NAME>( \
GetRandomEnum(CLASS_NAME::ENUM_NAME##_IsValid, \
CLASS_NAME::ENUM_NAME##_MIN, CLASS_NAME::ENUM_NAME##_MAX))
// Returns a set of |size| distinct pseudo-random numbers, each obtained from
// RandInt(0, max_num).
//
// At most max_num + 1 distinct values exist, so a larger |size| is clamped to
// that count; without the clamp the loop below could never finish.
std::set<uint32_t> GetRandomNums(size_t size, uint32_t max_num) {
  const uint64_t population = static_cast<uint64_t>(max_num) + 1;
  if (size > population)
    size = static_cast<size_t>(population);
  std::set<uint32_t> ret;
  // Duplicates are silently dropped by the set, so keep drawing until enough
  // distinct values have been collected.
  while (ret.size() < size)
    ret.insert(RandInt(0, max_num));
  return ret;
}
// Returns a random subset of |s| containing |size| elements.
//
// An empty |s| yields an empty result, and |size| is clamped to s.size() since
// no larger subset exists.
template <typename T>
std::set<T> GetRandomSubset(const std::set<T>& s, size_t size) {
  std::set<T> ret;
  // Guards the s.size() - 1 below, which would wrap around for an empty set.
  if (s.empty())
    return ret;
  if (size > s.size())
    size = s.size();
  // Choose positions within |s|, then collect the elements at those positions.
  const std::set<uint32_t> indices = GetRandomNums(size, s.size() - 1);
  uint32_t position = 0;
  for (const T& element : s) {
    if (indices.count(position) > 0)
      ret.insert(element);
    ++position;
  }
  return ret;
}
// Sets the column number of |cd|'s column to 0 and returns |cd| so that calls
// can be chained.
inline ColumnDef* CreateDefaultColDef(ColumnDef* cd) {
cd->mutable_col()->set_column(0);
return cd;
}
// Touches |ce|'s lit_val field via its mutable accessor and returns |ce| so
// that calls can be chained.
// NOTE(review): presumably this gives the message a valid default before the
// caller fills in the field it actually wants -- confirm against the .proto.
inline ComplicatedExpr* CreateDefaultCompExpr(ComplicatedExpr* ce) {
ce->mutable_lit_val();
return ce;
}
// Stores the column number |col| in the Column message |col_ptr|.
inline void CreateColumn(Column* col_ptr, uint32_t col) {
col_ptr->set_column(col);
}
// Stores the table number |table| in the protobuf Table message |table_ptr|.
inline void CreateTableFromUint32(Table* table_ptr, uint32_t table) {
table_ptr->set_table(table);
}
// Points the table name of |e| at |table| by copying its table number. The
// schema name of |e| is left untouched.
inline void CreateSchemaTable(ExprSchemaTable* e, i::Table* table) {
CreateTableFromUint32(e->mutable_table_name(), table->table_num);
}
// Turns |e| into a reference to column |col|. When |table| is non-null the
// reference is also qualified with that table's number; pass nullptr for an
// unqualified column reference.
inline void CreateColumnExpr(Expr* e, uint32_t col, i::Table* table) {
  ComplicatedExpr* comp_expr = CreateDefaultCompExpr(e->mutable_comp_expr());
  ExprSchemaTableColumn* column_ref = comp_expr->mutable_expr_stc();
  CreateColumn(column_ref->mutable_col(), col);
  if (table == nullptr)
    return;
  CreateTableFromUint32(column_ref->mutable_table(), table->table_num);
}
// Fills |ret| with a random, non-empty list of distinct columns of |table| and
// returns the set of column numbers that were chosen.
//
// The number of draws is picked once, up front. Duplicated draws collapse in
// the set, so the list may end up shorter than the number of draws.
std::set<uint32_t> GenerateColumnList(ColumnList* ret, i::Table* table) {
  // Roll the draw count a single time: re-rolling it in the loop condition
  // would skew the count towards small values.
  const int num_draws = RandInt(1, table->num_columns);
  std::set<uint32_t> cols;
  for (int i = 0; i < num_draws; i++)
    cols.insert(RandInt(0, table->num_columns - 1));
  // The first column lives in its own field; the rest go to extra_cols.
  auto it = cols.begin();
  CreateColumn(ret->mutable_col(), *it);
  ++it;
  ret->mutable_extra_cols()->Reserve(cols.size() - 1);
  for (; it != cols.end(); ++it)
    CreateColumn(ret->mutable_extra_cols()->Add(), *it);
  return cols;
}
// Fills |nl| with a random decimal number: 1 to 20 random digits, a decimal
// point, then 1 to 20 random digits after it.
void GenerateNumericLit(NumericLiteral* nl) {
  // Each digit count is rolled once; re-rolling it in the loop condition would
  // bias the lengths towards short numbers.
  const int num_int_digits = RandInt(1, 20);
  for (int i = 0; i < num_int_digits; i++)
    nl->add_digits(RandInt(0, 9));
  nl->set_decimal_point(true);
  const int num_dec_digits = RandInt(1, 20);
  for (int i = 0; i < num_dec_digits; i++)
    nl->add_dec_digits(RandInt(0, 9));
}
// Fills |ret| with a random literal that suits a column of type |type|.
//
// One call in ten ignores |type| and emits a random special value. Otherwise:
//  - INTEGER (and NUMERIC half of the time): a number, either a small one in
//    [1, 3] or a random 64-bit value;
//  - TEXT: a short fixed string;
//  - BLOB: "a" or five random bytes;
//  - REAL: a random decimal (see GenerateNumericLit());
//  - anything else, including the other half of NUMERIC: a random special
//    value.
void GenerateLiteralValue(LiteralValue* ret,
                          CastTypeName::CastTypeNameEnum type) {
  if (RandInt(1, 10) == 1) {
    ret->set_special_val(RANDOM_ENUM(LiteralValue, SpecialVal));
    return;
  }

  const bool as_integer =
      type == CastTypeName::INTEGER ||
      (type == CastTypeName::NUMERIC && RandInt(1, 2) == 1);
  if (as_integer) {
    const bool small_value = RandInt(1, 3) == 1;
    ret->set_num_lit(small_value ? static_cast<int64_t>(RandInt(1, 3))
                                 : static_cast<int64_t>(RandUint64()));
    return;
  }

  switch (type) {
    case CastTypeName::TEXT:
      // String literals built from random bytes are too often unreadable, so
      // the non-trivial case just uses a few '#' characters instead.
      ret->set_string_lit(RandInt(1, 3) == 1 ? "a" : "#####");
      break;
    case CastTypeName::BLOB:
      if (RandInt(1, 3) == 1)
        ret->set_blob_lit("a");
      else
        ret->set_blob_lit(RandBytesAsString(5));
      break;
    case CastTypeName::REAL:
      GenerateNumericLit(ret->mutable_numeric_lit());
      break;
    default:
      ret->set_special_val(RANDOM_ENUM(LiteralValue, SpecialVal));
      break;
  }
}
// Fills |v| with between 1 and 10 rows of literal values. Every row holds one
// literal per column in |cols|, in ascending column order, and each literal is
// generated for the declared type of its column in |table|.
//
// Does nothing when |cols| is empty, since a row cannot be built from zero
// columns.
void GenerateValuesStatement(ValuesStatement* v,
                             i::Table* table,
                             std::set<uint32_t> cols) {
  // Avoids dereferencing cols.begin() and the wrap-around of cols.size() - 1.
  if (cols.empty())
    return;
  const int rand_num_values = RandInt(1, 10);
  if (rand_num_values > 1)
    v->mutable_extra_expr_lists()->Reserve(rand_num_values - 1);
  for (int row = 0; row < rand_num_values; row++) {
    // The first row has its own field; later rows are appended to the list.
    ExprList* el = (row == 0) ? v->mutable_expr_list()
                              : v->mutable_extra_expr_lists()->Add();
    el->mutable_extra_exprs()->Reserve(cols.size() - 1);
    // Likewise, the first value of a row has its own field.
    bool first_col = true;
    for (uint32_t col : cols) {
      if (first_col) {
        first_col = false;
        GenerateLiteralValue(el->mutable_expr()->mutable_lit_val(),
                             table->col_types[col]);
      } else {
        GenerateLiteralValue(
            el->mutable_extra_exprs()->Add()->mutable_lit_val(),
            table->col_types[col]);
      }
    }
  }
}
// Fills |where| with a single binary predicate over |table|.
//
// For non-join queries on a table with indexed expressions, the predicate is
// sometimes `<indexed expression> <= <numeric literal>`. Otherwise a random
// column is compared in a way that depends on its type: BLOB columns against
// NULL with "not equal", TEXT columns against the regexp ".*", and every other
// column with "less or equal" against a random number. When |join| is true the
// column is qualified with the table so it is unambiguous. |schema| is unused.
void GenerateWhereStatement(WhereStatement* where,
                            i::Schema* schema,
                            i::Table* table,
                            bool join = false) {
  BinaryExpr* predicate = where->mutable_expr()
                              ->mutable_expr()
                              ->mutable_comp_expr()
                              ->mutable_binary_expr();

  // TODO(mpdenton) exclude joins for now.
  const bool can_use_index = !join && !table->index_exprs.empty();
  if (can_use_index && RandInt(1, 5) >= 4) {
    // Reuse one of the table's indexed expressions as the left-hand side.
    const int which = RandInt(0, table->index_exprs.size() - 1);
    *predicate->mutable_lhs() = *table->index_exprs[which];
    predicate->set_op(BINOP_LEQ);
    GenerateLiteralValue(predicate->mutable_rhs()->mutable_lit_val(),
                         CastTypeName::NUMERIC);
    return;
  }

  // Fall back to a simple predicate on one random column.
  const uint32_t col = RandInt(0, table->num_columns - 1);
  ExprSchemaTableColumn* column_ref =
      predicate->mutable_lhs()->mutable_comp_expr()->mutable_expr_stc();
  CreateColumn(column_ref->mutable_col(), col);
  if (join)
    CreateTableFromUint32(column_ref->mutable_table(), table->table_num);

  switch (table->col_types[col]) {
    case CastTypeName::BLOB:
      predicate->set_op(BINOP_NOTEQ);
      predicate->mutable_rhs()->mutable_lit_val()->set_special_val(
          LiteralValue::VAL_NULL);
      break;
    case CastTypeName::TEXT:
      predicate->set_op(BINOP_REGEXP);
      predicate->mutable_rhs()->mutable_lit_val()->set_string_lit(".*");
      break;
    default:
      predicate->set_op(BINOP_LEQ);
      predicate->mutable_rhs()->mutable_lit_val()->set_num_lit(RandUint64());
      break;
  }
}
// Fills |i| with an INSERT or REPLACE (chosen at random) into |table|. Four
// times out of five an explicit column list and matching VALUES rows are
// generated; otherwise neither is set. |schema| is unused.
void GenerateInsertion(Insert* i, i::Schema* schema, i::Table* table) {
  // TODO(mpdenton) generate With statement
  // i->set_insert_type(RANDOM_ENUM(Insert, InsertType));
  i->set_insert_type(RandInt(1, 2) == 1 ? Insert::INSERT : Insert::REPLACE);
  CreateSchemaTable(i->mutable_staa()->mutable_schema_table(), table);
  if (RandInt(1, 5) < 2)
    return;
  const std::set<uint32_t> chosen_cols =
      GenerateColumnList(i->mutable_col_list(), table);
  GenerateValuesStatement(i->mutable_values(), table, chosen_cols);
}
// Fills |u| with an UPDATE of |table| that assigns a random literal of the
// right type to one random column. Nine times out of ten a WHERE clause is
// attached as well.
void GenerateUpdate(Update* u, i::Schema* schema, i::Table* table) {
  CreateSchemaTable(u->mutable_qtn()->mutable_staa()->mutable_schema_table(),
                    table);
  // The "column = expression" assignment.
  ColEqualsExpr* assignment = u->mutable_ucp2()->mutable_cee();
  const uint32_t col = RandInt(0, table->num_columns - 1);
  CreateColumn(assignment->mutable_col(), col);
  GenerateLiteralValue(assignment->mutable_expr()->mutable_lit_val(),
                       table->col_types[col]);
  if (RandInt(1, 10) < 2)
    return;
  GenerateWhereStatement(u->mutable_ucp2()->mutable_where_stmt(), schema,
                         table);
}
// Fills |d| with a DELETE from |table|. Nineteen times out of twenty it gets a
// WHERE clause; otherwise the statement is left without one.
void GenerateDelete(Delete* d, i::Schema* schema, i::Table* table) {
  CreateSchemaTable(d->mutable_qtn()->mutable_staa()->mutable_schema_table(),
                    table);
  if (RandInt(1, 20) < 2)
    return;
  GenerateWhereStatement(d->mutable_where(), schema, table);
}
// Fills |ct| with a CREATE TABLE for |table|: one column definition per
// column, carrying the column's declared type and a random default value of
// that type. One table in four is marked TEMP. |schema| is unused.
void GenerateCreateTable(CreateTable* ct, i::Schema* schema, i::Table* table) {
  ct->set_if_not_exists(false);
  if (RandInt(1, 4) == 1)
    ct->set_temp_modifier(TM_TEMP);
  CreateSchemaTable(ct->mutable_schema_table(), table);

  const int num_columns = table->num_columns;
  if (num_columns > 1)
    ct->mutable_op1()->mutable_extra_col_defs()->Reserve(num_columns - 1);

  for (int col = 0; col < num_columns; col++) {
    // The first definition has its own field; the rest go to the extra list.
    ColumnDef* col_def =
        (col == 0) ? ct->mutable_op1()->mutable_col_def()
                   : ct->mutable_op1()->mutable_extra_col_defs()->Add();
    const CastTypeName::CastTypeNameEnum col_type = table->col_types[col];
    CreateColumn(col_def->mutable_col(), col);
    col_def->mutable_type_name()->mutable_ctn()->set_type_enum(col_type);
    // Set default values
    GenerateLiteralValue(
        col_def->add_col_constraints()->mutable_opt2()->mutable_lit_val(),
        col_type);
  }
}
// Returns true when |type| is one of the number-like column types (NUMERIC,
// INTEGER or REAL).
bool IsNumeric(CastTypeName::CastTypeNameEnum type) {
  switch (type) {
    case CastTypeName::NUMERIC:
    case CastTypeName::INTEGER:
    case CastTypeName::REAL:
      return true;
    default:
      return false;
  }
}
// Builds the ON expression for a join between |table| and |join_tables|.
//
// One or more constraints are generated. Each compares a random column of one
// participating table with a random column of a different participating table,
// using "=" or, sometimes when both columns are numeric, "<=". The constraints
// are chained together with randomly chosen AND / OR operators.
//
// The returned Expr is allocated with `new` and owned by the caller (the
// caller in this file hands it to set_allocated_on_expr()).
//
// Requires at least two participating tables, i.e. a non-empty |join_tables|:
// with a single table the search for a second, different table never ends.
Expr* GenerateJoinConstaints(i::Table* table,
const std::vector<i::Table*>& join_tables) {
// Every table taking part in the join, including the primary one.
std::vector<i::Table*> all_tables = join_tables;
all_tables.push_back(table);
// Decide some columns have to be equal
std::vector<std::pair<ExprSchemaTableColumn*, ExprSchemaTableColumn*>>
equal_cols;
// comparison_ops[k] is the operator used for the pair equal_cols[k].
std::vector<BinaryOperator> comparison_ops;
// NOTE(review): the original comment here was left unfinished ("Would be
// better if the num_constraints"). The number of constraints is decided by
// the exit condition of the loop below.
do {
// Raw allocations: ownership passes to the expression tree further down
// through set_allocated_expr_stc().
ExprSchemaTableColumn* a = new ExprSchemaTableColumn;
ExprSchemaTableColumn* b = new ExprSchemaTableColumn;
int table_index_a = RandInt(0, all_tables.size() - 1);
CreateTableFromUint32(a->mutable_table(),
all_tables[table_index_a]->table_num);
// Keep drawing until the second table differs from the first.
int table_index_b;
while ((table_index_b = RandInt(0, all_tables.size() - 1)) == table_index_a)
;
CreateTableFromUint32(b->mutable_table(),
all_tables[table_index_b]->table_num);
uint32_t col_a = RandInt(0, all_tables[table_index_a]->num_columns - 1);
uint32_t col_b = RandInt(0, all_tables[table_index_b]->num_columns - 1);
CreateColumn(a->mutable_col(), col_a);
CreateColumn(b->mutable_col(), col_b);
equal_cols.push_back({a, b});
// If both columns are numeric, small chance of using a comparison op
// instead.
if (IsNumeric(all_tables[table_index_a]->col_types[col_a]) &&
IsNumeric(all_tables[table_index_b]->col_types[col_b]) &&
RandInt(1, 2) == 1)
comparison_ops.push_back(BINOP_LEQ);
else
comparison_ops.push_back(BINOP_EQ);
// Add another constraint whenever this draw comes up 2 or 3.
} while (RandInt(1, 3) >= 2);
// Actually generate the expressions.
// The result is a right-leaning chain: each node holds one comparison on its
// left-hand side and the remainder of the chain on its right-hand side.
Expr* initial_expr = new Expr;
Expr* curr_expr = initial_expr;
for (size_t i = 0; i < equal_cols.size() - 1; i++) {
BinaryExpr* bin_expr = CreateDefaultCompExpr(curr_expr->mutable_comp_expr())
->mutable_binary_expr();
BinaryExpr* lhs_bin_expr =
bin_expr->mutable_lhs()->mutable_comp_expr()->mutable_binary_expr();
lhs_bin_expr->mutable_lhs()->mutable_comp_expr()->set_allocated_expr_stc(
equal_cols[i].first);
lhs_bin_expr->set_op(comparison_ops[i]);
lhs_bin_expr->mutable_rhs()->mutable_comp_expr()->set_allocated_expr_stc(
equal_cols[i].second);
// Join this comparison to the rest of the chain with AND or OR.
if (RandInt(1, 2) == 1)
bin_expr->set_op(BINOP_AND);
else
bin_expr->set_op(BINOP_OR);
curr_expr = bin_expr->mutable_rhs();
}
// Finish off final expr
// The last comparison ends the chain, so it is stored directly in the
// current node rather than in a nested left-hand side.
size_t last_index = equal_cols.size() - 1;
BinaryExpr* bin_expr = CreateDefaultCompExpr(curr_expr->mutable_comp_expr())
->mutable_binary_expr();
bin_expr->mutable_lhs()->mutable_comp_expr()->set_allocated_expr_stc(
equal_cols[last_index].first);
bin_expr->set_op(comparison_ops[last_index]);
bin_expr->mutable_rhs()->mutable_comp_expr()->set_allocated_expr_stc(
equal_cols[last_index].second);
return initial_expr;
}
// Fills |from| with the FROM clause of a select on |table|.
//
// Without |join_tables| the clause names just |table|. Otherwise |table| is
// inner-joined with every table in |join_tables|, each join carrying its own
// freshly generated ON expression. |schema| is unused.
void GenerateFromStatement(FromStatement* from,
                           i::Schema* schema,
                           i::Table* table,
                           const std::vector<i::Table*>& join_tables) {
  // TODO(mpdenton) join statements?
  if (join_tables.empty()) {
    CreateSchemaTable(from->mutable_tos3()
                          ->add_tos_list()
                          ->mutable_qtn()
                          ->mutable_staa()
                          ->mutable_schema_table(),
                      table);
    return;
  }

  // Write some nice joins, starting from the primary table.
  auto* join_clause = from->mutable_tos3()->mutable_join_clause();
  CreateSchemaTable(join_clause->mutable_tos()
                        ->mutable_qtn()
                        ->mutable_staa()
                        ->mutable_schema_table(),
                    table);

  // Each joined table gets a JoinClauseCore that inner joins it using some
  // comparisons between columns of any two participating tables.
  for (i::Table* joined : join_tables) {
    JoinClauseCore* core = join_clause->add_clauses();
    // Just generate inner joins, fuzzer should be smart enough to find other
    // join types.
    core->mutable_join_op()->set_join_type(JoinOperator::INNER);
    CreateSchemaTable(core->mutable_tos()
                          ->mutable_qtn()
                          ->mutable_staa()
                          ->mutable_schema_table(),
                      joined);
    // The join constraint takes ownership of the generated expression.
    Expr* on_expr = GenerateJoinConstaints(table, join_tables);
    core->mutable_join_constraint()->set_allocated_on_expr(on_expr);
  }
  // TODO(mpdenton) multiple Tables with aliases?
}
// Fills |gbs| with a GROUP BY over one random column of |table|. When |join|
// is true the column is qualified with the table. |schema| is unused.
void GenerateGroupByStatement(GroupByStatement* gbs,
                              i::Schema* schema,
                              i::Table* table,
                              bool join = false) {
  ExprSchemaTableColumn* column_ref = gbs->mutable_exprs()
                                          ->mutable_expr()
                                          ->mutable_comp_expr()
                                          ->mutable_expr_stc();
  // Grouping by a single random column is enough.
  const uint32_t col = RandInt(0, table->num_columns - 1);
  CreateColumn(column_ref->mutable_col(), col);
  if (!join)
    return;
  CreateTableFromUint32(column_ref->mutable_table(), table->table_num);
}
// Fills |ssc| with the core of a SELECT (or SELECT DISTINCT) on |table|.
//
// Result columns:
//  - join (non-empty |join_tables|): a random set of table-qualified columns
//    from each joined table and then from |table|;
//  - otherwise, with equal odds, either a random set of columns of |table| or
//    a single random aggregate function over one random column.
// A FROM clause is always generated; WHERE and GROUP BY clauses are added at
// random.
//
// Returns the last set of column numbers that was drawn: the columns of
// |table| in the first two cases, and an empty set in the aggregate case.
std::set<uint32_t> GenerateSelectStatementCore(
    SelectStatementCore* ssc,
    i::Schema* schema,
    i::Table* table,
    std::vector<i::Table*> join_tables) {
  ssc->set_s_or_d(RandInt(1, 2) == 1 ? SelectStatementCore::SELECT
                                     : SelectStatementCore::SELECT_DISTINCT);

  const bool join = !join_tables.empty();
  std::set<uint32_t> cols;
  if (join) {
    // This is a join. Add columns from all the tables and include the table.
    // The joined tables come first and the primary table last.
    std::vector<i::Table*> sources = join_tables;
    sources.push_back(table);
    for (i::Table* source : sources) {
      cols = GetRandomNums(RandInt(1, source->num_columns - 1),
                           source->num_columns - 1);
      for (uint32_t col : cols) {
        ExprSchemaTableColumn* column_ref = ssc->add_result_columns()
                                                ->mutable_eca()
                                                ->mutable_expr()
                                                ->mutable_comp_expr()
                                                ->mutable_expr_stc();
        CreateColumn(column_ref->mutable_col(), col);
        CreateTableFromUint32(column_ref->mutable_table(), source->table_num);
      }
    }
  } else if (RandInt(1, 2) == 1) {
    // Plain columns of the single table.
    cols = GetRandomNums(RandInt(1, table->num_columns - 1),
                         table->num_columns - 1);
    for (uint32_t col : cols)
      CreateColumn(ssc->add_result_columns()->mutable_col(), col);
  } else {
    // One aggregate function applied to a random column.
    AggregateFn* aggregate = ssc->add_result_columns()
                                 ->mutable_eca()
                                 ->mutable_expr()
                                 ->mutable_comp_expr()
                                 ->mutable_fn_expr()
                                 ->mutable_aggregate_fn();
    aggregate->set_fn_name(RANDOM_ENUM(AggregateFn, FnName));
    aggregate->set_distinct(RandInt(0, 1) != 0);
    CreateColumn(aggregate->mutable_col1(),
                 RandInt(0, table->num_columns - 1));
  }

  GenerateFromStatement(ssc->mutable_from(), schema, table, join_tables);
  if (RandInt(1, 3) >= 2)
    GenerateWhereStatement(ssc->mutable_where(), schema, table, join);
  if (RandInt(1, 3) == 1)
    GenerateGroupByStatement(ssc->mutable_groupby(), schema, table, join);
  return cols;
}
// Fills |obs| with an ORDER BY clause for a select on |table|.
//
// For non-join queries on a table with indexed expressions, the clause
// sometimes orders by one of those expressions. Otherwise it orders by a
// random subset of |cols_tmp| in shuffled order, qualifying the columns with
// the table when |join| is true. |cols_tmp| is expected to be non-empty.
// |schema| is unused.
void GenerateOrderByStatement(OrderByStatement* obs,
                              i::Schema* schema,
                              i::Table* table,
                              std::set<uint32_t> cols_tmp,
                              bool join = false) {
  // TODO(mpdenton) exclude joins for now.
  const bool can_use_index = !join && !table->index_exprs.empty();
  if (can_use_index && RandInt(1, 5) >= 4) {
    // Order by one of the table's indexed expressions.
    const int which = RandInt(0, table->index_exprs.size() - 1);
    *obs->mutable_ord_term()->mutable_expr() = *table->index_exprs[which];
    return;
  }

  const size_t subset_size = RandInt(1, cols_tmp.size() - 1);
  const std::set<uint32_t> cols = GetRandomSubset(cols_tmp, subset_size);
  // A set iterates in sorted order, so copy to a vector and shuffle it.
  std::vector<uint32_t> order(cols.begin(), cols.end());
  std::shuffle(order.begin(), order.end(), GetRandom());

  i::Table* table_in_col = join ? table : nullptr;
  // The first term has its own field; the rest are appended as extra terms.
  CreateColumnExpr(obs->mutable_ord_term()->mutable_expr(), order[0],
                   table_in_col);
  for (size_t k = 1; k < order.size(); k++) {
    CreateColumnExpr(obs->add_extra_ord_terms()->mutable_expr(), order[k],
                     table_in_col);
  }
}
// Fills |s| with a SELECT on |table|, joined with |join_tables| when that list
// is non-empty. Half of the time an ORDER BY clause over a fresh random set of
// |table|'s columns is added.
void GenerateSelect(Select* s,
                    i::Schema* schema,
                    i::Table* table,
                    std::vector<i::Table*> join_tables = {}) {
  const bool join = !join_tables.empty();
  // The selected columns returned by the core are not used here (they could be
  // empty); ORDER BY draws its own columns below.
  GenerateSelectStatementCore(s->mutable_select_core(), schema, table,
                              join_tables);
  // TODO(mpdenton)
  if (RandInt(1, 2) != 1)
    return;
  const int max_col = table->num_columns - 1;
  const std::set<uint32_t> order_cols =
      GetRandomNums(RandInt(1, max_col), max_col);
  GenerateOrderByStatement(s->mutable_orderby(), schema, table, order_cols,
                           join);
  // Limits are not very interesting from a corpus standpoint.
}
// Fills |q| with an INSERT, DELETE, UPDATE or (join-less) SELECT, chosen with
// equal odds, that targets table number |table_num| of |main_schema|.
void InsertUpdateSelectOrDelete(SQLQuery* q,
                                i::Schema* main_schema,
                                int table_num) {
  i::Table* table = &main_schema->tables[table_num];
  switch (RandInt(1, 4)) {
    case 1:
      GenerateInsertion(q->mutable_insert(), main_schema, table);
      break;
    case 2:
      GenerateDelete(q->mutable_delete_(), main_schema, table);
      break;
    case 3:
      GenerateUpdate(q->mutable_update(), main_schema, table);
      break;
    case 4:
      GenerateSelect(q->mutable_select(), main_schema, table);
      break;
  }
}
// Returns the mutable schema/table/column reference inside |expr|, after
// running the expression's comp_expr through CreateDefaultCompExpr().
inline ExprSchemaTableColumn* GetSTC(Expr* expr) {
  ComplicatedExpr* comp_expr = CreateDefaultCompExpr(expr->mutable_comp_expr());
  return comp_expr->mutable_expr_stc();
}
// Fills |ci| with a CREATE INDEX on |table|.
//
// The index number is picked at random from |free_index_nums| and removed from
// that set, so |free_index_nums| must not be empty. Two kinds of index are
// generated:
//  - an expression index, either `col1 +/- col2` or `ABS(col)`. A heap
//    allocated copy of the indexed expression is returned and the caller takes
//    ownership of it;
//  - a plain index over a random set of columns, for which nullptr is
//    returned.
// |schema| is unused.
Expr* GenerateCreateIndex(CreateIndex* ci,
                          i::Schema* schema,
                          i::Table* table,
                          std::set<uint32_t>& free_index_nums) {
  CHECK(free_index_nums.size() != 0);
  std::set<uint32_t> index_num_set = GetRandomSubset(free_index_nums, 1);
  uint32_t index_num = *index_num_set.begin();
  ci->mutable_index()->set_index(index_num);
  free_index_nums.erase(index_num);
  CreateTableFromUint32(ci->mutable_table(), table->table_num);
  if (RandInt(1, 3) >= 2) {
    // Ownership of |expr| passes to |ci| via set_allocated_expr() below.
    Expr* expr = new Expr;
    int expr_type = RandInt(1, 2);
    if (expr_type == 1) {
      // Select two random columns of the table, add or subtract them.
      uint32_t col1 = RandInt(0, table->num_columns - 1);
      uint32_t col2 = RandInt(0, table->num_columns - 1);
      BinaryExpr* bin_expr = CreateDefaultCompExpr(expr->mutable_comp_expr())
                                 ->mutable_binary_expr();
      ExprSchemaTableColumn* lhs_stc = GetSTC(bin_expr->mutable_lhs());
      ExprSchemaTableColumn* rhs_stc = GetSTC(bin_expr->mutable_rhs());
      CreateColumn(lhs_stc->mutable_col(), col1);
      CreateColumn(rhs_stc->mutable_col(), col2);
      // TODO(mpdenton) perhaps set the tables here? The tables must not be set
      // for CREATE INDEX, but MUST be set for JOINs to avoid ambiguous columns.
      // Does it still count as the same expression if the table is included in
      // the JOIN but not the CREATE INDEX?
      if (RandInt(1, 2) == 1)
        bin_expr->set_op(BINOP_PLUS);
      else
        bin_expr->set_op(BINOP_MINUS);
    } else if (expr_type == 2) {
      // Or, apply abs to a single column.
      OneArgFn* oaf = CreateDefaultCompExpr(expr->mutable_comp_expr())
                          ->mutable_fn_expr()
                          ->mutable_simple_fn()
                          ->mutable_one_arg_fn();
      oaf->set_fn_enum(OneArgFn::ABS);
      uint32_t col = RandInt(0, table->num_columns - 1);
      ExprSchemaTableColumn* stc = GetSTC(oaf->mutable_arg1());
      CreateColumn(stc->mutable_col(), col);
      // TODO(mpdenton) see above about setting tables.
    }
    ci->mutable_icol_list()->mutable_indexed_col()->set_allocated_expr(expr);
    // Return a copy that isn't owned by another protobuf.
    return new Expr(*expr);
  }
  IndexedColumnList* icol_list = ci->mutable_icol_list();
  std::set<uint32_t> cols =
      GetRandomNums(RandInt(1, table->num_columns - 1), table->num_columns - 1);
  // Must start out true so the first column lands in indexed_col and only the
  // remaining ones go to extra_indexed_cols.
  bool first = true;
  for (uint32_t col : cols) {
    IndexedColumn* icol;
    if (first) {
      first = false;
      icol = icol_list->mutable_indexed_col();
    } else {
      icol = icol_list->add_extra_indexed_cols();
    }
    CreateColumn(icol->mutable_col(), col);
  }
  return nullptr;
}
namespace {
// Result of a single query-generation callback passed to GenQueries(). It
// tells GenQueries() what to do with the query the callback was given.
enum class GenQueryInstr {
// The query was generated; keep it.
SUCCESS,
// Nothing was generated; drop the query and carry on with the next one.
MOVE_ON,
// Nothing was generated; drop the query and repeat this iteration.
TRY_AGAIN,
};
}
// Appends generated queries to |queries|.
//
// For every table index in [0, num_tables), a count is drawn from the
// inclusive range [min, max] and gen(query, table_index) is called that many
// times. |gen| must return a GenQueryInstr:
//  - SUCCESS:   the query is kept;
//  - MOVE_ON:   the query is dropped and the iteration still counts;
//  - TRY_AGAIN: the query is dropped and the iteration is repeated.
// When |txn| is true the generated queries are wrapped in a BEGIN / COMMIT
// pair.
template <typename T>
void GenQueries(SQLQueries& queries,
                int min,
                int max,
                bool txn,
                int num_tables,
                T gen) {
  auto* extra_queries = queries.mutable_extra_queries();
  // Worst case: |max| queries for every table plus the BEGIN / COMMIT pair.
  extra_queries->Reserve(queries.extra_queries_size() + num_tables * max + 2);
  if (txn)
    extra_queries->Add()->mutable_begin_txn();  // Constructs a begin txn.
  for (int i = 0; i < num_tables; i++) {
    // Roll the per-table count once; re-rolling it in the loop condition would
    // bias the count towards |min|.
    const int num_queries = RandInt(min, max);
    for (int j = 0; j < num_queries; j++) {
      // continue; // TODO(mpdenton)
      SQLQuery* q = extra_queries->Add();
      const GenQueryInstr result = gen(q, i);
      if (result == GenQueryInstr::SUCCESS)
        continue;
      // Nothing usable was generated: drop the query that was just added.
      extra_queries->RemoveLast();
      if (result == GenQueryInstr::TRY_AGAIN)
        j--;
    }
  }
  if (txn)
    extra_queries->Add()->mutable_commit_txn();  // Constructs a commit txn.
}
// Fills |ct| with a fixed placeholder CREATE TABLE: table 0 in schema 5, with
// the main and temp flags of the schema name cleared, no IF NOT EXISTS, and a
// default-constructed `op`.
void FirstCreateTable(CreateTable* ct) {
  ExprSchemaTable* schema_table = ct->mutable_schema_table();
  auto* schema_name = schema_table->mutable_schema_name();
  schema_name->set_schema(5);
  schema_name->set_main(false);
  schema_name->set_temp(false);
  schema_table->mutable_table_name()->set_table(0);
  ct->set_if_not_exists(false);
  ct->mutable_op();
}
// Generates one corpus entry: a random schema of 1 to 5 tables followed by
// batches of statements that exercise it, in this order: CREATE TABLE,
// CREATE INDEX, INSERT, SELECT, SELECT with joins (only when there are at
// least two tables), UPDATE, DELETE, and finally a random mix of
// INSERT/DELETE/UPDATE/SELECT.
//
// NOTE: only the main schema is generated; an earlier sketch for several
// schemas (attached databases) was never implemented.
SQLQueries GenCorpusEntry() {
  SQLQueries queries;
  // Just get rid of the first CreateTable, it will error out but not screw up
  // anything below
  FirstCreateTable(queries.mutable_create_table());

  i::Schema main_schema;
  main_schema.num_tables = RandInt(1, 5);
  // Reserving up front means growing the vector never has to reallocate.
  main_schema.tables.reserve(main_schema.num_tables);

  // Index numbers that have not been handed to a CREATE INDEX yet.
  std::set<uint32_t> free_index_nums;
  for (uint32_t index_num = 0; index_num < 10; index_num++)
    free_index_nums.insert(index_num);

  // Create the tables: exactly one CREATE TABLE per table, with 1 to 8 columns
  // of random types.
  GenQueries(queries, 1, 1, false, main_schema.num_tables,
             [&](SQLQuery* q, int table_idx) {
               // The explicit cast avoids a narrowing conversion (int to
               // uint32_t) inside the braced initializer.
               i::Table t = i::Table{
                   .table_num = static_cast<uint32_t>(table_idx),
                   .num_columns = RandInt(1, 8),
               };
               for (int j = 0; j < t.num_columns; j++) {
                 t.col_types.push_back(
                     RANDOM_ENUM(CastTypeName, CastTypeNameEnum));
               }
               main_schema.tables.push_back(std::move(t));
               GenerateCreateTable(q->mutable_create_table(), &main_schema,
                                   &main_schema.tables[table_idx]);
               return GenQueryInstr::SUCCESS;
             });

  // Create indexes, remembering every indexed expression on its table so later
  // WHERE / ORDER BY clauses can reuse it.
  GenQueries(queries, kMinNumIndexes, kMaxNumIndexes, false,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               if (free_index_nums.empty())
                 return GenQueryInstr::MOVE_ON;
               Expr* index_expr = GenerateCreateIndex(
                   q->mutable_create_index(), &main_schema,
                   &main_schema.tables[table_idx], free_index_nums);
               if (index_expr) {
                 // The unique_ptr created here takes ownership of the copy.
                 main_schema.tables[table_idx].index_exprs.emplace_back(
                     index_expr);
               }
               return GenQueryInstr::SUCCESS;
             });

  // Generate a bunch of inserts in a transaction (for speed)
  GenQueries(queries, kMinNumInsertions, kMaxNumInsertions, true,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               GenerateInsertion(q->mutable_insert(), &main_schema,
                                 &main_schema.tables[table_idx]);
               return GenQueryInstr::SUCCESS;
             });

  // Generate a bunch of interesting selects with GroupBys, OrderBys, aggregate
  // functions, etc.
  GenQueries(queries, kMinNumSelects, kMaxNumSelects, false,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               GenerateSelect(q->mutable_select(), &main_schema,
                              &main_schema.tables[table_idx]);
               return GenQueryInstr::SUCCESS;
             });

  // Generate lots of interesting JOINs.
  if (main_schema.num_tables > 1) {
    GenQueries(queries, kMinNumJoins, kMaxNumJoins, false,
               main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
                 // Pick random tables to join with, excluding the table
                 // itself.
                 std::set<uint32_t> tables =
                     GetRandomNums(RandInt(1, main_schema.num_tables - 1),
                                   main_schema.num_tables - 1);
                 tables.erase(static_cast<uint32_t>(table_idx));
                 if (tables.empty()) {
                   // Only the table itself was drawn; try again.
                   return GenQueryInstr::TRY_AGAIN;
                 }
                 std::vector<i::Table*> tables_p;
                 for (uint32_t t : tables)
                   tables_p.push_back(&main_schema.tables[t]);
                 GenerateSelect(q->mutable_select(), &main_schema,
                                &main_schema.tables[table_idx], tables_p);
                 return GenQueryInstr::SUCCESS;
               });
  }

  // Generate a bunch of interesting updates.
  GenQueries(queries, kMinNumUpdates, kMaxNumUpdates, true,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               GenerateUpdate(q->mutable_update(), &main_schema,
                              &main_schema.tables[table_idx]);
               return GenQueryInstr::SUCCESS;
             });

  // Generate interesting deletes.
  GenQueries(queries, kMinNumDeletes, kMaxNumDeletes, true,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               GenerateDelete(q->mutable_delete_(), &main_schema,
                              &main_schema.tables[table_idx]);
               return GenQueryInstr::SUCCESS;
             });

  // Do everything except joins.
  GenQueries(queries, kMinNumOthers, kMaxNumOthers, true,
             main_schema.num_tables, [&](SQLQuery* q, int table_idx) {
               InsertUpdateSelectOrDelete(q, &main_schema, table_idx);
               return GenQueryInstr::SUCCESS;
             });
  return queries;
}
// Entry point of the corpus generator.
//
// Switches:
//   --num_entries=N   (required) number of corpus entries to generate.
//   --corpus_dir=DIR  (optional) existing directory that receives one
//                     serialized SQLQueries proto per entry, in files named
//                     corpus_queries<index>. Without it, the generated SQL is
//                     printed to stdout instead.
// Environment variables:
//   LPM_DUMP_NATIVE_INPUT  also print the SQL when writing to a directory.
//   PRINT_SQLITE_ERRORS    run the generated queries through SQLite.
//   LPM_SQLITE_TRACE       passed on to RunSqlQueries() as its second argument.
int main(int argc, char** argv) {
  base::CommandLine cl(argc, argv);
  int num_entries = 0;
  if (!cl.HasSwitch("num_entries"))
    LOG(FATAL) << "num_entries not specified.";
  if (!base::StringToInt(cl.GetSwitchValueASCII("num_entries"), &num_entries))
    LOG(FATAL) << "num_entries not parseable as an int.";
  bool to_stdout = true;
  base::FilePath dir_path;
  if (cl.HasSwitch("corpus_dir")) {
    to_stdout = false;
    dir_path = cl.GetSwitchValuePath("corpus_dir");
    // Fail early unless the target exists, can be opened and is a directory.
    base::File dir(dir_path, base::File::FLAG_OPEN | base::File::FLAG_READ);
    if (!dir.IsValid())
      LOG(FATAL) << "corpus_dir " << dir_path << " could not be opened.";
    base::File::Info dir_info;
    if (!dir.GetInfo(&dir_info))
      LOG(FATAL) << "Could not get corpus_dir " << dir_path << " file info.";
    if (!dir_info.is_directory)
      LOG(FATAL) << "corpus_dir " << dir_path << " is not a directory.";
  } else {
    LOG(INFO) << "corpus_dir not specified, writing serialized output to "
                 "stdout instead.";
  }
  // Next file index to try. It is kept across entries so that names which are
  // already known to be taken are not probed again.
  int last_index = 0;
  for (int total = 0; total < num_entries; total++) {
    SQLQueries queries = GenCorpusEntry();
    std::vector<std::string> queries_str;
    for (int i = 0; i < queries.extra_queries_size(); i++) {
      queries_str.push_back(
          sql_fuzzer::SQLQueryToString(queries.extra_queries(i)));
      if (to_stdout || ::getenv("LPM_DUMP_NATIVE_INPUT"))
        std::cout << queries_str[i] << std::endl;
    }
    if (getenv("PRINT_SQLITE_ERRORS"))
      sql_fuzzer::RunSqlQueries(queries_str, ::getenv("LPM_SQLITE_TRACE"));
    // If we just want to print to stdout, skip the directory stuff below.
    if (to_stdout)
      continue;
    // It's okay to serialize without all required fields, as LPM uses
    // ParsePartial* as well.
    std::string proto_text;
    if (!queries.SerializePartialToString(&proto_text))
      LOG(FATAL) << "Could not serialize queries to string.";
    // Probe file names until one can be created, so files that already exist
    // in the directory are never overwritten.
    bool found_file = false;
    while (!found_file) {
      base::FilePath file_path =
          dir_path.Append("corpus_queries" + std::to_string(last_index));
      base::File file(file_path,
                      base::File::FLAG_CREATE | base::File::FLAG_WRITE);
      if (file.created()) {
        found_file = true;
        if (file.Write(0, proto_text.data(), proto_text.length()) < 0) {
          LOG(FATAL) << "Failed to write to file " << file_path;
        }
      } else if (file.error_details() != base::File::FILE_ERROR_EXISTS) {
        // Any failure other than "already exists" (e.g. missing permissions)
        // would fail for every index and make this loop spin forever.
        LOG(FATAL) << "Could not create file " << file_path;
      }
      last_index++;
    }
  }
  return 0;
}