 # sqlite/sqlite-src-3280000/ext/fts5/test/fts5_common.tcl - chromium/src/third_party - Git at Google

 # 2014 Dec 19
 #
 # The author disclaims copyright to this source code.  In place of
 # a legal notice, here is a blessing:
 #
 #    May you do good and not evil.
 #    May you find forgiveness for yourself and forgive others.
 #    May you share freely, never taking more than you give.
 #
 #***********************************************************************
 #

 # Locate the core test directory, unless the including script has already
 # set $testdir, and load the shared test harness from it.
 if {![info exists testdir]} {
   set testdir [file join [file dirname [info script]] .. .. .. test]
 }
 source $testdir/tester.tcl

 # Define [return_if_no_fts5] in one of two flavours, selected by [ifcapable]
 # (NOTE(review): presumably supplied by tester.tcl -- confirm).
 ifcapable !fts5 {
   # fts5 unavailable: the helper calls finish_test and then makes its
   # caller return as well, via [return -code return].
   proc return_if_no_fts5 {} {
     finish_test
     return -code return
   }
   # NOTE(review): presumably stops evaluation of the rest of this file, so
   # none of the helpers below are defined -- confirm against [ifcapable].
   return
 } else {
   # fts5 available: the helper does nothing.
   proc return_if_no_fts5 {} {}
 }

 # Best effort only: any error raised by either command is swallowed.
 catch {
   sqlite3_fts5_may_be_corrupt 0
   reset_db
 }

 proc fts5_test_poslist {cmd} {
   # Collect the result of [$cmd xInst] for every instance index, with the
   # spaces in each result replaced by "." characters.
   set lInst {}
   set iInst 0
   while {$iInst < [$cmd xInstCount]} {
     set inst [$cmd xInst $iInst]
     lappend lInst [string map [list " " .] $inst]
     incr iInst
   }
   return $lInst
 }

 proc fts5_test_poslist2 {cmd} {
   # Walk every phrase with xPhraseForeach, recording one "$i.$c.$o" style
   # entry per callback invocation, then order the entries with
   # [sort_poslist].
   set lHit {}
   set iPhrase 0
   while {$iPhrase < [$cmd xPhraseCount]} {
     $cmd xPhraseForeach $iPhrase iCol iOff {
       lappend lHit $iPhrase.$iCol.$iOff
     }
     incr iPhrase
   }
   return [sort_poslist $lHit]
 }

 proc fts5_test_collist {cmd} {
   # For each phrase index, append one "<phrase>.<value>" entry for every
   # value that xPhraseColumnForeach hands to the loop body.
   set lOut {}
   set iPhrase 0
   while {$iPhrase < [$cmd xPhraseCount]} {
     $cmd xPhraseColumnForeach $iPhrase iCol {
       lappend lOut $iPhrase.$iCol
     }
     incr iPhrase
   }
   return $lOut
 }

 proc fts5_test_columnsize {cmd} {
   # One [$cmd xColumnSize] result per column index.
   set lSize {}
   set iCol 0
   while {$iCol < [$cmd xColumnCount]} {
     lappend lSize [$cmd xColumnSize $iCol]
     incr iCol
   }
   return $lSize
 }

 proc fts5_test_columntext {cmd} {
   # One [$cmd xColumnText] result per column index.
   set lText {}
   set iCol 0
   while {$iCol < [$cmd xColumnCount]} {
     lappend lText [$cmd xColumnText $iCol]
     incr iCol
   }
   return $lText
 }

 proc fts5_test_columntotalsize {cmd} {
   # One [$cmd xColumnTotalSize] result per column index.
   set lTotal {}
   set iCol 0
   while {$iCol < [$cmd xColumnCount]} {
     lappend lTotal [$cmd xColumnTotalSize $iCol]
     incr iCol
   }
   return $lTotal
 }

 proc test_append_token {varname token iStart iEnd} {
   # Tokenizer callback: push $token onto the list variable named $varname
   # in the calling frame. The two offsets are accepted but not used.
   upvar $varname lToken
   lappend lToken $token
   return SQLITE_OK
 }
 proc fts5_test_tokenize {cmd} {
   # Run xTokenize over the text of each column, gathering the tokens through
   # [test_append_token]. The result holds one token list per column.
   set lResult {}
   set iCol 0
   while {$iCol < [$cmd xColumnCount]} {
     # The callback appends to the local variable "tokens" by name.
     set tokens {}
     set zText [$cmd xColumnText $iCol]
     $cmd xTokenize $zText [list test_append_token tokens]
     lappend lResult $tokens
     incr iCol
   }
   return $lResult
 }

 proc fts5_test_rowcount {cmd} {
   # Thin wrapper: hand back whatever [$cmd xRowCount] reports.
   return [$cmd xRowCount]
 }

 proc test_queryphrase_cb {cnt cmd} {
   # xQueryPhrase callback. Notes which column numbers (second field of each
   # xInst result) occur among the instances reported by $cmd, then adds one
   # to the matching slots of the caller's list variable named $cnt.
   upvar $cnt lCount
   set iInst 0
   while {$iInst < [$cmd xInstCount]} {
     foreach {ip ic io} [$cmd xInst $iInst] break
     set aSeen($ic) 1
     incr iInst
   }
   foreach iCol [array names aSeen] {
     set nPrev [lindex $lCount $iCol]
     lset lCount $iCol [expr {$nPrev + 1}]
   }
 }
 proc fts5_test_queryphrase {cmd} {
   # For each phrase, start from a list of zeroes (one per column) and let
   # [test_queryphrase_cb] update it via xQueryPhrase. Returns the list of
   # per-phrase counter lists.
   set lResult {}
   set iPhrase 0
   while {$iPhrase < [$cmd xPhraseCount]} {
     # The callback updates the local variable "cnt" by name.
     set cnt {}
     set iCol 0
     while {$iCol < [$cmd xColumnCount]} {
       lappend cnt 0
       incr iCol
     }
     $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb cnt]
     lappend lResult $cnt
     incr iPhrase
   }
   return $lResult
 }

 proc fts5_test_phrasecount {cmd} {
   # Thin wrapper: hand back whatever [$cmd xPhraseCount] reports.
   return [$cmd xPhraseCount]
 }

 proc fts5_test_all {cmd} {
   # Return a flat label/value list built by running several of the
   # fts5_test_* helpers against $cmd, in the fixed order listed below.
   set lPair {}
   foreach {zLabel zHelper} {
     columnsize      fts5_test_columnsize
     columntext      fts5_test_columntext
     columntotalsize fts5_test_columntotalsize
     poslist         fts5_test_poslist
     tokenize        fts5_test_tokenize
     rowcount        fts5_test_rowcount
   } {
     lappend lPair $zLabel [$zHelper $cmd]
   }
   return $lPair
 }

 proc fts5_aux_test_functions {db} {
   # Register each helper below on $db through sqlite3_fts5_create_function,
   # using the proc name both as the function name and as its implementation.
   set lFunc {
     fts5_test_columnsize  fts5_test_columntext  fts5_test_columntotalsize
     fts5_test_poslist     fts5_test_poslist2    fts5_test_collist
     fts5_test_tokenize    fts5_test_rowcount    fts5_test_all
     fts5_test_queryphrase fts5_test_phrasecount
   }
   foreach zFunc $lFunc {
     sqlite3_fts5_create_function $db $zFunc $zFunc
   }
 }

 proc fts5_segcount {tbl} {
   # Sum of the per-level counts reported by [fts5_level_segs].
   set nTotal 0
   foreach nSeg [fts5_level_segs $tbl] {
     incr nTotal $nSeg
   }
   return $nTotal
 }

 proc fts5_level_segs {tbl} {
   # Decode the rowid=10 record of ${tbl}_data with fts5_decode(), skip the
   # first element of the decoded list and, for each remaining element,
   # report its list length minus 3.
   set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
   set lCount {}
   set lLevel [lrange [db one $sql] 1 end]
   foreach level $lLevel {
     lappend lCount [expr {[llength $level] - 3}]
   }
   return $lCount
 }

 proc fts5_level_segids {tbl} {
   # Same decoding as [fts5_level_segs], but for each element return the
   # list of numbers captured from the "id=<digits>" text found in its
   # fourth and later sub-elements.
   set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
   set lOut {}
   foreach level [lrange [db one $sql] 1 end] {
     set lId {}
     foreach seg [lrange $level 3 end] {
       regexp {id=([1234567890]*)} $seg -> segid
       lappend lId $segid
     }
     lappend lOut $lId
   }
   return $lOut
 }

 proc fts5_rnddoc {n} {
   # Build a list of $n pseudo-random tokens. Each token is "x" followed by a
   # zero-padded three-digit number whose digits 0-9 are mapped to a-j.
   set aDigit {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
   set lDoc {}
   for {set k 0} {$k < $n} {incr k} {
     set zNum [format %.3d [expr {int(rand()*1000)}]]
     lappend lDoc "x[string map $aDigit $zNum]"
   }
   return $lDoc
 }

 #-------------------------------------------------------------------------
 # Usage:
 #
 #   nearset aCol ?-pc VARNAME? ?-near N? ?-col C? ?-dict VARNAME? -- phrase1 phrase2...
 #
 # This command is used to test if a document (set of column values) matches
 # the logical equivalent of a single FTS5 NEAR() clump and, if so, return
 # the equivalent of an FTS5 position list.
 #
 # Parameter $aCol is passed a list of the column values for the document
 # to test. Parameters $phrase1 and so on are the phrases.
 #
 # The result is a list of phrase hits. Each phrase hit is formatted as
 # three integers separated by "." characters, in the following format:
 #
 #   <phrase number> . <column number> . <token offset>
 #
 # Options:
 #
 #   -near N        (NEAR distance. Default 10)
 #   -col  C        (List of column indexes to match against)
 #   -pc   VARNAME  (variable in caller frame to use for phrase numbering)
 #   -dict VARNAME  (array in caller frame to use for synonyms)
 #
 proc nearset {aCol args} {
   # Returns the sorted list of "<phrase>.<column>.<offset>" hits for the
   # column values in $aCol. A hit is only recorded from a window in which
   # every phrase matched at least once, so the result is empty when no such
   # window exists.

   # Process the command line options.
   #
   set O(-near) 10
   set O(-col)  {}
   set O(-pc)   ""
   set O(-dict) ""

   # Arguments before "--" are option/value pairs; those after are phrases.
   set nOpt [lsearch -exact $args --]
   if {$nOpt<0} { error "no -- option" }

   # Set $lPhrase to be a list of phrases. $nPhrase its length.
   set lPhrase [lrange $args [expr $nOpt+1] end]
   set nPhrase [llength $lPhrase]

   foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
     if {[info exists O($k)]==0} { error "unrecognized option $k" }
     set O($k) $v
   }

   # $counter supplies the phrase number used in the output. It is either a
   # private variable starting at 0 or, with -pc, a variable in the caller's
   # frame that keeps counting across calls.
   if {$O(-pc) == ""} {
     set counter 0
   } else {
     upvar $O(-pc) counter
   }

   # With -dict, alias the caller's synonym array as aDict so that
   # [nearset_match] can reach it by name.
   if {$O(-dict)!=""} { upvar $O(-dict) aDict }

   # A($iCol,$iPhrase) accumulates the matching offsets; start all empty.
   for {set j 0} {$j < [llength $aCol]} {incr j} {
     for {set i 0} {$i < $nPhrase} {incr i} {
       set A($j,$i) [list]
     }
   }

   # Loop through each column of the current row.
   for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {

     # If there is a column filter, test whether this column is excluded. If
     # so, skip to the next iteration of this loop. Otherwise, set zCol to the
     # column value and nToken to the number of tokens that comprise it.
     if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
     set zCol [lindex $aCol $iCol]
     set nToken [llength $zCol]

     # Each iteration of the following loop searches a substring of the
     # column value for phrase matches. The last token of the substring
     # is token $iLast of the column value. The first token is:
     #
     #   iFirst = ($iLast - $O(-near) - $sz)
     #
     # where $sz is the length of the phrase being searched for. A phrase
     # counts as matching the substring if its first token lies on or before
     # $iLast and its last token on or after $iFirst.
     #
     # For example, if the query is "NEAR(a+b c, 2)" and the column value:
     #
     #   "x x x x A B x x C x"
     #    0 1 2 3 4 5 6 7 8 9
     #
     # when (iLast==8 && iFirst==5, the value for the one-token phrase) the
     # range will contain both phrases and so both instances can be added to
     # the output poslists.
     #
     set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
     for { } {$iLast < $nToken} {incr iLast} {

       # B($iPhrase) holds the candidate start offsets found for each phrase
       # within the current window only.
       catch { array unset B }

       for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
         set p [lindex $lPhrase $iPhrase]
         set nPm1 [expr {[llength $p] - 1}]
         set iFirst [expr $iLast - $O(-near) - [llength $p]]

         for {set i $iFirst} {$i <= $iLast} {incr i} {
           set lCand [lrange $zCol $i [expr $i+$nPm1]]
           set bMatch 1
           foreach tok $p term $lCand {
             if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break }
           }
           if {$bMatch} { lappend B($iPhrase) $i }
         }

         # A phrase with no hit in this window rules the window out; leaving
         # early keeps $iPhrase below $nPhrase for the test that follows.
         if {![info exists B($iPhrase)]} break
       }

       # Every phrase matched in this window: merge its offsets into A,
       # keeping each list sorted and free of duplicates.
       if {$iPhrase==$nPhrase} {
         for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
           set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
           set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
         }
       }
     }
   }

   set res [list]
   #puts [array names A]

   # Emit the hits phrase by phrase; $counter advances once per phrase even
   # if that phrase produced no hits.
   for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
     for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
       foreach a $A($iCol,$iPhrase) {
         lappend res "$counter.$iCol.$a"
       }
     }
     incr counter
   }

   #puts "$aCol -> $res"
   sort_poslist $res
 }

 proc nearset_match {aDictVar tok term} {
   # Return 1 when $term matches $tok as a [string match] pattern, or matches
   # any pattern stored under $tok in the array named $aDictVar in the
   # caller's frame. Return 0 otherwise.
   if {[string match $tok $term]} { return 1 }

   upvar $aDictVar aSyn
   if {![info exists aSyn($tok)]} { return 0 }
   foreach zSyn $aSyn($tok) {
     if {[string match $zSyn $term]} { return 1 }
   }
   return 0
 }

 #-------------------------------------------------------------------------
 # Usage:
 #
 #   sort_poslist LIST
 #
 # Sort a position list of the type returned by command [nearset]
 #
 proc sort_poslist {L} {
   # Order the "<phrase>.<column>.<offset>" entries of $L with [instcompare].
   lsort -command instcompare $L
 }
 proc instcompare {lhs rhs} {
   # [lsort -command] comparator for "<phrase>.<column>.<offset>" entries.
   # Entries are ordered by column, then by offset, then by phrase number.
   # Returns a negative, zero or positive integer.
   lassign [split $lhs .] p1 c1 o1
   lassign [split $rhs .] p2 c2 o2

   # Braced expressions are substituted only once and are compiled.
   set res [expr {$c1 - $c2}]
   if {$res==0} { set res [expr {$o1 - $o2}] }
   if {$res==0} { set res [expr {$p1 - $p2}] }

   return $res
 }

 #-------------------------------------------------------------------------
 # Logical operators used by the commands returned by fts5_expr_tcl().
 #
 proc AND {args} {
   # Empty result as soon as any operand poslist is empty; otherwise the
   # sorted concatenation of all operands.
   foreach lPos $args {
     if {![llength $lPos]} { return {} }
   }
   return [sort_poslist [concat {*}$args]]
 }
 proc OR {args} {
   # Sorted concatenation of all operand poslists.
   return [sort_poslist [concat {*}$args]]
 }
 proc NOT {a b} {
   # $a survives only when $b is empty.
   if {[llength $b] == 0} { return $a }
   return {}
 }

 #-------------------------------------------------------------------------
 # This command is similar to [split], except that it also provides the
 # start and end offsets of each token. For example:
 #
 #   [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
 #

 proc gobble_whitespace {textvar} {
   # Remove the leading run of space characters from the caller's variable
   # named $textvar and return how many were removed.
   upvar $textvar t
   regexp {([ ]*)(.*)} $t -> space t
   return [string length $space]
 }

 proc gobble_text {textvar wordvar} {
   # Move the leading run of non-space characters from the caller's variable
   # $textvar into the caller's variable $wordvar. Returns the length of that
   # run, which is 0 when the text is empty or starts with a space.
   upvar $textvar t
   upvar $wordvar w
   regexp {([^ ]*)(.*)} $t -> w t
   return [string length $w]
 }

 proc fts5_tokenize_split {text} {
   # Split $text on spaces. The result is a flat list holding three values
   # per token: the token, its start offset and its end offset.
   set ret [list]
   set iOff [gobble_whitespace text]
   while {[set nToken [gobble_text text word]]} {
     set iEnd [expr {$iOff + $nToken}]
     lappend ret $word $iOff $iEnd
     # Skip the separator run to find where the next token starts.
     set iOff [expr {$iEnd + [gobble_whitespace text]}]
   }

   return $ret
 }

 #-------------------------------------------------------------------------
 #
 proc foreach_detail_mode {prefix script} {
   # Evaluate $script in the caller's frame once for each of the detail modes
   # full, col and none. For each run: every "%DETAIL%" in the script is
   # replaced by the mode name, ::detail holds the mode, ::testprefix is
   # "$prefix-<mode>" and the database is reset first. ::testprefix is put
   # back to its previous value at the end.
   set zSavedPrefix $::testprefix
   foreach zMode {full col none} {
     set ::detail $zMode
     set ::testprefix "$prefix-$zMode"
     set zBody [string map [list %DETAIL% $zMode] $script]
     reset_db
     uplevel $zBody
     unset ::detail
   }
   set ::testprefix $zSavedPrefix
 }

 proc detail_check {} {
   # Raise an error unless ::detail is currently set to one of the three
   # modes used by [foreach_detail_mode]. The variable is unset outside such
   # a block, so existence is tested first; otherwise reading it would fail
   # with "no such variable" instead of the message below.
   if {![info exists ::detail] || $::detail ni {none full col}} {
     error "not in foreach_detail_mode {...} block"
   }
 }
 # Predicates for the current detail mode. Each validates ::detail first.
 proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
 proc detail_is_col {}  { detail_check ; expr {$::detail == "col" } }
 proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }


 #-------------------------------------------------------------------------
 # Convert a poslist of the type returned by fts5_test_poslist() to a
 # collist as returned by fts5_test_collist().
 #
 proc fts5_poslist2collist {poslist} {
   # Drop the trailing ".<digits>" component from each entry of $poslist,
   # then sort the remainder with [fts5_collist_elem_compare] and remove
   # duplicates.
   set res [list]
   foreach h $poslist {
     regexp {(.*)\.[1234567890]+} $h -> cand
     lappend res $cand
   }
   set res [lsort -command fts5_collist_elem_compare -unique $res]
   return $res
 }

 # Comparison function used by fts5_poslist2collist to sort collist entries.
 proc fts5_collist_elem_compare {a b} {
   # Orders "<first>.<second>" entries by the first field, then the second.
   # Returns a negative, zero or positive integer.
   lassign [split $a .] a1 a2
   lassign [split $b .] b1 b2

   # Braced expressions are substituted only once and are compiled.
   if {$a1==$b1} { return [expr {$a2 - $b2}] }
   return [expr {$a1 - $b1}]
 }


 #--------------------------------------------------------------------------
 # Construct and return a tcl list equivalent to that returned by the SQL
 # query executed against database handle [db]:
 #
 #   SELECT
 #     rowid,
 #     fts5_test_poslist($tbl),
 #     fts5_test_collist($tbl)
 #   FROM $tbl('$expr')
 #   ORDER BY rowid $order;
 #
 proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} {
   # Parameters:
   #   expr      the query expression handed to fts5_expr_tcl()
   #   tbl       name of the table to scan
   #   order     text appended after "ORDER BY rowid" (default ASC)
   #   aDictVar  optional name of a synonym array in the caller's frame
   #
   # Returns a flat list with three values per matching row: the rowid, the
   # poslist and the collist derived from that poslist.

   # Figure out the set of columns in the FTS5 table. This routine does
   # not handle tables with UNINDEXED columns, but if it did, it would
   # have to be here.
   db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

   # $d is the variable NAME that the generated [nearset] calls receive via
   # -dict. It is "aDict" when the caller supplied an array, else empty.
   set d ""
   if {$aDictVar != ""} {
     upvar $aDictVar aDict
     set d aDict
   }

   # Build ", 'col1', 'col2'..." and splice it into the SQL text as extra
   # arguments. [subst -novar] expands only the bracketed [set cols]; the
   # "$"-prefixed names are left untouched in the statement text.
   # NOTE(review): $expr is presumably bound by [db] as an SQL parameter, and
   # $cols / $d presumably survive as literal text inside the generated Tcl
   # expression until it is evaluated below -- confirm.
   set cols ""
   foreach e $lCols { append cols ", '$e'" }
   set tclexpr [db one [subst -novar {
     SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] )
   }]]

   set res [list]
   db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
     # From here on $cols holds this row's column VALUES, in column order.
     set cols [list]
     foreach col $lCols { lappend cols $x($col) }

     # Restart the ::pc counter for each row before evaluating the generated
     # expression. Rows producing an empty result are left out.
     set ::pc 0
     set rowdata [eval $tclexpr]
     if {$rowdata != ""} {
       lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata]
     }
   }

   set res
 }

 #-------------------------------------------------------------------------
 # Similar to [fts5_query_data], but omit the collist field.
 #
 proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} {
   # Run [fts5_query_data] and keep only the rowid and poslist of each row.
   # A synonym array named by $aDictVar is aliased locally as aDict, and that
   # local name is what gets passed on.
   set zDict ""
   if {$aDictVar ne ""} {
     upvar $aDictVar aDict
     set zDict aDict
   }

   set lOut {}
   foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $zDict] {
     lappend lOut $rowid $poslist
   }
   return $lOut
 }

 proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {
   # Run [fts5_query_data] and keep only the rowid and collist of each row.
   # A synonym array named by $aDictVar is aliased locally as aDict, and that
   # local name is what gets passed on.
   set zDict ""
   if {$aDictVar ne ""} {
     upvar $aDictVar aDict
     set zDict aDict
   }

   set lOut {}
   foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $zDict] {
     lappend lOut $rowid $collist
   }
   return $lOut
 }

 #-------------------------------------------------------------------------
 #

 # This command will only work inside a [foreach_detail_mode] block. It tests
 # whether or not expression $expr run on FTS5 table $tbl is supported by
 # the current mode. If so, 1 is returned. If not, 0.
 #
 #   detail=full    (all queries supported)
 #   detail=col     (all but phrase queries and NEAR queries)
 #   detail=none    (all but phrase queries, NEAR queries, and column filters)
 #
 proc fts5_expr_ok {expr tbl} {
   # Returns 1 if $expr is acceptable in the current detail mode, else 0.
   # In "full" mode nothing is inspected and the answer is always 1.

   if {![detail_is_full]} {
     # Choose the checker that stands in for [nearset] in the generated
     # expression: nearset_rf for detail=col, nearset_rc for detail=none.
     set nearset "nearset_rc"
     if {[detail_is_col]} { set nearset "nearset_rf" }

     # The checkers report a problem by setting this global to 1.
     set ::expr_not_ok 0
     db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

     # [subst -novar] expands only the bracketed [set ...] commands; the
     # "$"-prefixed names are left untouched in the statement text.
     # NOTE(review): $expr is presumably bound by [db] as an SQL parameter
     # -- confirm.
     set cols ""
     foreach e $lCols { append cols ", '$e'" }
     set ::pc 0
     set tclexpr [db one [subst -novar {
       SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] )
     }]]
     # Evaluated only for the side effect on ::expr_not_ok.
     eval $tclexpr
     if {$::expr_not_ok} { return 0 }
   }

   return 1
 }

 # Helper for [fts5_expr_ok]
 proc nearset_rf {aCol args} {
   # Sets ::expr_not_ok unless exactly one argument follows "--" and that
   # argument is a one-element list. Always returns an empty list.
   set iSep [lsearch -exact $args --]
   set bSinglePhrase [expr {$iSep == [llength $args]-2}]
   set bSingleToken  [expr {[llength [lindex $args end]]==1}]
   if {!($bSinglePhrase && $bSingleToken)} {
     set ::expr_not_ok 1
   }
   return [list]
 }

 # Helper for [fts5_expr_ok]
 proc nearset_rc {aCol args} {
   # Applies the [nearset_rf] check and additionally sets ::expr_not_ok when
   # a -col option is present. Always returns an empty list.
   nearset_rf $aCol {*}$args
   if {[lsearch $args -col] >= 0} {
     set ::expr_not_ok 1
   }
   return [list]
 }


 #-------------------------------------------------------------------------
 # Code for a simple Tcl tokenizer that supports synonyms at query time.
 #
 proc tclnum_tokenize {mode tflags text} {
   # Emit every space-separated token of $text through sqlite3_fts5_token.
   # When $tflags equals $mode and ::tclnum_syn has an entry for the token,
   # each listed synonym is emitted as well, using the -colo flag and the
   # same offsets as the token.
   foreach {zTok iStart iEnd} [fts5_tokenize_split $text] {
     sqlite3_fts5_token $zTok $iStart $iEnd
     if {$tflags != $mode} continue
     if {![info exists ::tclnum_syn($zTok)]} continue
     foreach zSyn $::tclnum_syn($zTok) {
       sqlite3_fts5_token -colo $zSyn $iStart $iEnd
     }
   }
 }

 proc tclnum_create {args} {
   # Returns the command prefix "tclnum_tokenize <mode>". The mode is the
   # first argument if one is given, else "query"; only "query" and
   # "document" are accepted.
   if {[llength $args]} {
     set mode [lindex $args 0]
   } else {
     set mode query
   }
   if {$mode ni {query document}} { error "bad mode: $mode" }
   return [list tclnum_tokenize $mode]
 }

 proc fts5_tclnum_register {db} {
   # Fill the global ::tclnum_syn array and register the "tclnum" tokenizer
   # on database handle $db, with [tclnum_create] as its constructor.
   #
   # Each list below is one synonym group: every member is mapped to the list
   # of the other members of its group.
   foreach SYNDICT {
     {zero  0}
     {one   1 i}
     {two   2 ii}
     {three 3 iii}
     {four  4 iv}
     {five  5 v}
     {six   6 vi}
     {seven 7 vii}
     {eight 8 viii}
     {nine  9 ix}

     {a1 a2 a3 a4 a5 a6 a7 a8 a9}
     {b1 b2 b3 b4 b5 b6 b7 b8 b9}
     {c1 c2 c3 c4 c5 c6 c7 c8 c9}
   } {
     foreach s $SYNDICT {
       set o [list]
       # String comparison: group members are words, not numbers.
       foreach x $SYNDICT {if {$x ne $s} {lappend o $x}}
       set ::tclnum_syn($s) $o
     }
   }
   # Register on the handle that was passed in, not on the literal name "db".
   sqlite3_fts5_create_tokenizer $db tclnum tclnum_create
 }
 #
 # End of tokenizer code.
 #-------------------------------------------------------------------------
	# 2014 Dec 19
	#
	# The author disclaims copyright to this source code. In place of
	# a legal notice, here is a blessing:
	#
	# May you do good and not evil.
	# May you find forgiveness for yourself and forgive others.
	# May you share freely, never taking more than you give.
	#
	#***********************************************************************
	#

	# Locate the core test directory, unless the including script has already
	# set $testdir, and load the shared test harness from it.
	if {![info exists testdir]} {
	set testdir [file join [file dirname [info script]] .. .. .. test]
	}
	source $testdir/tester.tcl

	# Define [return_if_no_fts5] in one of two flavours, selected by [ifcapable]
	# (NOTE(review): presumably supplied by tester.tcl -- confirm).
	ifcapable !fts5 {
	# fts5 unavailable: the helper calls finish_test and then makes its
	# caller return as well, via [return -code return].
	proc return_if_no_fts5 {} {
	finish_test
	return -code return
	}
	# NOTE(review): presumably stops evaluation of the rest of this file
	# -- confirm against [ifcapable].
	return
	} else {
	# fts5 available: the helper does nothing.
	proc return_if_no_fts5 {} {}
	}

	# Best effort only: any error raised by either command is swallowed.
	catch {
	sqlite3_fts5_may_be_corrupt 0
	reset_db
	}

	proc fts5_test_poslist {cmd} {
	  # Collect the result of [$cmd xInst] for every instance index, with the
	  # spaces in each result replaced by "." characters.
	  set lInst {}
	  set iInst 0
	  while {$iInst < [$cmd xInstCount]} {
	    set inst [$cmd xInst $iInst]
	    lappend lInst [string map [list " " .] $inst]
	    incr iInst
	  }
	  return $lInst
	}

	proc fts5_test_poslist2 {cmd} {
	  # Walk every phrase with xPhraseForeach, recording one "$i.$c.$o" style
	  # entry per callback invocation, then order the entries with
	  # [sort_poslist].
	  set lHit {}
	  set iPhrase 0
	  while {$iPhrase < [$cmd xPhraseCount]} {
	    $cmd xPhraseForeach $iPhrase iCol iOff {
	      lappend lHit $iPhrase.$iCol.$iOff
	    }
	    incr iPhrase
	  }
	  return [sort_poslist $lHit]
	}

	proc fts5_test_collist {cmd} {
	  # For each phrase index, append one "<phrase>.<value>" entry for every
	  # value that xPhraseColumnForeach hands to the loop body.
	  set lOut {}
	  set iPhrase 0
	  while {$iPhrase < [$cmd xPhraseCount]} {
	    $cmd xPhraseColumnForeach $iPhrase iCol {
	      lappend lOut $iPhrase.$iCol
	    }
	    incr iPhrase
	  }
	  return $lOut
	}

	proc fts5_test_columnsize {cmd} {
	  # One [$cmd xColumnSize] result per column index.
	  set lSize {}
	  set iCol 0
	  while {$iCol < [$cmd xColumnCount]} {
	    lappend lSize [$cmd xColumnSize $iCol]
	    incr iCol
	  }
	  return $lSize
	}

	proc fts5_test_columntext {cmd} {
	  # One [$cmd xColumnText] result per column index.
	  set lText {}
	  set iCol 0
	  while {$iCol < [$cmd xColumnCount]} {
	    lappend lText [$cmd xColumnText $iCol]
	    incr iCol
	  }
	  return $lText
	}

	proc fts5_test_columntotalsize {cmd} {
	  # One [$cmd xColumnTotalSize] result per column index.
	  set lTotal {}
	  set iCol 0
	  while {$iCol < [$cmd xColumnCount]} {
	    lappend lTotal [$cmd xColumnTotalSize $iCol]
	    incr iCol
	  }
	  return $lTotal
	}

	proc test_append_token {varname token iStart iEnd} {
	  # Tokenizer callback: push $token onto the list variable named $varname
	  # in the calling frame. The two offsets are accepted but not used.
	  upvar $varname lToken
	  lappend lToken $token
	  return SQLITE_OK
	}
	proc fts5_test_tokenize {cmd} {
	  # Run xTokenize over the text of each column, gathering the tokens
	  # through [test_append_token]. The result holds one token list per
	  # column.
	  set lResult {}
	  set iCol 0
	  while {$iCol < [$cmd xColumnCount]} {
	    # The callback appends to the local variable "tokens" by name.
	    set tokens {}
	    set zText [$cmd xColumnText $iCol]
	    $cmd xTokenize $zText [list test_append_token tokens]
	    lappend lResult $tokens
	    incr iCol
	  }
	  return $lResult
	}

	proc fts5_test_rowcount {cmd} {
	  # Thin wrapper: hand back whatever [$cmd xRowCount] reports.
	  return [$cmd xRowCount]
	}

	proc test_queryphrase_cb {cnt cmd} {
	  # xQueryPhrase callback. Notes which column numbers (second field of
	  # each xInst result) occur among the instances reported by $cmd, then
	  # adds one to the matching slots of the caller's list variable named
	  # $cnt.
	  upvar $cnt lCount
	  set iInst 0
	  while {$iInst < [$cmd xInstCount]} {
	    foreach {ip ic io} [$cmd xInst $iInst] break
	    set aSeen($ic) 1
	    incr iInst
	  }
	  foreach iCol [array names aSeen] {
	    set nPrev [lindex $lCount $iCol]
	    lset lCount $iCol [expr {$nPrev + 1}]
	  }
	}
	proc fts5_test_queryphrase {cmd} {
	  # For each phrase, start from a list of zeroes (one per column) and let
	  # [test_queryphrase_cb] update it via xQueryPhrase. Returns the list of
	  # per-phrase counter lists.
	  set lResult {}
	  set iPhrase 0
	  while {$iPhrase < [$cmd xPhraseCount]} {
	    # The callback updates the local variable "cnt" by name.
	    set cnt {}
	    set iCol 0
	    while {$iCol < [$cmd xColumnCount]} {
	      lappend cnt 0
	      incr iCol
	    }
	    $cmd xQueryPhrase $iPhrase [list test_queryphrase_cb cnt]
	    lappend lResult $cnt
	    incr iPhrase
	  }
	  return $lResult
	}

	proc fts5_test_phrasecount {cmd} {
	  # Thin wrapper: hand back whatever [$cmd xPhraseCount] reports.
	  return [$cmd xPhraseCount]
	}

	proc fts5_test_all {cmd} {
	  # Return a flat label/value list built by running several of the
	  # fts5_test_* helpers against $cmd, in the fixed order listed below.
	  set lPair {}
	  foreach {zLabel zHelper} {
	    columnsize      fts5_test_columnsize
	    columntext      fts5_test_columntext
	    columntotalsize fts5_test_columntotalsize
	    poslist         fts5_test_poslist
	    tokenize        fts5_test_tokenize
	    rowcount        fts5_test_rowcount
	  } {
	    lappend lPair $zLabel [$zHelper $cmd]
	  }
	  return $lPair
	}

	proc fts5_aux_test_functions {db} {
	  # Register each helper below on $db through
	  # sqlite3_fts5_create_function, using the proc name both as the function
	  # name and as its implementation.
	  set lFunc {
	    fts5_test_columnsize  fts5_test_columntext  fts5_test_columntotalsize
	    fts5_test_poslist     fts5_test_poslist2    fts5_test_collist
	    fts5_test_tokenize    fts5_test_rowcount    fts5_test_all
	    fts5_test_queryphrase fts5_test_phrasecount
	  }
	  foreach zFunc $lFunc {
	    sqlite3_fts5_create_function $db $zFunc $zFunc
	  }
	}

	proc fts5_segcount {tbl} {
	  # Sum of the per-level counts reported by [fts5_level_segs].
	  set nTotal 0
	  foreach nSeg [fts5_level_segs $tbl] {
	    incr nTotal $nSeg
	  }
	  return $nTotal
	}

	proc fts5_level_segs {tbl} {
	  # Decode the rowid=10 record of ${tbl}_data with fts5_decode(), skip the
	  # first element of the decoded list and, for each remaining element,
	  # report its list length minus 3.
	  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
	  set lCount {}
	  set lLevel [lrange [db one $sql] 1 end]
	  foreach level $lLevel {
	    lappend lCount [expr {[llength $level] - 3}]
	  }
	  return $lCount
	}

	proc fts5_level_segids {tbl} {
	  # Same decoding as [fts5_level_segs], but for each element return the
	  # list of numbers captured from the "id=<digits>" text found in its
	  # fourth and later sub-elements.
	  set sql "SELECT fts5_decode(rowid,block) aS r FROM ${tbl}_data WHERE rowid=10"
	  set lOut {}
	  foreach level [lrange [db one $sql] 1 end] {
	    set lId {}
	    foreach seg [lrange $level 3 end] {
	      regexp {id=([1234567890]*)} $seg -> segid
	      lappend lId $segid
	    }
	    lappend lOut $lId
	  }
	  return $lOut
	}

	proc fts5_rnddoc {n} {
	  # Build a list of $n pseudo-random tokens. Each token is "x" followed by
	  # a zero-padded three-digit number whose digits 0-9 are mapped to a-j.
	  set aDigit {0 a 1 b 2 c 3 d 4 e 5 f 6 g 7 h 8 i 9 j}
	  set lDoc {}
	  for {set k 0} {$k < $n} {incr k} {
	    set zNum [format %.3d [expr {int(rand()*1000)}]]
	    lappend lDoc "x[string map $aDigit $zNum]"
	  }
	  return $lDoc
	}

	#-------------------------------------------------------------------------
	# Usage:
	#
	# nearset aCol ?-pc VARNAME? ?-near N? ?-col C? ?-dict VARNAME? -- phrase1 phrase2...
	#
	# This command is used to test if a document (set of column values) matches
	# the logical equivalent of a single FTS5 NEAR() clump and, if so, return
	# the equivalent of an FTS5 position list.
	#
	# Parameter $aCol is passed a list of the column values for the document
	# to test. Parameters $phrase1 and so on are the phrases.
	#
	# The result is a list of phrase hits. Each phrase hit is formatted as
	# three integers separated by "." characters, in the following format:
	#
	# <phrase number> . <column number> . <token offset>
	#
	# Options:
	#
	# -near N (NEAR distance. Default 10)
	# -col C (List of column indexes to match against)
	# -pc VARNAME (variable in caller frame to use for phrase numbering)
	# -dict VARNAME (array in caller frame to use for synonyms)
	#
	proc nearset {aCol args} {
	# Returns the sorted list of "<phrase>.<column>.<offset>" hits for the
	# column values in $aCol. A hit is only recorded from a window in which
	# every phrase matched at least once, so the result is empty when no such
	# window exists.

	# Process the command line options.
	#
	set O(-near) 10
	set O(-col) {}
	set O(-pc) ""
	set O(-dict) ""

	# Arguments before "--" are option/value pairs; those after are phrases.
	set nOpt [lsearch -exact $args --]
	if {$nOpt<0} { error "no -- option" }

	# Set $lPhrase to be a list of phrases. $nPhrase its length.
	set lPhrase [lrange $args [expr $nOpt+1] end]
	set nPhrase [llength $lPhrase]

	foreach {k v} [lrange $args 0 [expr $nOpt-1]] {
	if {[info exists O($k)]==0} { error "unrecognized option $k" }
	set O($k) $v
	}

	# $counter supplies the phrase number used in the output. It is either a
	# private variable starting at 0 or, with -pc, a variable in the caller's
	# frame that keeps counting across calls.
	if {$O(-pc) == ""} {
	set counter 0
	} else {
	upvar $O(-pc) counter
	}

	# With -dict, alias the caller's synonym array as aDict so that
	# [nearset_match] can reach it by name.
	if {$O(-dict)!=""} { upvar $O(-dict) aDict }

	# A($iCol,$iPhrase) accumulates the matching offsets; start all empty.
	for {set j 0} {$j < [llength $aCol]} {incr j} {
	for {set i 0} {$i < $nPhrase} {incr i} {
	set A($j,$i) [list]
	}
	}

	# Loop through each column of the current row.
	for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {

	# If there is a column filter, test whether this column is excluded. If
	# so, skip to the next iteration of this loop. Otherwise, set zCol to the
	# column value and nToken to the number of tokens that comprise it.
	if {$O(-col)!="" && [lsearch $O(-col) $iCol]<0} continue
	set zCol [lindex $aCol $iCol]
	set nToken [llength $zCol]

	# Each iteration of the following loop searches a substring of the
	# column value for phrase matches. The last token of the substring
	# is token $iLast of the column value. The first token is:
	#
	# iFirst = ($iLast - $O(-near) - $sz)
	#
	# where $sz is the length of the phrase being searched for. A phrase
	# counts as matching the substring if its first token lies on or before
	# $iLast and its last token on or after $iFirst.
	#
	# For example, if the query is "NEAR(a+b c, 2)" and the column value:
	#
	# "x x x x A B x x C x"
	#  0 1 2 3 4 5 6 7 8 9
	#
	# when (iLast==8 && iFirst==5, the value for the one-token phrase) the
	# range will contain both phrases and so both instances can be added to
	# the output poslists.
	#
	set iLast [expr $O(-near) >= $nToken ? $nToken - 1 : $O(-near)]
	for { } {$iLast < $nToken} {incr iLast} {

	# B($iPhrase) holds the candidate start offsets found for each phrase
	# within the current window only.
	catch { array unset B }

	for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
	set p [lindex $lPhrase $iPhrase]
	set nPm1 [expr {[llength $p] - 1}]
	set iFirst [expr $iLast - $O(-near) - [llength $p]]

	for {set i $iFirst} {$i <= $iLast} {incr i} {
	set lCand [lrange $zCol $i [expr $i+$nPm1]]
	set bMatch 1
	foreach tok $p term $lCand {
	if {[nearset_match aDict $tok $term]==0} { set bMatch 0 ; break }
	}
	if {$bMatch} { lappend B($iPhrase) $i }
	}

	# A phrase with no hit in this window rules the window out; leaving
	# early keeps $iPhrase below $nPhrase for the test that follows.
	if {![info exists B($iPhrase)]} break
	}

	# Every phrase matched in this window: merge its offsets into A,
	# keeping each list sorted and free of duplicates.
	if {$iPhrase==$nPhrase} {
	for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
	set A($iCol,$iPhrase) [concat $A($iCol,$iPhrase) $B($iPhrase)]
	set A($iCol,$iPhrase) [lsort -integer -uniq $A($iCol,$iPhrase)]
	}
	}
	}
	}

	set res [list]
	#puts [array names A]

	# Emit the hits phrase by phrase; $counter advances once per phrase even
	# if that phrase produced no hits.
	for {set iPhrase 0} {$iPhrase<$nPhrase} {incr iPhrase} {
	for {set iCol 0} {$iCol < [llength $aCol]} {incr iCol} {
	foreach a $A($iCol,$iPhrase) {
	lappend res "$counter.$iCol.$a"
	}
	}
	incr counter
	}

	#puts "$aCol -> $res"
	sort_poslist $res
	}

	proc nearset_match {aDictVar tok term} {
	  # Return 1 when $term matches $tok as a [string match] pattern, or
	  # matches any pattern stored under $tok in the array named $aDictVar in
	  # the caller's frame. Return 0 otherwise.
	  if {[string match $tok $term]} { return 1 }

	  upvar $aDictVar aSyn
	  if {![info exists aSyn($tok)]} { return 0 }
	  foreach zSyn $aSyn($tok) {
	    if {[string match $zSyn $term]} { return 1 }
	  }
	  return 0
	}

	#-------------------------------------------------------------------------
	# Usage:
	#
	# sort_poslist LIST
	#
	# Sort a position list of the type returned by command [nearset]
	#
	proc sort_poslist {L} {
	  # Order the "<phrase>.<column>.<offset>" entries of $L with
	  # [instcompare].
	  lsort -command instcompare $L
	}
	proc instcompare {lhs rhs} {
	  # [lsort -command] comparator for "<phrase>.<column>.<offset>" entries.
	  # Entries are ordered by column, then by offset, then by phrase number.
	  # Returns a negative, zero or positive integer.
	  lassign [split $lhs .] p1 c1 o1
	  lassign [split $rhs .] p2 c2 o2

	  # Braced expressions are substituted only once and are compiled.
	  set res [expr {$c1 - $c2}]
	  if {$res==0} { set res [expr {$o1 - $o2}] }
	  if {$res==0} { set res [expr {$p1 - $p2}] }

	  return $res
	}

	#-------------------------------------------------------------------------
	# Logical operators used by the commands returned by fts5_expr_tcl().
	#
	proc AND {args} {
	  # Empty result as soon as any operand poslist is empty; otherwise the
	  # sorted concatenation of all operands.
	  foreach lPos $args {
	    if {![llength $lPos]} { return {} }
	  }
	  return [sort_poslist [concat {*}$args]]
	}
	proc OR {args} {
	  # Sorted concatenation of all operand poslists.
	  return [sort_poslist [concat {*}$args]]
	}
	proc NOT {a b} {
	  # $a survives only when $b is empty.
	  if {[llength $b] == 0} { return $a }
	  return {}
	}

	#-------------------------------------------------------------------------
	# This command is similar to [split], except that it also provides the
	# start and end offsets of each token. For example:
	#
	# [fts5_tokenize_split "abc d ef"] -> {abc 0 3 d 4 5 ef 6 8}
	#

	proc gobble_whitespace {textvar} {
	  # Remove the leading run of space characters from the caller's variable
	  # named $textvar and return how many were removed. The "*" quantifiers
	  # are essential: without them the pattern fails on text that has no
	  # space and otherwise discards everything up to the first space.
	  upvar $textvar t
	  regexp {([ ]*)(.*)} $t -> space t
	  return [string length $space]
	}

	proc gobble_text {textvar wordvar} {
	  # Move the leading run of non-space characters from the caller's
	  # variable $textvar into the caller's variable $wordvar. Returns the
	  # length of that run, which is 0 when the text is empty or starts with a
	  # space.
	  upvar $textvar t
	  upvar $wordvar w
	  regexp {([^ ]*)(.*)} $t -> w t
	  return [string length $w]
	}

	proc fts5_tokenize_split {text} {
	  # Split $text on spaces. The result is a flat list holding three values
	  # per token: the token, its start offset and its end offset.
	  set ret [list]
	  set iOff [gobble_whitespace text]
	  while {[set nToken [gobble_text text word]]} {
	    set iEnd [expr {$iOff + $nToken}]
	    lappend ret $word $iOff $iEnd
	    # Skip the separator run to find where the next token starts.
	    set iOff [expr {$iEnd + [gobble_whitespace text]}]
	  }

	  return $ret
	}

	#-------------------------------------------------------------------------
	#
	proc foreach_detail_mode {prefix script} {
	  # Evaluate $script in the caller's frame once for each of the detail
	  # modes full, col and none. For each run: every "%DETAIL%" in the script
	  # is replaced by the mode name, ::detail holds the mode, ::testprefix is
	  # "$prefix-<mode>" and the database is reset first. ::testprefix is put
	  # back to its previous value at the end.
	  set zSavedPrefix $::testprefix
	  foreach zMode {full col none} {
	    set ::detail $zMode
	    set ::testprefix "$prefix-$zMode"
	    set zBody [string map [list %DETAIL% $zMode] $script]
	    reset_db
	    uplevel $zBody
	    unset ::detail
	  }
	  set ::testprefix $zSavedPrefix
	}

	proc detail_check {} {
	  # Raise an error unless ::detail is currently set to one of the three
	  # modes used by [foreach_detail_mode]. The variable is unset outside
	  # such a block, so existence is tested first; otherwise reading it would
	  # fail with "no such variable" instead of the message below.
	  if {![info exists ::detail] || $::detail ni {none full col}} {
	    error "not in foreach_detail_mode {...} block"
	  }
	}
	# Predicates for the current detail mode. Each validates ::detail first.
	proc detail_is_none {} { detail_check ; expr {$::detail == "none"} }
	proc detail_is_col {} { detail_check ; expr {$::detail == "col" } }
	proc detail_is_full {} { detail_check ; expr {$::detail == "full"} }


	#-------------------------------------------------------------------------
	# Convert a poslist of the type returned by fts5_test_poslist() to a
	# collist as returned by fts5_test_collist().
	#
	proc fts5_poslist2collist {poslist} {
	  # Drop the trailing ".<digits>" component from each entry of $poslist,
	  # then sort the remainder with [fts5_collist_elem_compare] and remove
	  # duplicates.
	  set res [list]
	  foreach h $poslist {
	    regexp {(.*)\.[1234567890]+} $h -> cand
	    lappend res $cand
	  }
	  set res [lsort -command fts5_collist_elem_compare -unique $res]
	  return $res
	}

	# Comparison function used by fts5_poslist2collist to sort collist entries.
	proc fts5_collist_elem_compare {a b} {
	  # Orders "<first>.<second>" entries by the first field, then the second.
	  # Returns a negative, zero or positive integer.
	  lassign [split $a .] a1 a2
	  lassign [split $b .] b1 b2

	  # Braced expressions are substituted only once and are compiled.
	  if {$a1==$b1} { return [expr {$a2 - $b2}] }
	  return [expr {$a1 - $b1}]
	}


	#--------------------------------------------------------------------------
	# Construct and return a tcl list equivalent to that returned by the SQL
	# query executed against database handle [db]:
	#
	# SELECT
	# rowid,
	# fts5_test_poslist($tbl),
	# fts5_test_collist($tbl)
	# FROM $tbl('$expr')
	# ORDER BY rowid $order;
	#
	proc fts5_query_data {expr tbl {order ASC} {aDictVar ""}} {
	# Parameters:
	#   expr      the query expression handed to fts5_expr_tcl()
	#   tbl       name of the table to scan
	#   order     text appended after "ORDER BY rowid" (default ASC)
	#   aDictVar  optional name of a synonym array in the caller's frame
	#
	# Returns a flat list with three values per matching row: the rowid, the
	# poslist and the collist derived from that poslist.

	# Figure out the set of columns in the FTS5 table. This routine does
	# not handle tables with UNINDEXED columns, but if it did, it would
	# have to be here.
	db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

	# $d is the variable NAME that the generated [nearset] calls receive via
	# -dict. It is "aDict" when the caller supplied an array, else empty.
	set d ""
	if {$aDictVar != ""} {
	upvar $aDictVar aDict
	set d aDict
	}

	# Build ", 'col1', 'col2'..." and splice it into the SQL text as extra
	# arguments. [subst -novar] expands only the bracketed [set cols]; the
	# "$"-prefixed names are left untouched in the statement text.
	# NOTE(review): $expr is presumably bound by [db] as an SQL parameter, and
	# $cols / $d presumably survive as literal text inside the generated Tcl
	# expression until it is evaluated below -- confirm.
	set cols ""
	foreach e $lCols { append cols ", '$e'" }
	set tclexpr [db one [subst -novar {
	SELECT fts5_expr_tcl( $expr, 'nearset $cols -dict $d -pc ::pc' [set cols] )
	}]]

	set res [list]
	db eval "SELECT rowid, * FROM $tbl ORDER BY rowid $order" x {
	# From here on $cols holds this row's column VALUES, in column order.
	set cols [list]
	foreach col $lCols { lappend cols $x($col) }

	# Restart the ::pc counter for each row before evaluating the generated
	# expression. Rows producing an empty result are left out.
	set ::pc 0
	set rowdata [eval $tclexpr]
	if {$rowdata != ""} {
	lappend res $x(rowid) $rowdata [fts5_poslist2collist $rowdata]
	}
	}

	set res
	}

	#-------------------------------------------------------------------------
	# Similar to [fts5_query_data], but omit the collist field.
	#
	proc fts5_poslist_data {expr tbl {order ASC} {aDictVar ""}} {
	  # Run [fts5_query_data] and keep only the rowid and poslist of each row.
	  # A synonym array named by $aDictVar is aliased locally as aDict, and
	  # that local name is what gets passed on.
	  set zDict ""
	  if {$aDictVar ne ""} {
	    upvar $aDictVar aDict
	    set zDict aDict
	  }

	  set lOut {}
	  foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $zDict] {
	    lappend lOut $rowid $poslist
	  }
	  return $lOut
	}

	proc fts5_collist_data {expr tbl {order ASC} {aDictVar ""}} {
	  # Run [fts5_query_data] and keep only the rowid and collist of each row.
	  # A synonym array named by $aDictVar is aliased locally as aDict, and
	  # that local name is what gets passed on.
	  set zDict ""
	  if {$aDictVar ne ""} {
	    upvar $aDictVar aDict
	    set zDict aDict
	  }

	  set lOut {}
	  foreach {rowid poslist collist} [fts5_query_data $expr $tbl $order $zDict] {
	    lappend lOut $rowid $collist
	  }
	  return $lOut
	}

	#-------------------------------------------------------------------------
	#

	# This command will only work inside a [foreach_detail_mode] block. It tests
	# whether or not expression $expr run on FTS5 table $tbl is supported by
	# the current mode. If so, 1 is returned. If not, 0.
	#
	# detail=full (all queries supported)
	# detail=col (all but phrase queries and NEAR queries)
	# detail=none (all but phrase queries, NEAR queries, and column filters)
	#
	proc fts5_expr_ok {expr tbl} {
	# Returns 1 if $expr is acceptable in the current detail mode, else 0.
	# In "full" mode nothing is inspected and the answer is always 1.

	if {![detail_is_full]} {
	# Choose the checker that stands in for [nearset] in the generated
	# expression: nearset_rf for detail=col, nearset_rc for detail=none.
	set nearset "nearset_rc"
	if {[detail_is_col]} { set nearset "nearset_rf" }

	# The checkers report a problem by setting this global to 1.
	set ::expr_not_ok 0
	db eval "PRAGMA table_info = $tbl" x { lappend lCols $x(name) }

	# [subst -novar] expands only the bracketed [set ...] commands; the
	# "$"-prefixed names are left untouched in the statement text.
	# NOTE(review): $expr is presumably bound by [db] as an SQL parameter
	# -- confirm.
	set cols ""
	foreach e $lCols { append cols ", '$e'" }
	set ::pc 0
	set tclexpr [db one [subst -novar {
	SELECT fts5_expr_tcl( $expr, '[set nearset] $cols -pc ::pc' [set cols] )
	}]]
	# Evaluated only for the side effect on ::expr_not_ok.
	eval $tclexpr
	if {$::expr_not_ok} { return 0 }
	}

	return 1
	}

	# Helper for [fts5_expr_ok]
	proc nearset_rf {aCol args} {
	  # Sets ::expr_not_ok unless exactly one argument follows "--" and that
	  # argument is a one-element list. Always returns an empty list.
	  set idx [lsearch -exact $args --]
	  # The logical OR must be written "||"; a backslash-escaped form is a
	  # syntax error inside an expression.
	  if {$idx != [llength $args]-2 || [llength [lindex $args end]]!=1} {
	    set ::expr_not_ok 1
	  }
	  list
	}

	# Helper for [fts5_expr_ok]
	proc nearset_rc {aCol args} {
	  # Applies the [nearset_rf] check and additionally sets ::expr_not_ok
	  # when a -col option is present. Always returns an empty list.
	  nearset_rf $aCol {*}$args
	  if {[lsearch $args -col]>=0} {
	    set ::expr_not_ok 1
	  }
	  list
	}


	#-------------------------------------------------------------------------
	# Code for a simple Tcl tokenizer that supports synonyms at query time.
	#
	proc tclnum_tokenize {mode tflags text} {
	  # Emit every space-separated token of $text through sqlite3_fts5_token.
	  # When $tflags equals $mode and ::tclnum_syn has an entry for the token,
	  # each listed synonym is emitted as well, using the -colo flag and the
	  # same offsets as the token.
	  foreach {zTok iStart iEnd} [fts5_tokenize_split $text] {
	    sqlite3_fts5_token $zTok $iStart $iEnd
	    if {$tflags != $mode} continue
	    if {![info exists ::tclnum_syn($zTok)]} continue
	    foreach zSyn $::tclnum_syn($zTok) {
	      sqlite3_fts5_token -colo $zSyn $iStart $iEnd
	    }
	  }
	}

	proc tclnum_create {args} {
	  # Returns the command prefix "tclnum_tokenize <mode>". The mode is the
	  # first argument if one is given, else "query"; only "query" and
	  # "document" are accepted.
	  if {[llength $args]} {
	    set mode [lindex $args 0]
	  } else {
	    set mode query
	  }
	  if {$mode ni {query document}} { error "bad mode: $mode" }
	  return [list tclnum_tokenize $mode]
	}

	proc fts5_tclnum_register {db} {
	  # Fill the global ::tclnum_syn array and register the "tclnum" tokenizer
	  # on database handle $db, with [tclnum_create] as its constructor.
	  #
	  # Each list below is one synonym group: every member is mapped to the
	  # list of the other members of its group.
	  foreach SYNDICT {
	    {zero 0}
	    {one 1 i}
	    {two 2 ii}
	    {three 3 iii}
	    {four 4 iv}
	    {five 5 v}
	    {six 6 vi}
	    {seven 7 vii}
	    {eight 8 viii}
	    {nine 9 ix}

	    {a1 a2 a3 a4 a5 a6 a7 a8 a9}
	    {b1 b2 b3 b4 b5 b6 b7 b8 b9}
	    {c1 c2 c3 c4 c5 c6 c7 c8 c9}
	  } {
	    foreach s $SYNDICT {
	      set o [list]
	      # String comparison: group members are words, not numbers.
	      foreach x $SYNDICT {if {$x ne $s} {lappend o $x}}
	      set ::tclnum_syn($s) $o
	    }
	  }
	  # Register on the handle that was passed in, not on the literal name
	  # "db".
	  sqlite3_fts5_create_tokenizer $db tclnum tclnum_create
	}
	#
	# End of tokenizer code.
	#-------------------------------------------------------------------------