| # 2020 September 30 |
| # |
| # The author disclaims copyright to this source code. In place of |
| # a legal notice, here is a blessing: |
| # |
| # May you do good and not evil. |
| # May you find forgiveness for yourself and forgive others. |
| # May you share freely, never taking more than you give. |
| # |
| #************************************************************************* |
| # |
| # Tests for the fts5 "trigram" tokenizer. |
| # |
| |
| # Load the shared fts5 test helpers located in the same directory as this script. |
| source [file join [file dirname [info script]] fts5_common.tcl] |
| # Stop immediately when the build does not include fts5. |
| ifcapable !fts5 { finish_test ; return } |
| set ::testprefix fts5trigram |
| |
| # Test 1.0: a table using the trigram tokenizer with no options, holding |
| # one ASCII row (rowid 1) and one Thai row (rowid 2). |
| do_execsql_test 1.0 { |
|   CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram); |
|   INSERT INTO t1(y) VALUES('abcdefghijklm'); |
|   INSERT INTO t1(y) VALUES('กรุงเทพมหานคร'); |
| } |
| |
| # Tests 1.1.*: full-text queries against t1. Each row of the table below is: |
| # test number, query text, expected highlight() output. Rows 7-9 repeat |
| # rows 1-3 with mixed-case query text and expect the same output. |
| foreach {tn query expected} { |
|   1 abc "(abc)defghijklm" |
|   2 defgh "abc(defgh)ijklm" |
|   3 abcdefghijklm "(abcdefghijklm)" |
|   4 กรุ "(กรุ)งเทพมหานคร" |
|   5 งเทพมห "กรุ(งเทพมห)านคร" |
|   6 กรุงเทพมหานคร "(กรุงเทพมหานคร)" |
|   7 Abc "(abc)defghijklm" |
|   8 deFgh "abc(defgh)ijklm" |
|   9 aBcdefGhijKlm "(abcdefghijklm)" |
| } { |
|   do_execsql_test 1.1.$tn { |
|     SELECT highlight(t1, 0, '(', ')') FROM t1($query) |
|   } $expected |
| } |
| |
| # Test 1.2.0: fts5_expr() output for a four-character query when the |
| # trigram tokenizer is used. |
| do_execsql_test 1.2.0 { |
|   SELECT fts5_expr('ABCD', 'tokenize=trigram') |
| } {{"abc" + "bcd"}} |
| |
| # Test 1.2.1: LIKE with an unbound pattern parameter plus an ESCAPE clause. |
| # The statement must run without error and return no rows. |
| do_execsql_test 1.2.1 { |
|   SELECT * FROM t1 WHERE y LIKE ? ESCAPE 'a' |
| } {} |
| |
| # Tests 1.3.*: LIKE patterns against t1. Each row of the table below is: |
| # test number, LIKE pattern, expected rowids. |
| foreach {tn pattern expected} { |
|   1 {%cDef%} 1 |
|   2 {cDef%} {} |
|   3 {%f%} 1 |
|   4 {%f_h%} 1 |
|   5 {%f_g%} {} |
|   6 {abc%klm} 1 |
|   7 {ABCDEFG%} 1 |
|   8 {%รุงเ%} 2 |
|   9 {%งเ%} 2 |
|   10 {%"งเ"%} {} |
| } { |
|   do_execsql_test 1.3.$tn { |
|     SELECT rowid FROM t1 WHERE y LIKE $pattern |
|   } $expected |
| } |
| |
| #------------------------------------------------------------------------- |
| # Section 2: the same two rows as section 1, in a table whose tokenizer is |
| # configured with "case_sensitive 1". |
| reset_db |
| do_execsql_test 2.0 { |
|   CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1"); |
|   INSERT INTO t1(y) VALUES('abcdefghijklm'); |
|   INSERT INTO t1(y) VALUES('กรุงเทพมหานคร'); |
| } |
| |
| # Test 2.0.1: "case_sensitive" given without a value must be rejected. |
| do_catchsql_test 2.0.1 { |
|   CREATE VIRTUAL TABLE t2 USING fts5(z, tokenize='trigram case_sensitive'); |
| } {1 {error in tokenizer constructor}} |
| |
| # Tests 2.1.*: full-text queries against the case-sensitive table. Each row |
| # is: test number, query text, expected highlight() output. Unlike the |
| # 1.1.* tests, the mixed-case queries in rows 7-9 are expected to match |
| # nothing. |
| foreach {tn query expected} { |
|   1 abc "(abc)defghijklm" |
|   2 defgh "abc(defgh)ijklm" |
|   3 abcdefghijklm "(abcdefghijklm)" |
|   4 กรุ "(กรุ)งเทพมหานคร" |
|   5 งเทพมห "กรุ(งเทพมห)านคร" |
|   6 กรุงเทพมหานคร "(กรุงเทพมหานคร)" |
|   7 Abc "" |
|   8 deFgh "" |
|   9 aBcdefGhijKlm "" |
| } { |
|   do_execsql_test 2.1.$tn { |
|     SELECT highlight(t1, 0, '(', ')') FROM t1($query) |
|   } $expected |
| } |
| # Tests 2.2.*: LIKE patterns against the case-sensitive table. Each row is: |
| # test number, LIKE pattern, expected rowids. The expected rowids equal |
| # those of rows 1-8 of the 1.3.* tests, including the mixed-case patterns. |
| foreach {tn pattern expected} { |
|   1 {%cDef%} 1 |
|   2 {cDef%} {} |
|   3 {%f%} 1 |
|   4 {%f_h%} 1 |
|   5 {%f_g%} {} |
|   6 {abc%klm} 1 |
|   7 {ABCDEFG%} 1 |
|   8 {%รุงเ%} 2 |
| } { |
|   do_execsql_test 2.2.$tn { |
|     SELECT rowid FROM t1 WHERE y LIKE $pattern |
|   } $expected |
| } |
| # Tests 2.3.*: GLOB patterns against the case-sensitive table. Each row is: |
| # test number, GLOB pattern, expected rowids. Rows 9-12 use bracketed |
| # character sets, including sets that contain a literal "]". |
| foreach {tn pattern expected} { |
|   1 {*cdef*} 1 |
|   2 {cdef*} {} |
|   3 {*f*} 1 |
|   4 {*f?h*} 1 |
|   5 {*f?g*} {} |
|   6 {abc*klm} 1 |
|   7 {abcdefg*} 1 |
|   8 {*รุงเ*} 2 |
|   9 {abc[d]efg*} 1 |
|   10 {abc[]d]efg*} 1 |
|   11 {abc[^]d]efg*} {} |
|   12 {abc[^]XYZ]efg*} 1 |
| } { |
|   do_execsql_test 2.3.$tn { |
|     SELECT rowid FROM t1 WHERE y GLOB $pattern |
|   } $expected |
| } |
| |
| # Test 2.3.null.1: a NULL pattern on the right of LIKE returns no rows. |
| do_execsql_test 2.3.null.1 { |
|   SELECT rowid FROM t1 WHERE y LIKE NULL |
| } {} |
| |
| #------------------------------------------------------------------------- |
| # Section 3: argument checking for the case_sensitive option. The values |
| # 2 and 11 are rejected; the value 1 is accepted when the whole |
| # "tokenize=..." option is passed as one quoted argument. |
| reset_db |
| |
| do_catchsql_test 3.1 { |
|   CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2"); |
| } {1 {error in tokenizer constructor}} |
| |
| do_catchsql_test 3.2 { |
|   CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11"); |
| } {1 {error in tokenizer constructor}} |
| |
| do_catchsql_test 3.3 { |
|   CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1"); |
| } {0 {}} |
| |
| #------------------------------------------------------------------------- |
| # Section 4: insert a three-byte blob whose first byte is 0x00 into a |
| # trigram table, then run the fts5 integrity-check command on it. |
| reset_db |
| |
| do_execsql_test 4.0 { |
|   CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram"); |
| } |
| |
| do_execsql_test 4.1 { |
|   INSERT INTO t0(b) VALUES (x'000b01'); |
| } |
| |
| do_execsql_test 4.2 { |
|   INSERT INTO t0(t0) VALUES('integrity-check'); |
| } |
| |
| #------------------------------------------------------------------------- |
| # Section 5: rerun the LIKE checks once per detail mode and, within each |
| # mode, once for case_sensitive 0 and once for case_sensitive 1. |
| reset_db |
| foreach_detail_mode $::testprefix { |
|   foreach {cs} {0 1} { |
|     # Start every combination from an empty database. |
|     reset_db |
|     do_execsql_test 5.cs=$cs.0.1 " |
|       CREATE VIRTUAL TABLE t1 USING fts5( |
|         y, tokenize=\"trigram case_sensitive $cs\", detail=%DETAIL% |
|       ); |
|     " |
|     do_execsql_test 5.cs=$cs.0.2 { |
|       INSERT INTO t1(y) VALUES('abcdefghijklm'); |
|       INSERT INTO t1(y) VALUES('กรุงเทพมหานคร'); |
|     } |
| |
|     # Each row is: test number, LIKE pattern, expected rowids. The same |
|     # expectations apply to both case_sensitive settings. |
|     foreach {tn pattern expected} { |
|       1 {%cDef%} 1 |
|       2 {cDef%} {} |
|       3 {%f%} 1 |
|       4 {%f_h%} 1 |
|       5 {%f_g%} {} |
|       6 {abc%klm} 1 |
|       7 {ABCDEFG%} 1 |
|       8 {%รุงเ%} 2 |
|     } { |
|       do_execsql_test 5.cs=$cs.1.$tn { |
|         SELECT rowid FROM t1 WHERE y LIKE $pattern |
|       } $expected |
|     } |
|   } |
| } |
| |
| # Section 6: query plans. ci0 uses the default trigram tokenizer, ci1 uses |
| # "case_sensitive 1". |
| do_execsql_test 6.0 { |
|   CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram"); |
|   CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1"); |
| } |
| |
| # Both LIKE and GLOB are handled by the index when the tokenizer is |
| # case-insensitive. With a case-sensitive tokenizer only GLOB is. |
| do_eqp_test 6.1 { |
|   SELECT * FROM ci0 WHERE x LIKE ? |
| } {VIRTUAL TABLE INDEX 0:L0} |
| |
| do_eqp_test 6.2 { |
|   SELECT * FROM ci0 WHERE x GLOB ? |
| } {VIRTUAL TABLE INDEX 0:G0} |
| |
| do_eqp_test 6.3 { |
|   SELECT * FROM ci1 WHERE x LIKE ? |
| } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} |
| |
| do_eqp_test 6.4 { |
|   SELECT * FROM ci1 WHERE x GLOB ? |
| } {VIRTUAL TABLE INDEX 0:G0} |
| |
| # A "<" comparison produces a plain scan on either table. |
| do_eqp_test 6.5 { |
|   SELECT * FROM ci1 WHERE x < ? |
| } {{SCAN ci1 VIRTUAL TABLE INDEX 0:}} |
| |
| do_eqp_test 6.6 { |
|   SELECT * FROM ci0 WHERE x < ? |
| } {{SCAN ci0 VIRTUAL TABLE INDEX 0:}} |
| |
| #------------------------------------------------------------------------- |
| # Section 7: GLOB against file names containing non-ASCII text. Rows 20 and |
| # 40 are Cyrillic. Row 50 contains U+1F3B5, a character outside the BMP |
| # that takes four bytes in UTF-8. |
| # String values use single quotes so that the statement does not depend |
| # on double-quoted string literals being accepted. |
| reset_db |
| do_execsql_test 7.0 { |
|   CREATE VIRTUAL TABLE f USING FTS5(filename, tokenize="trigram"); |
|   INSERT INTO f (rowid, filename) VALUES |
|     (10, 'giraffe.png'), |
|     (20, 'жираф.png'), |
|     (30, 'cat.png'), |
|     (40, 'кот.png'), |
|     (50, 'misic-🎵-.mp3'); |
| } |
| |
| # Test 7.1 applies GLOB to "+filename" and test 7.2 to the bare column. |
| # NOTE(review): the unary "+" presumably keeps the constraint away from the |
| # fts5 index so that 7.1 serves as the reference result - confirm. |
| do_execsql_test 7.1 { |
|   SELECT rowid FROM f WHERE +filename GLOB '*ир*'; |
| } {20} |
| |
| do_execsql_test 7.2 { |
|   SELECT rowid FROM f WHERE filename GLOB '*ир*'; |
| } {20} |
| |
| |
| #------------------------------------------------------------------------- |
| # Section 8: highlight() with queries made of several phrases. |
| reset_db |
| do_execsql_test 8.0 { |
|   CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram); |
|   INSERT INTO t1(y) VALUES('abcdefghijklm'); |
| } |
| |
| # Tests 8.1.*: each row is: test number, query text, expected highlight() |
| # output. Where the matched phrases touch or overlap in the document |
| # (rows 2, 3, 6 and 7) a single highlighted span is expected. |
| foreach {tn query expected} { |
|   1 "abc ghi" "(abc)def(ghi)jklm" |
|   2 "def ghi" "abc(defghi)jklm" |
|   3 "efg ghi" "abcd(efghi)jklm" |
|   4 "efghi" "abcd(efghi)jklm" |
|   5 "abcd jklm" "(abcd)efghi(jklm)" |
|   6 "ijkl jklm" "abcdefgh(ijklm)" |
|   7 "ijk ijkl hijk" "abcdefg(hijkl)m" |
| } { |
|   do_execsql_test 8.1.$tn { |
|     SELECT highlight(t1, 0, '(', ')') FROM t1($query) |
|   } $expected |
| } |
| |
| # Test 8.2: three documents in which "abc" and "cde" are separated by |
| # another word, separated by a space, and overlapping. |
| do_execsql_test 8.2 { |
|   CREATE VIRTUAL TABLE ft2 USING fts5(a, tokenize="trigram"); |
|   INSERT INTO ft2(a) VALUES('abc x cde'); |
|   INSERT INTO ft2(a) VALUES('abc cde'); |
|   INSERT INTO ft2(a) VALUES('abcde'); |
| } |
| |
| # Test 8.3: an AND query highlights both terms in every document. In the |
| # third document the two matches overlap and one span is expected. |
| do_execsql_test 8.3 { |
|   SELECT highlight(ft2, 0, '[', ']') FROM ft2 WHERE ft2 MATCH 'abc AND cde'; |
| } { |
|   {[abc] x [cde]} |
|   {[abc] [cde]} |
|   {[abcde]} |
| } |
| |
| #------------------------------------------------------------------------- |
| # Section 9: LIKE on the last column (a12) of a thirteen-column table in |
| # which only that column is populated. |
| reset_db |
| do_execsql_test 9.0 { |
|   CREATE VIRTUAL TABLE t1 USING fts5( |
|     a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, |
|     tokenize=trigram |
|   ); |
| |
|   INSERT INTO t1(rowid, a12) VALUES(111, 'thats a tricky case though'); |
|   INSERT INTO t1(rowid, a12) VALUES(222, 'the query planner cannot do'); |
| } |
| |
| # A single LIKE term. |
| do_execsql_test 9.1 { |
|   SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' |
| } {111} |
| |
| # Two LIKE terms on the same column joined by AND. |
| do_execsql_test 9.2 { |
|   SELECT rowid FROM t1 WHERE a12 LIKE '%tricky%' AND a12 LIKE '%case%' |
| } {111} |
| |
| # A NULL pattern returns no rows. |
| do_execsql_test 9.3 { |
|   SELECT rowid FROM t1 WHERE a12 LIKE NULL |
| } {} |
| |
| #------------------------------------------------------------------------- |
| # Section 10: inserts of unusual text. These tests only require that the |
| # inserts complete without error. |
| reset_db |
| do_execsql_test 10.0 { |
|   CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=trigram); |
| } |
| |
| # Test 10.1: strings written with \U escapes of two to five hex digits, |
| # placed before and after the word "abc". The list elements stay in double |
| # quotes so that the backslash escapes are interpreted. Each value is |
| # passed to SQLite as a bound variable. |
| do_test 10.1 { |
|   foreach {text} { |
|     "abc \UFFjkl\UFF" |
|     "abc \UFFFjkl\UFFF" |
|     "abc \UFFFFjkl\UFFFF" |
|     "abc \UFFFFFjkl\UFFFFF" |
|     "\UFFjkl\UFF abc" |
|     "\UFFFjkl\UFFF abc" |
|     "\UFFFFjkl\UFFFF abc" |
|     "\UFFFFFjkl\UFFFFF abc" |
|     "\U10001jkl\U10001 abc" |
|   } { |
|     execsql { INSERT INTO t1(a) VALUES( $text ) } |
|   } |
| } {} |
| |
| # Test 10.2: blob literals inserted into the text column. The SQL is a |
| # double-quoted Tcl string, so each X'..' literal is substituted into the |
| # statement text rather than bound as a variable. |
| # NOTE(review): the byte sequences look like malformed or unusual UTF-8 |
| # (for example ED A0 80 and EF BF BE) - confirm the intent. |
| do_test 10.2 { |
|   foreach {blob} { |
|     X'E18000626320646566' |
|     X'61EDA0806320646566' |
|     X'61EDA0806320646566' |
|     X'61EFBFBE6320646566' |
|     X'76686920E18000626320646566' |
|     X'7668692061EDA0806320646566' |
|     X'7668692061EDA0806320646566' |
|     X'7668692061EFBFBE6320646566' |
|   } { |
|     execsql " INSERT INTO t1(a) VALUES( $blob ) " |
|   } |
| } {} |
| |
| # Test 10.3: byte strings built with "binary format". Each one is the byte |
| # 0x61, then 0xF7 0xBF 0xBF 0xBF followed by zero to three further 0xBF |
| # bytes, then 0x62. Every value is inserted once on its own and once |
| # appended to the text 'abcd'. |
| do_test 10.3 { |
|   set seq0 [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}] |
|   set seq1 [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}] |
|   set seq2 [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] |
|   set seq3 [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}] |
|   execsql { |
|     INSERT INTO t1(a) VALUES($seq0); |
|     INSERT INTO t1(a) VALUES($seq1); |
|     INSERT INTO t1(a) VALUES($seq2); |
|     INSERT INTO t1(a) VALUES($seq3); |
| |
|     INSERT INTO t1(a) VALUES('abcd' || $seq0); |
|     INSERT INTO t1(a) VALUES('abcd' || $seq1); |
|     INSERT INTO t1(a) VALUES('abcd' || $seq2); |
|     INSERT INTO t1(a) VALUES('abcd' || $seq3); |
|   } |
| } {} |
| |
| # Section 11: insert the result of str('') into a trigram table. |
| do_execsql_test 11.0 { |
|   CREATE VIRTUAL TABLE t4 USING fts5(y, tokenize=trigram); |
| } |
| |
| # NOTE(review): presumably this call is what makes the str() SQL function |
| # used by test 11.1 available on the connection - confirm. |
| sqlite3_fts5_register_str db |
| |
| do_execsql_test 11.1 { |
|   INSERT INTO t4(y) VALUES( str('') ); |
| } |
| |
| # Section 12: invoke the trigram tokenizer directly. |
| |
| # Four characters produce two tokens. Each token in the expected list is |
| # followed by two integers (these look like start and end offsets). |
| do_test 12.0 { |
|   sqlite3_fts5_tokenize db trigram "abcd" |
| } {abc 0 3 bcd 1 4} |
| |
| # Input shorter than three characters produces no tokens. |
| do_test 12.1 { |
|   sqlite3_fts5_tokenize db trigram "a" |
| } {} |
| |
| # Empty input produces no tokens. |
| do_test 12.2 { |
|   sqlite3_fts5_tokenize db trigram "" |
| } {} |
| |
| finish_test |
| |