blob: 0fdff4fc573f54317452b3fa0933ce01709b124e [file] [log] [blame]
diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk
index 91754f1..ccac4d1 100644
--- a/source/data/brkitr/brklocal.mk
+++ b/source/data/brkitr/brklocal.mk
@@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS)
# List of compact trie dictionary files (ctd).
-BRK_CTD_SOURCE = thaidict.txt cjdict.txt
+BRK_CTD_SOURCE = thaidict.txt
# List of break iterator files (brk).
-# Chrome change: remove word_ja.txt and line_he.txt
-BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
+# Chrome change: remove line_he.txt
+BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.txt line.txt sent.txt title.txt char_th.txt
# Ordinary resources
-# Chrome change: remove ja.txt and he.txt
+# Chrome change: remove he.txt
BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\
- fi.txt th.txt
+ fi.txt ja.txt th.txt
diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt
index fb83ac3..5d839bd 100644
--- a/source/data/brkitr/root.txt
+++ b/source/data/brkitr/root.txt
@@ -17,8 +17,5 @@ root{
}
dictionaries{
Thai:process(dependency){"thaidict.ctd"}
- Hani:process(dependency){"cjdict.ctd"}
- Hira:process(dependency){"cjdict.ctd"}
- Kata:process(dependency){"cjdict.ctd"}
}
}
diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt
index 0b49377..a0e1ceb 100644
--- a/source/data/brkitr/word.txt
+++ b/source/data/brkitr/word.txt
@@ -60,11 +60,10 @@ $Control = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana];
-$dictionaryCJK = [$KanaKanji $HangulSyllable];
-$dictionary = [$ComplexContext $dictionaryCJK];
+$dictionary = [:LineBreak = Complex_Context:];
-# leave CJK scripts out of ALetterPlus
-$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
+$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not
+ # include the dictionary characters.
#
@@ -99,8 +98,7 @@ $CR $LF;
# begins with a group of Format chars, or with a "word" consisting of a single
# char that is not in any of the listed word break categories followed by
# format char(s).
- # format char(s), or is not a CJK dictionary character.
-[^$CR $LF $Newline $dictionaryCJK]? ($Extend | $Format)+;
+[^$CR $LF $Newline]? ($Extend | $Format)+;
$NumericEx {100};
$ALetterEx {200};
@@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx {200}; # (13b)
$ExtendNumLetEx $NumericEx {100}; # (13b)
$ExtendNumLetEx $KatakanaEx {400}; # (13b)
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana found
## -------------------------------------------------
@@ -179,7 +174,7 @@ $BackHebrewLetEx = ($Format | $Extend)* $HebrewLet;
$LF $CR;
# rule 4
-($Format | $Extend)* [^$CR $LF $Newline $dictionaryCJK]?;
+($Format | $Extend)* [^$CR $LF $Newline]?;
# rule 5
@@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx;
$BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
## -------------------------------------------------
!!safe_reverse;