blob: 713b39e6ca0299033126b9272fe40f8f522adf2e [file] [log] [blame]
diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk
index b5eca75..2a75a9e 100644
--- a/source/data/brkitr/brklocal.mk
+++ b/source/data/brkitr/brklocal.mk
@@ -34,7 +34,7 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS)
# List of dictionary files (dict).
-BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\
+BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\
thaidict.txt
@@ -42,7 +42,7 @@ BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\
BRK_SOURCE = char.txt line.txt\
line_normal.txt line_normal_cj.txt line_normal_fi.txt\
line_loose_cj.txt\
- sent.txt sent_el.txt title.txt word.txt
+ sent.txt sent_el.txt title.txt word.txt word_ja.txt
# Ordinary resources
diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt
index 2e9a1c8..cb732a7 100644
--- a/source/data/brkitr/ja.txt
+++ b/source/data/brkitr/ja.txt
@@ -7,5 +7,6 @@ ja{
line_loose:process(dependency){"line_loose_cj.brk"}
line_normal:process(dependency){"line_normal_cj.brk"}
line_strict:process(dependency){"line.brk"}
+ word:process(dependency){"word_ja.brk"}
}
}
diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt
index 1a1ad8a..c790282 100644
--- a/source/data/brkitr/root.txt
+++ b/source/data/brkitr/root.txt
@@ -13,9 +13,6 @@ root{
word:process(dependency){"word.brk"}
}
dictionaries{
- Hani:process(dependency){"cjdict.dict"}
- Hira:process(dependency){"cjdict.dict"}
- Kana:process(dependency){"cjdict.dict"}
Khmr:process(dependency){"khmerdict.dict"}
Laoo:process(dependency){"laodict.dict"}
Mymr:process(dependency){"burmesedict.dict"}
diff --git a/source/data/brkitr/rules/word.txt b/source/data/brkitr/rules/word.txt
index 9c93dd5..eb150ea 100644
--- a/source/data/brkitr/rules/word.txt
+++ b/source/data/brkitr/rules/word.txt
@@ -71,11 +71,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}];
$HangulSyllable = [\uac00-\ud7a3];
$ComplexContext = [:LineBreak = Complex_Context:];
$KanaKanji = [$Han $Hiragana $Katakana];
-$dictionaryCJK = [$KanaKanji $HangulSyllable];
-$dictionary = [$ComplexContext $dictionaryCJK];
+$dictionary = [$ComplexContext];
-# leave CJK scripts out of ALetterPlus
-$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
+$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]];
#
@@ -194,11 +192,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b)
#
^$Regional_IndicatorEx $Regional_IndicatorEx;
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
-
-
## -------------------------------------------------
!!reverse;
@@ -265,10 +258,6 @@ $BackKatakanaEx $BackKatakanaEx;
$BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx | $BackExtendNumLetEx);
($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx;
-# special handling for CJK characters: chain for later dictionary segmentation
-$HangulSyllable $HangulSyllable;
-$KanaKanji $KanaKanji; #different rule status if both kanji and kana found
-
# rule 14
$E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG);