| // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| // |
| // Unit test compact language detector |
| // |
| // Small version, covering these languages only: |
| // Arabic Bulgarian Catalan Chinese ChineseT Croatian Czech Danish Dutch |
| // English Estonian Finnish French German Greek Hebrew Hindi Hungarian |
| // Icelandic Indonesian Italian Japanese Korean Latvian Lithuanian Norwegian |
| // Polish Portuguese Romanian Russian Serbian Slovak Slovenian Spanish |
| // Swedish Tagalog Thai Turkish Ukrainian Vietnamese |
| |
| // Additional single-language scripts recognized for free: |
| // Armenian Cherokee Dhivehi Georgian Gujarati Inuktitut Kannada Khmer |
| // Laothian Malayalam Oriya Punjabi Sinhalese Syriac Telugu Tamil |
| // |
| |
| #include <string> |
| #include "testing/gtest/include/gtest/gtest.h" |
| #include "encodings/compact_lang_det/compact_lang_det.h" |
| #include "encodings/compact_lang_det/ext_lang_enc.h" |
| #include "encodings/compact_lang_det/unittest_data.h" |
| |
| #include "encodings/compact_lang_det/win/cld_commandlineflags.h" |
| #include "encodings/compact_lang_det/win/cld_google.h" |
| |
| // Test strings. |
| // These are all included here to make the unit test self-contained. |
// English plain-text sample: lowercase prose without punctuation.  The
// EasyTests case expects the detector to report ENGLISH for it.
const char* kTeststr_en =
    "confiscation of goods is assigned as the penalty part "
    "most of the courts consist of members and "
    "when it is necessary to bring public cases before a jury of members "
    "two courts combine for the purpose "
    "the most important cases of all are brought jurors or";
| |
| |
| // UTF8 constants. Use a UTF-8 aware editor for this file |
| const char* kTeststr_ks = |
| "\xe0\xa4\xa8\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x8f" |
| "\xe0\xa4\xb8\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\x82" |
| "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2" |
| "\xe0\xa5\x81\xe0\xa4\x95 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7" |
| "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\x95\xe0\xa4\xbe\xe0\xa4\xa0" |
| "\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xa1\xe0\xa5\x8c\xe0\xa4\x82 \xe0\xa4\xa8" |
| "\xe0\xa5\x87\xe0\xa4\xaa\xe0\xa4\xbe\xe0\xa4\xb2 \xe0\xa4\x85\xe0\xa4\xa7" |
| "\xe0\xa4\xbf\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa5\x8d\xe0\xa4\xaf " |
| "\xe0\xa4\xaa\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\x97\xe0\xa5\x8d" |
| "\xe0\xa4\xb5\xe0\xa4\xbe\xe0\xa4\xaf \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d" |
| "\xe0\xa4\xb7\xe0\xa4\xbf\xe0\xa4\xa3 \xe0\xa4\x85\xe0\xa4\xae\xe0\xa5\x87" |
| "\xe0\xa4\xb0\xe0\xa4\xbf\xe0\xa4\x95\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9" |
| "\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa" |
| "\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95" |
| "\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0" |
| "\xe0\xa5\x87 \xe0\xa4\x8f\xe0\xa4\x95 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 " |
| "\xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf \xe0\xa4\xab" |
| "\xe0\xa4\xa3\xe0\xa5\x80\xe0\xa4\xb6\xe0\xa5\x8d\xe0\xa4\xb5\xe0\xa4\xb0 " |
| "\xe0\xa4\xa8\xe0\xa4\xbe\xe0\xa4\xa5 \xe0\xa4\xb0\xe0\xa5\x87\xe0\xa4\xa3" |
| "\xe0\xa5\x81 \xe0\xa4\xab\xe0\xa4\xbf\xe0\xa4\x9c\xe0\xa5\x80 \xe0\xa4\x9b" |
| "\xe0\xa5\x81 \xe0\xa4\xa6\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa4\xbf" |
| "\xe0\xa4\xa3 \xe0\xa4\xaa\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa4\xb6\xe0\xa4\xbe" |
| "\xe0\xa4\xa8\xe0\xa5\x8d \xe0\xa4\xa4 \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe" |
| "\xe0\xa4\xb8\xe0\xa4\xbe\xe0\xa4\x97\xe0\xa4\xb0 \xe0\xa4\xae\xe0\xa4\x82" |
| "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xa6\xe0\xa5\x87\xe0\xa4\xb6 " |
| "\xe0\xa4\xac\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xae\xe0\xa4\xbe\xe0\xa4\xb8 " |
| "\xe0\xa4\x9b\xe0\xa5\x81 \xe0\xa4\x95\xe0\xa5\x87\xe0\xa4\xb0\xe0\xa5\x87" |
| "\xe0\xa4\xac\xe0\xa4\xbf\xe0\xa4\xaf\xe0\xa4\xa8 \xe0\xa4\xae\xe0\xa4\x82" |
| "\xe0\xa4\x9c \xe0\xa4\x85\xe0\xa4\x96 \xe0\xa4\xae\xe0\xa5\x81\xe0\xa4\xb2" |
| "\xe0\xa5\x81\xe0\xa4\x96 \xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c\xe0\xa4\xa7" |
| "\xe0\xa4\xbe\xe0\xa4\xa8\xe0\xa5\x80 \xe0\xa4\xa8\xe0\xa4\xb8\xe0\xa5\x8c " |
| "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d" |
| "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf \xe0\xa4\xac" |
| "\xe0\xa5\x81\xe0\xa4\xb0\xe0\xa5\x81\xe0\xa4\x82\xe0\xa4\xa1\xe0\xa5\x80 " |
| "\xe0\xa4\x85\xe0\xa4\xab\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x80\xe0\xa4\x95" |
| "\xe0\xa4\xbe \xe0\xa4\xae\xe0\xa4\xb9\xe0\xa4\xbe\xe0\xa4\xa6\xe0\xa5\x8d" |
| "\xe0\xa4\xb5\xe0\xa5\x80\xe0\xa4\xaa\xe0\xa5\x87 \xe0\xa4\xae\xe0\xa4\xa7" |
| "\xe0\xa5\x8d \xe0\xa4\xaf\xe0\xa4\x95\xe0\xa5\x8d\xe0\xa4\xb7\xe0\xa5\x87" |
| "\xe0\xa4\xa4\xe0\xa5\x8d\xe0\xa4\xb0\xe0\xa5\x87 \xe0\xa4\xa6\xe0\xa5\x87" |
| "\xe0\xa4\xb6 \xe0\xa4\x85\xe0\xa4\xb8\xe0\xa5\x8d \xe0\xa4\xa4\xe0\xa4\xbf " |
| "\xe0\xa4\xb8\xe0\xa4\xae\xe0\xa5\x8d \xe0\xa4\xac\xe0\xa4\xa6\xe0\xa5\x8d" |
| "\xe0\xa4\x98 \xe0\xa4\xb5\xe0\xa4\xbf\xe0\xa4\xb7\xe0\xa4\xaf"; |
| |
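// Test string: an HTML meta charset tag, then one multi-byte UTF-8 sequence
// repeated many times, then a long run of 'a'.  This will be squeezed because
// of the repetitions.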
| const char* kTeststr_kr_repetitions = |
| "<meta charset=\"utf-8\" />\n\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d" |
| "\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93" |
| "\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96" |
| "\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1" |
| "\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f" |
| "\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad" |
| "\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96" |
| "\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9" |
| "\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6" |
| "\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8" |
| "\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82" |
| "\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99" |
| "\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb\xea\xac" |
| "\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b" |
| "\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8" |
| "\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9\x97\xbb" |
| "\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3\xb3\xea" |
| "\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b\x83\xe5" |
| "\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97\x90\xe9" |
| "\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a\xec\xb3" |
| "\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5\xe6\x8b" |
| "\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98\xe1\x97" |
| "\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6\xa8\x8a" |
| "\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec\xaa\xa5" |
| "\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec\x9b\x98" |
| "\xe1\x97\x90\xe9\x97\xbb\xea\xac\x83\xe8\x83\x96\xdf\xb1\xea\x9d\x99\xe6" |
| "\xa8\x8a\xec\xb3\xb3\xea\xa8\x9b\xe6\x82\x9c\xe9\xb9\x9f\xe9\x93\x80\xec" |
| "\xaa\xa5\xe6\x8b\x83\xe5\x9b\xa8\xe5\x99\xaa\xe6\xad\xad\xeb\x96\x84\xec" |
| "\x9b\x98\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" |
| "aaaaaaaaaaaaa"; |
| |
| |
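// For reference: the kTeststr_ks text defined above, written as Unicode
// code-point escapes instead of UTF-8 byte escapes.
// const char* kTeststr_ks =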
| // \u0928\u0947\u092A\u093E\u0932\u0020\u090F\u0938\u093F\u092F\u093E\u0020 |
| // \u092E\u0902\u091C\u0020\u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0915 |
| // \u0020\u0930\u093E\u091C\u0927\u093E\u0928\u0940\u0020\u0915\u093E\u0920 |
| // \u092E\u093E\u0921\u094C\u0902\u0020\u0928\u0947\u092A\u093E\u0932\u0020 |
| // \u0905\u0927\u093F\u0930\u093E\u091C\u094D\u092F\u0020\u092A\u0947\u0930 |
| // \u0947\u0917\u094D\u0935\u093E\u092F\u0020 |
| // \u0926\u0915\u094D\u0937\u093F\u0923\u0020\u0905\u092E\u0947\u0930\u093F |
| // \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947 |
| // \u0020\u092E\u0927\u094D\u0020\u092F\u0915\u094D\u0937\u0947\u0924\u094D |
| // \u0930\u0947\u0020\u090F\u0915\u0020\u0926\u0947\u0936\u0020\u0905\u0938 |
| // \u094D\u0020\u0924\u093F\u0020\u092B\u0923\u0940\u0936\u094D\u0935\u0930 |
| // \u0020\u0928\u093E\u0925\u0020\u0930\u0947\u0923\u0941\u0020 |
| // \u092B\u093F\u091C\u0940\u0020\u091B\u0941\u0020\u0926\u0915\u094D\u0937 |
| // \u093F\u0923\u0020\u092A\u094D\u0930\u0936\u093E\u0928\u094D\u0020\u0924 |
| // \u0020\u092E\u0939\u093E\u0938\u093E\u0917\u0930\u0020\u092E\u0902\u091C |
| // \u0020\u0905\u0916\u0020\u0926\u0947\u0936\u0020\u092C\u0939\u093E\u092E |
| // \u093E\u0938\u0020\u091B\u0941\u0020\u0915\u0947\u0930\u0947\u092C\u093F |
| // \u092F\u0928\u0020\u092E\u0902\u091C\u0020 |
| // \u0905\u0916\u0020\u092E\u0941\u0932\u0941\u0916\u0020\u0930\u093E\u091C |
| // \u0927\u093E\u0928\u0940\u0020\u0928\u0938\u094C\u0020\u0938\u092E\u094D |
| // \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F\u0020\u092C |
| // \u0941\u0930\u0941\u0902\u0921\u0940\u0020\u0905\u092B\u094D\u0930\u0940 |
| // \u0915\u093E\u0020\u092E\u0939\u093E\u0926\u094D\u0935\u0940\u092A\u0947 |
| // \u0020\u092E\u0927\u094D\u0020 |
| // \u092F\u0915\u094D\u0937\u0947\u0924\u094D\u0930\u0947\u0020\u0926\u0947 |
| // \u0936\u0020\u0905\u0938\u094D\u0020\u0924\u093F\u0020\u0938\u092E\u094D |
| // \u0020\u092C\u0926\u094D\u0918\u0020\u0935\u093F\u0937\u092F |
| |
| |
| namespace { |
| |
| class CompactLangDetTest : public testing::Test { |
| protected: |
| // Objects declared here can be used by all tests in the test case for Foo. |
| |
| // Detect language of plaintext src |
| Language TestCompactLangDetPlain(const char* src) { |
| bool is_plain_text = true; |
| bool is_reliable; |
| |
| Language lang = CompactLangDet::DetectLanguage(NULL, src, strlen(src), |
| is_plain_text, |
| &is_reliable); |
| return lang; |
| } |
| |
| |
| // Detect extended language of plaintext src |
| Language TestExtCompactLangDetPlain(const char* src) { |
| bool is_plain_text = true; |
| Language language3[3]; |
| int percent3[3]; |
| int text_bytes; |
| bool is_reliable; |
| |
| Language lang = CompactLangDet::ExtDetectLanguageSummary(NULL, |
| src, strlen(src), |
| is_plain_text, |
| language3, |
| percent3, |
| &text_bytes, |
| &is_reliable); |
| return lang; |
| } |
| }; // end class CompactLangDetTest |
| |
| |
// Smoke test for the plain-text detection helper: one sample is expected to
// be reported as ENGLISH and one as HINDI.
TEST_F(CompactLangDetTest, EasyTests) {
  // kTeststr_en is the English sample defined near the top of this file.
  EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en));
  // kTeststr_hi_Deva is not defined in the visible part of this file;
  // presumably it comes from unittest_data.h -- confirm.
  EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva));
}
| |
| |
| TEST_F(CompactLangDetTest, FullTests) { |
| // Only the tests reflecting the currently used detection tables are enabled. |
| |
| // Do all the languages in all their scripts |
| //// EXPECT_EQ(AFAR, TestCompactLangDetPlain(kTeststr_aa_Latn)); |
| //// EXPECT_EQ(ABKHAZIAN, TestCompactLangDetPlain(kTeststr_ab_Cyrl)); |
| EXPECT_EQ(AFRIKAANS, TestCompactLangDetPlain(kTeststr_af_Latn)); |
| //// EXPECT_EQ(AMHARIC, TestCompactLangDetPlain(kTeststr_am_Ethi)); |
| EXPECT_EQ(ARABIC, TestCompactLangDetPlain(kTeststr_ar_Arab)); |
| //// EXPECT_EQ(ASSAMESE, TestCompactLangDetPlain(kTeststr_as_Beng)); |
| //// EXPECT_EQ(AYMARA, TestCompactLangDetPlain(kTeststr_ay_Latn)); |
| // AZERBAIJANI Arab & Cyrl removed 2008.05.27. Just AZERBAIJANI Latn left |
| // EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Arab)); |
| // Missing data: az-Cyrl |
| //// EXPECT_EQ(AZERBAIJANI, TestCompactLangDetPlain(kTeststr_az_Latn)); |
| |
| //// EXPECT_EQ(BASHKIR, TestCompactLangDetPlain(kTeststr_ba_Cyrl)); |
| EXPECT_EQ(BELARUSIAN, TestCompactLangDetPlain(kTeststr_be_Cyrl)); |
| EXPECT_EQ(BULGARIAN, TestCompactLangDetPlain(kTeststr_bg_Cyrl)); |
| //// EXPECT_EQ(BIHARI, TestCompactLangDetPlain(kTeststr_bh_Deva)); |
| //// EXPECT_EQ(BISLAMA, TestCompactLangDetPlain(kTeststr_bi_Latn)); |
| //// EXPECT_EQ(BENGALI, TestCompactLangDetPlain(kTeststr_bn_Beng)); |
| |
| //// EXPECT_EQ(TIBETAN, TestCompactLangDetPlain(kTeststr_bo_Tibt)); |
| //// EXPECT_EQ(BRETON, TestCompactLangDetPlain(kTeststr_br_Latn)); |
| EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_bs_Cyrl)); // NOTE: Not BOSNIAN |
| //// EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_bs_Latn)); // NOTE: Not BOSNIAN |
| |
| EXPECT_EQ(CATALAN, TestCompactLangDetPlain(kTeststr_ca_Latn)); |
| EXPECT_EQ(CHEROKEE, TestCompactLangDetPlain(kTeststr_chr_Cher)); |
| //// EXPECT_EQ(CORSICAN, TestCompactLangDetPlain(kTeststr_co_Latn)); |
| // No CREOLES_AND_PIDGINS_ENGLISH_BASED |
| // No CREOLES_AND_PIDGINS_FRENCH_BASED |
| // No CREOLES_AND_PIDGINS_OTHER |
| // No CREOLES_AND_PIDGINS_PORTUGUESE_BASED |
| EXPECT_EQ(CZECH, TestCompactLangDetPlain(kTeststr_cs_Latn)); |
| EXPECT_EQ(WELSH, TestCompactLangDetPlain(kTeststr_cy_Latn)); |
| |
| EXPECT_EQ(DANISH, TestCompactLangDetPlain(kTeststr_da_Latn)); |
| EXPECT_EQ(GERMAN, TestCompactLangDetPlain(kTeststr_de_Latn)); |
| EXPECT_EQ(DHIVEHI, TestCompactLangDetPlain(kTeststr_dv_Thaa)); |
| //// EXPECT_EQ(DZONGKHA, TestCompactLangDetPlain(kTeststr_dz_Tibt)); |
| |
| EXPECT_EQ(GREEK, TestCompactLangDetPlain(kTeststr_el_Grek)); |
| EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_en_Latn)); |
| //// EXPECT_EQ(ESPERANTO, TestCompactLangDetPlain(kTeststr_eo_Latn)); |
| EXPECT_EQ(SPANISH, TestCompactLangDetPlain(kTeststr_es_Latn)); |
| EXPECT_EQ(ESTONIAN, TestCompactLangDetPlain(kTeststr_et_Latn)); |
| //// EXPECT_EQ(BASQUE, TestCompactLangDetPlain(kTeststr_eu_Latn)); |
| |
| EXPECT_EQ(PERSIAN, TestCompactLangDetPlain(kTeststr_fa_Arab)); |
| EXPECT_EQ(FINNISH, TestCompactLangDetPlain(kTeststr_fi_Latn)); |
| //// EXPECT_EQ(FIJIAN, TestCompactLangDetPlain(kTeststr_fj_Latn)); |
| //// EXPECT_EQ(FAROESE, TestCompactLangDetPlain(kTeststr_fo_Latn)); |
| EXPECT_EQ(FRENCH, TestCompactLangDetPlain(kTeststr_fr_Latn)); |
| //// EXPECT_EQ(FRISIAN, TestCompactLangDetPlain(kTeststr_fy_Latn)); |
| |
| EXPECT_EQ(IRISH, TestCompactLangDetPlain(kTeststr_ga_Latn)); |
| //// EXPECT_EQ(SCOTS_GAELIC, TestCompactLangDetPlain(kTeststr_gd_Latn)); |
| //// EXPECT_EQ(GALICIAN, TestCompactLangDetPlain(kTeststr_gl_Latn)); |
| //// EXPECT_EQ(GUARANI, TestCompactLangDetPlain(kTeststr_gn_Latn)); |
| EXPECT_EQ(GUJARATI, TestCompactLangDetPlain(kTeststr_gu_Gujr)); |
| //// EXPECT_EQ(MANX, TestCompactLangDetPlain(kTeststr_gv_Latn)); |
| |
| //// EXPECT_EQ(HAUSA, TestCompactLangDetPlain(kTeststr_ha_Latn)); |
| EXPECT_EQ(HINDI, TestCompactLangDetPlain(kTeststr_hi_Deva)); |
| EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_hr_Latn)); // NOTE: now CROATIAN |
| //// EXPECT_EQ(HAITIAN_CREOLE, TestCompactLangDetPlain(kTeststr_ht_Latn)); |
| EXPECT_EQ(HUNGARIAN, TestCompactLangDetPlain(kTeststr_hu_Latn)); |
| EXPECT_EQ(ARMENIAN, TestCompactLangDetPlain(kTeststr_hy_Armn)); |
| |
| //// EXPECT_EQ(INTERLINGUA, TestCompactLangDetPlain(kTeststr_ia_Latn)); |
| EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_id_Latn)); |
| //// EXPECT_EQ(INTERLINGUE, TestCompactLangDetPlain(kTeststr_ie_Latn)); |
| //// EXPECT_EQ(INUPIAK, TestCompactLangDetPlain(kTeststr_ik_Latn)); |
| EXPECT_EQ(ICELANDIC, TestCompactLangDetPlain(kTeststr_is_Latn)); |
| EXPECT_EQ(ITALIAN, TestCompactLangDetPlain(kTeststr_it_Latn)); |
| EXPECT_EQ(INUKTITUT, TestCompactLangDetPlain(kTeststr_iu_Cans)); |
| EXPECT_EQ(HEBREW, TestCompactLangDetPlain(kTeststr_iw_Hebr)); |
| |
| EXPECT_EQ(JAPANESE, TestCompactLangDetPlain(kTeststr_ja_Hani)); |
| //// EXPECT_EQ(JAVANESE, TestCompactLangDetPlain(kTeststr_jw_Latn)); |
| |
| EXPECT_EQ(GEORGIAN, TestCompactLangDetPlain(kTeststr_ka_Geor)); |
| //// EXPECT_EQ(KHASI, TestCompactLangDetPlain(kTeststr_kha_Latn)); |
| //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Arab)); |
| //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Cyrl)); |
| //// EXPECT_EQ(KAZAKH, TestCompactLangDetPlain(kTeststr_kk_Latn)); |
| //// EXPECT_EQ(GREENLANDIC, TestCompactLangDetPlain(kTeststr_kl_Latn)); |
| EXPECT_EQ(KHMER, TestCompactLangDetPlain(kTeststr_km_Khmr)); |
| EXPECT_EQ(KANNADA, TestCompactLangDetPlain(kTeststr_kn_Knda)); |
| EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_ko_Hani)); |
| //// EXPECT_EQ(KASHMIRI, TestCompactLangDetPlain(kTeststr_ks_Deva)); |
| // KURDISH Latn removed 2008.05.27. Just KURDISH Arab left |
| //// EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Arab)); |
| // EXPECT_EQ(KURDISH, TestCompactLangDetPlain(kTeststr_ku_Latn)); |
| //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Arab)); |
| //// EXPECT_EQ(KYRGYZ, TestCompactLangDetPlain(kTeststr_ky_Cyrl)); |
| |
| //// EXPECT_EQ(LATIN, TestCompactLangDetPlain(kTeststr_la_Latn)); |
| //// EXPECT_EQ(LUXEMBOURGISH, TestCompactLangDetPlain(kTeststr_lb_Latn)); |
| //// EXPECT_EQ(GANDA, TestCompactLangDetPlain(kTeststr_lg_Latn)); |
| //// EXPECT_EQ(LINGALA, TestCompactLangDetPlain(kTeststr_ln_Latn)); |
| EXPECT_EQ(LAOTHIAN, TestCompactLangDetPlain(kTeststr_lo_Laoo)); |
| EXPECT_EQ(LITHUANIAN, TestCompactLangDetPlain(kTeststr_lt_Latn)); |
| EXPECT_EQ(LATVIAN, TestCompactLangDetPlain(kTeststr_lv_Latn)); |
| |
| //// EXPECT_EQ(MALAGASY, TestCompactLangDetPlain(kTeststr_mg_Latn)); |
| //// EXPECT_EQ(MAORI, TestCompactLangDetPlain(kTeststr_mi_Latn)); |
| EXPECT_EQ(MACEDONIAN, TestCompactLangDetPlain(kTeststr_mk_Cyrl)); |
| EXPECT_EQ(MALAYALAM, TestCompactLangDetPlain(kTeststr_ml_Mlym)); |
| //// EXPECT_EQ(MONGOLIAN, TestCompactLangDetPlain(kTeststr_mn_Cyrl)); |
| //// EXPECT_EQ(MOLDAVIAN, TestCompactLangDetPlain(kTeststr_mo_Cyrl)); |
| //// EXPECT_EQ(MARATHI, TestCompactLangDetPlain(kTeststr_mr_Deva)); |
| EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn)); |
| // EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn2)); |
| EXPECT_EQ(MALAY, TestCompactLangDetPlain(kTeststr_ms_Latn3)); |
| //// EXPECT_EQ(MALTESE, TestCompactLangDetPlain(kTeststr_mt_Latn)); |
| //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Latn)); |
| //// EXPECT_EQ(BURMESE, TestCompactLangDetPlain(kTeststr_my_Mymr)); |
| |
| //// EXPECT_EQ(NAURU, TestCompactLangDetPlain(kTeststr_na_Latn)); |
| //// EXPECT_EQ(NEPALI, TestCompactLangDetPlain(kTeststr_ne_Deva)); |
| EXPECT_EQ(DUTCH, TestCompactLangDetPlain(kTeststr_nl_Latn)); |
| //// EXPECT_EQ(NORWEGIAN_N, TestCompactLangDetPlain(kTeststr_nn_Latn)); |
| EXPECT_EQ(NORWEGIAN, TestCompactLangDetPlain(kTeststr_no_Latn)); |
| |
| //// EXPECT_EQ(OCCITAN, TestCompactLangDetPlain(kTeststr_oc_Latn)); |
| //// EXPECT_EQ(OROMO, TestCompactLangDetPlain(kTeststr_om_Latn)); |
| EXPECT_EQ(ORIYA, TestCompactLangDetPlain(kTeststr_or_Orya)); |
| |
| EXPECT_EQ(PUNJABI, TestCompactLangDetPlain(kTeststr_pa_Guru)); |
| EXPECT_EQ(POLISH, TestCompactLangDetPlain(kTeststr_pl_Latn)); |
| //// EXPECT_EQ(PASHTO, TestCompactLangDetPlain(kTeststr_ps_Arab)); |
| EXPECT_EQ(PORTUGUESE, TestCompactLangDetPlain(kTeststr_pt_BR)); // NOTE: not PORTUGUESE_B |
| // nor PORTUGUESE_P |
| |
| //// EXPECT_EQ(QUECHUA, TestCompactLangDetPlain(kTeststr_qu_Latn)); |
| |
| //// EXPECT_EQ(RHAETO_ROMANCE, TestCompactLangDetPlain(kTeststr_rm_Latn)); |
| //// EXPECT_EQ(RUNDI, TestCompactLangDetPlain(kTeststr_rn_Latn)); |
| EXPECT_EQ(ROMANIAN, TestCompactLangDetPlain(kTeststr_ro_Latn)); |
| EXPECT_EQ(RUSSIAN, TestCompactLangDetPlain(kTeststr_ru_Cyrl)); |
| //// EXPECT_EQ(KINYARWANDA, TestCompactLangDetPlain(kTeststr_rw_Latn)); |
| |
| //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Deva)); |
| //// EXPECT_EQ(SANSKRIT, TestCompactLangDetPlain(kTeststr_sa_Latn)); |
| //// EXPECT_EQ(SCOTS, TestCompactLangDetPlain(kTeststr_sco_Latn)); |
| //// EXPECT_EQ(SINDHI, TestCompactLangDetPlain(kTeststr_sd_Arab)); |
| //// EXPECT_EQ(SANGO, TestCompactLangDetPlain(kTeststr_sg_Latn)); |
| // No SERBO_CROATIAN (sh) |
| EXPECT_EQ(SINHALESE, TestCompactLangDetPlain(kTeststr_si_Sinh)); |
| //// EXPECT_EQ(LIMBU, TestCompactLangDetPlain(kTeststr_sit_NP)); |
| EXPECT_EQ(SLOVAK, TestCompactLangDetPlain(kTeststr_sk_Latn)); |
| EXPECT_EQ(SLOVENIAN, TestCompactLangDetPlain(kTeststr_sl_Latn)); |
| //// EXPECT_EQ(SAMOAN, TestCompactLangDetPlain(kTeststr_sm_Latn)); |
| //// EXPECT_EQ(SHONA, TestCompactLangDetPlain(kTeststr_sn_Latn)); |
| //// EXPECT_EQ(SOMALI, TestCompactLangDetPlain(kTeststr_so_Latn)); |
| //// EXPECT_EQ(ALBANIAN, TestCompactLangDetPlain(kTeststr_sq_Latn)); |
| EXPECT_EQ(SERBIAN, TestCompactLangDetPlain(kTeststr_sr_Cyrl)); // NOTE: now SERBIAN |
| EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_Latn)); // NOTE: Not SERBIAN |
| EXPECT_EQ(CROATIAN, TestCompactLangDetPlain(kTeststr_sr_ME_Latn)); // NOTE: not SERBIAN nor MONTENEGRIN |
| //// EXPECT_EQ(SISWANT, TestCompactLangDetPlain(kTeststr_ss_Latn)); |
| //// EXPECT_EQ(SESOTHO, TestCompactLangDetPlain(kTeststr_st_Latn)); |
| //// EXPECT_EQ(SUNDANESE, TestCompactLangDetPlain(kTeststr_su_Latn)); |
| EXPECT_EQ(SWEDISH, TestCompactLangDetPlain(kTeststr_sv_Latn)); |
| EXPECT_EQ(SWAHILI, TestCompactLangDetPlain(kTeststr_sw_Latn)); |
| EXPECT_EQ(SYRIAC, TestCompactLangDetPlain(kTeststr_syr_Syrc)); |
| |
| EXPECT_EQ(TAMIL, TestCompactLangDetPlain(kTeststr_ta_Taml)); |
| EXPECT_EQ(TELUGU, TestCompactLangDetPlain(kTeststr_te_Telu)); |
| // Tajik Arab removed 2008.05.27. Just Tajik Cyrl left |
| // EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Arab)); |
| //// EXPECT_EQ(TAJIK, TestCompactLangDetPlain(kTeststr_tg_Cyrl)); |
| EXPECT_EQ(THAI, TestCompactLangDetPlain(kTeststr_th_Thai)); |
| //// EXPECT_EQ(TIGRINYA, TestCompactLangDetPlain(kTeststr_ti_Ethi)); |
| //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Cyrl)); |
| //// EXPECT_EQ(TURKMEN, TestCompactLangDetPlain(kTeststr_tk_Latn)); |
| EXPECT_EQ(TAGALOG, TestCompactLangDetPlain(kTeststr_tl_Latn)); |
| //// EXPECT_EQ(TSWANA, TestCompactLangDetPlain(kTeststr_tn_Latn)); |
| //// EXPECT_EQ(TONGA, TestCompactLangDetPlain(kTeststr_to_Latn)); |
| EXPECT_EQ(TURKISH, TestCompactLangDetPlain(kTeststr_tr_Latn)); |
| //// EXPECT_EQ(TSONGA, TestCompactLangDetPlain(kTeststr_ts_Latn)); |
| //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Cyrl)); |
| //// EXPECT_EQ(TATAR, TestCompactLangDetPlain(kTeststr_tt_Latn)); |
| //// EXPECT_EQ(TWI, TestCompactLangDetPlain(kTeststr_tw_Latn)); |
| |
| //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Arab)); |
| //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Cyrl)); |
| //// EXPECT_EQ(UIGHUR, TestCompactLangDetPlain(kTeststr_ug_Latn)); |
| EXPECT_EQ(UKRAINIAN, TestCompactLangDetPlain(kTeststr_uk_Cyrl)); |
| //// EXPECT_EQ(URDU, TestCompactLangDetPlain(kTeststr_ur_Arab)); |
| //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Arab)); |
| //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Cyrl)); |
| //// EXPECT_EQ(UZBEK, TestCompactLangDetPlain(kTeststr_uz_Latn)); |
| |
| EXPECT_EQ(VIETNAMESE, TestCompactLangDetPlain(kTeststr_vi_Latn)); |
| //// EXPECT_EQ(VOLAPUK, TestCompactLangDetPlain(kTeststr_vo_Latn)); |
| |
| //// EXPECT_EQ(WOLOF, TestCompactLangDetPlain(kTeststr_wo_Latn)); |
| |
| //// EXPECT_EQ(XHOSA, TestCompactLangDetPlain(kTeststr_xh_Latn)); |
| |
| EXPECT_EQ(YIDDISH, TestCompactLangDetPlain(kTeststr_yi_Hebr)); |
| //// EXPECT_EQ(YORUBA, TestCompactLangDetPlain(kTeststr_yo_Latn)); |
| |
| // Zhuang Hani removed 2008.05.13. Just Zhuang Latn left |
| // EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Hani)); |
| //// EXPECT_EQ(ZHUANG, TestCompactLangDetPlain(kTeststr_za_Latn)); |
| EXPECT_EQ(CHINESE, TestCompactLangDetPlain(kTeststr_zh_Hani)); |
| EXPECT_EQ(CHINESE_T, TestCompactLangDetPlain(kTeststr_zh_TW)); |
| //// EXPECT_EQ(ZULU, TestCompactLangDetPlain(kTeststr_zu_Latn)); |
| // No TG_UNKNOWN_LANGUAGE |
| // No UNKNOWN_LANGUAGE |
| |
| // This test should be executed with ASAN. |
| EXPECT_EQ(KOREAN, TestCompactLangDetPlain(kTeststr_kr_repetitions)); |
| } |
| |
| |
| TEST_F(CompactLangDetTest, ExtendedTests) { |
| // Do the extended languages, with them not-allowed then allowed |
| // These turn out to be extraordinarily sensitive forms of garbage bytes |
| //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_tlh_Latn)); |
| //// EXPECT_EQ(X_KLINGON, TestExtCompactLangDetPlain(kTeststr_tlh_Latn)); |
| |
| //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzp_Latn)); |
| //// EXPECT_EQ(X_PIG_LATIN, TestExtCompactLangDetPlain(kTeststr_zzp_Latn)); |
| |
| //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Bugi)); |
| //// EXPECT_EQ(X_BUGINESE, TestExtCompactLangDetPlain(kTeststr_xx_Bugi)); |
| |
| //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_xx_Goth)); |
| //// EXPECT_EQ(X_GOTHIC, TestExtCompactLangDetPlain(kTeststr_xx_Goth)); |
| |
| // Next three now removed permanently from probability tables (May 2008) |
| // (used to be X_BORK_BORK_BORK, X_ELMER_FUDD, X_HACKER). |
| // |
| // Small changes in probability tables may cause these non-texts to |
| // change detection result. If that happens, cross-check that |
| // the new result is not because of a bug, then change the expected values. |
| EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzb_Latn)); |
| EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzb_Latn)); |
| |
| EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zze_Latn)); |
| EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zze_Latn)); |
| |
| //// EXPECT_EQ(ENGLISH, TestCompactLangDetPlain(kTeststr_zzh_Latn)); |
| //// EXPECT_EQ(ENGLISH, TestExtCompactLangDetPlain(kTeststr_zzh_Latn)); |
| } |
| |
| |
| } // End namespace |
| |
| #if !defined(CLD_WINDOWS) |
| int main(int argc, char** argv) { |
| FLAGS_logtostderr = true; |
| InitGoogle("Unit test for CLD small", &argc, &argv, false); |
| return RUN_ALL_TESTS(); |
| } |
| #endif |