diff --git a/script/BuildLangModel.py b/script/BuildLangModel.py index 08c3b83..f7e6243 100755 --- a/script/BuildLangModel.py +++ b/script/BuildLangModel.py @@ -43,6 +43,7 @@ import unicodedata import subprocess import wikipedia import importlib +import math import optparse import datetime import operator @@ -452,6 +453,57 @@ for charset in charsets: CTOM_str += ' */\n\n' c_code += CTOM_str +## UNICODE frequency. + +# Since we can't map the full character table from encoding to order, +# just create a list from the most common characters from the language. +# The list is ordered by unicode code points (hence can be used +# generically for various encoding schemes as it is not encoding +# specific) allowing to search from code points efficiently by a divide +# and conquer search algorithm. +# Each code point is immediately followed by its order. + +# Keep the freq_count most frequent characters. +sorted_chars = [(char, freq, order) for order, (char, freq) in + enumerate(sorted_ratios)][:freq_count] +max_order = len(sorted_chars) + +# Add equivalency characters. +equivalent = [] +if lang.case_mapping: + for char, ratio, order in sorted_chars: + uppercased = chr(char).upper() + try: + if char != ord(uppercased): + equivalent += [(ord(uppercased), ratio, order)] + except TypeError: + # This happens for some cases such as 'SS' as uppercase of 'ß'. + # Just ignore such cases. + sys.stderr.write("Ignoring '{}' as uppercase equivalent of '{}'.\n".format(uppercased, char)) + sorted_chars += equivalent + +# Order by code point. 
+sorted_chars = sorted(sorted_chars, key=operator.itemgetter(0)) + +CTOM_str = 'static const int Unicode_Char_size = {};\n'.format(len(sorted_chars)) + +CTOM_str += 'static const unsigned int Unicode_CharOrder[]' +CTOM_str += ' =\n{' +column = 0 + +max_char_width = math.floor(math.log10(sorted_chars[-1][0])) + 1 +max_order_width = math.floor(math.log10(max_order)) + 1 + +for char, ratio, order in sorted_chars: + if column % 8 == 0: + CTOM_str += '\n ' + column += 1 + CTOM_str += '{}{:>{width}}, '.format('' if column % 8 == 0 else ' ', char, width=max_char_width) + CTOM_str += '{:>{width}},'.format(order, width=max_order_width) + +CTOM_str += '\n};\n\n' +c_code += CTOM_str + ########### SEQUENCES ########### ratios = {} @@ -533,11 +585,22 @@ for charset in charsets: SM_str += '\n {},'.format(freq_count) SM_str += '\n (float){},'.format(ratio_512) SM_str += '\n {},'.format('PR_TRUE' if lang.use_ascii else 'PR_FALSE') - SM_str += '\n "{},"'.format(charset) + SM_str += '\n "{}",'.format(charset) SM_str += '\n "{}"'.format(lang.code) SM_str += '\n};' c_code += SM_str +SM_str = '\n\nconst LanguageModel {}Model ='.format(language_c) +SM_str += '\n{' +SM_str += '\n "{}",'.format(lang.code) +SM_str += '\n Unicode_CharOrder,' +SM_str += '\n {},'.format(len(sorted_chars)) # Order is wrong! 
+SM_str += '\n {}LangModel,'.format(language_c) +SM_str += '\n {},'.format(freq_count) +SM_str += '\n (float){},'.format(ratio_512) +SM_str += '\n};' +c_code += SM_str + c_code += '\n' lang_model_file = current_dir + '/../src/LangModels/Lang{}Model.cpp'.format(language_c) diff --git a/script/BuildLangModelLogs/LangArabicModel.log b/script/BuildLangModelLogs/LangArabicModel.log index 564c611..b7e318f 100644 --- a/script/BuildLangModelLogs/LangArabicModel.log +++ b/script/BuildLangModelLogs/LangArabicModel.log @@ -1,142 +1,192 @@ = Logs of language model for Arabic (ar) = - Generated by BuildLangModel.py -- Started: 2015-12-13 18:31:12.817808 -- Maximum depth: 2 -- Max number of pages: 50 +- Started: 2021-03-16 11:33:00.432776 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -الصفحة_الرئيسية (revision 17217037) -11 ديسمبر (revision 17699159) -12 ديسمبر (revision 17710194) -13 ديسمبر (revision 17722318) -1437 هـ (revision 17278274) -14 ديسمبر (revision 17432010) -15 ديسمبر (revision 17206233) -1645 (revision 17168144) -1954 (revision 17409780) -1955 (revision 16826533) -1972 (revision 17004868) -1988 (revision 17671285) -2003 (revision 17656994) -2011 (revision 17589601) -2015 (revision 17678287) -216 ق.م (revision 17586752) -25 يناير (revision 17325864) -2 ربيع الأول (revision 17722146) -6 (عدد) (revision 16972178) -آريز (revision 17466671) -آلهة اليونان (revision 17722617) -أثينا (revision 17642941) -أثينا (ميثولوجيا) (revision 17662932) -أزمة المهاجرين إلى أوروبا (revision 17718437) -أوروبا (revision 17713457) -إس سي إي سانتا مونيكا ستوديو (revision 17035439) -إسبارطة (revision 16733170) -إسماعيل الصفوي (revision 17194218) -إله الحرب (لعبة فيديو) (revision 17630201) -إمارة دبي (revision 17602037) -إيطاليا (revision 17586853) -اتفاق باريس (revision 17718086) -الأزمة الليبية (revision 17630232) -الإمارات العربية المتحدة (revision 17722077) -الإنتخابات البلدية السعودية 2015 (revision 17722004) -الاتحاد الأوروبي لكرة القدم (revision 17596822) 
-الاحتجاجات اللبنانية 2015 (revision 17315127) -الانتفاضة الفلسطينية (2015) (revision 17710414) -التمرد العراقي (revision 17708640) -الجمعية العامة للأمم المتحدة (revision 17304227) -الجمهورية الرومانية (revision 16472557) -الجيش اللبناني (revision 17516533) -الحرب الأهلية السورية (revision 17675300) -الحرب الأهلية اليمنية (2015) (revision 17686236) -الحرب في شمال غرب باكستان (revision 17490838) -الدولة الصفوية (revision 17031046) -الرياض (revision 17580586) -السعودية (revision 17711339) -السلطة الوطنية الفلسطينية (revision 17438123) -العراق (revision 17704602) -العلاقات الخارجية في تركيا (revision 17647409) +الصفحة_الرئيسية (revision 52017134) +1442 هـ (revision 53072582) +1521 (revision 51053075) +15 مارس (revision 53063546) +16 مارس (revision 53079323) +1775 (revision 50066071) +17 مارس (revision 52787393) +1977 (revision 52535026) +1988 (revision 52921343) +1989 (revision 52945821) +2021 (revision 53072089) +3 شعبان (revision 53076204) +آذار (revision 50305961) +آفة (كائن حي) (revision 50728417) +أبو الريحان البيروني (revision 52901629) +أبو موسى الأشعري (revision 52327088) +أتراك (revision 52923553) +أحلام الجريتلي (revision 53054581) +أستراليا المفتوحة 2021 (revision 52951662) +ألمان (revision 51707635) +أم (revision 52894160) +أمريكا الجنوبية (revision 52623681) +إسبان (revision 53023219) +إسبانيا (revision 52945464) +إسكندنافيا (revision 52901031) +إيران (revision 53077058) +اشتباكات الصحراء الغربية 2020 (revision 52776181) +اغتيال (revision 52605819) +الإسلام (revision 53061751) +الاحتجاجات الروسية 2021 (revision 52959948) +التفسير الموضوعي (تفسير) (revision 53063711) +الجزري (revision 52615628) +الحزب التقدمي الاشتراكي (revision 52719072) +الحضارة الغربية (revision 52663404) +الحملة الصليبية الثانية (revision 53028660) +الشمس (revision 53011313) +العراق (revision 53078113) +الفتح الإسلامي لفارس (revision 52960739) +الفتح الإسلامي للشام (revision 53000955) +الفتح الإسلامي للمغرب (revision 53015306) +الفتح الإسلامي لمصر (revision 52728321) +الفلبين 
(revision 53043940) +القرآن الكريم (revision 53047128) +القمر (revision 52920452) +القواعد الصاروخية الإيرانية تحت الأرض (revision 50043780) +اللغة العربية (revision 52929542) +المكثرون لرواية الحديث (revision 51989111) +الهجوم الكيماوي على حلبجة (revision 52723565) +انتهازية (revision 52279176) +انقراض العصر الطباشيري-الباليوجيني (revision 52688972) +باتا (revision 46639883) +باتريك أتشي (revision 53024512) +بحث علمي (revision 51195242) +برسفيرنس (مركبة جوالة) (revision 52965815) +برنامج أبولو (revision 52571274) +بعقلين (revision 48961465) +بلاد السند (revision 52279660) +بوتان (revision 52999635) +ترابط زوجي (revision 50219604) +تسمية ثنائية (revision 53076878) +تفجيرات باتا 2021 (revision 53043062) +تقويم هجري (revision 53077283) +توقيت عالمي منسق (revision 52887567) +تونس (revision 53047571) +جائحة فيروس كورونا 2019–20 (revision 52954613) +جائزة الملك فيصل العالمية (revision 52691751) +جائزة الملك فيصل العالمية في الدراسات الإسلامية (revision 53078767) +جواثم (revision 49526826) +جيرارد كايبر (revision 52265632) +جيفة (revision 52535861) +حامد باكايوكو (revision 53079223) +حديث نبوي (revision 53075245) +حرس الثورة الإسلامية (revision 53019030) +حرملة بن كاهل (revision 52891720) +حرية (revision 52761732) +حصار البصرة (1775) (revision 52323981) +خوسيه دي سان مارتين (revision 49958538) +خير الدين حسيب (revision 53076407) +دير دوريت (revision 32094684) +ذكاء (revision 52187723) +روما القديمة (revision 51009123) +ساحل العاج (revision 52429752) +ساعة الفيل (revision 52709413) +سالم بن عبد الله بن عمر بن الخطاب (revision 51752881) +سرب (revision 52828105) +سرعة الصوت (revision 52975385) +سرعة الضوء (revision 52244159) +سوسن ربيع (revision 53077510) +سيبيريا (revision 52919273) +سيمون بوليفار (revision 53011030) +شريعة إسلامية (revision 53070018) +شفق (revision 48963569) +صباح عبد الجليل (revision 52995683) +صحابة (revision 52201334) +صلاة الجمعة (revision 52577966) +صلاة العيد (revision 49726428) +طائر التعريشة الساتاني (revision 44913097) +طائر القيثارة (revision 
52673886) +طابا (revision 53078953) +عبد الله الرضيع (revision 50505781) +عبد الله بن عباس (revision 52388329) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-13 18:33:58.846891 +- Wikipedia parsing ended at: 2021-03-16 11:42:48.951707 -95 characters appeared 727795 times. +101 characters appeared 1520487 times. First 64 characters: -[ 0] Char ا: 14.933875610577156 % -[ 1] Char ل: 11.460782225764122 % -[ 2] Char ي: 8.30302489025069 % -[ 3] Char م: 6.3702003998378665 % -[ 4] Char و: 5.952637762007158 % -[ 5] Char ر: 4.9419135883043985 % -[ 6] Char ن: 4.900967992360486 % -[ 7] Char ت: 4.229625100474721 % -[ 8] Char ة: 3.6022506337636284 % -[ 9] Char ب: 3.5434428650925054 % -[10] Char ع: 3.3116468236247845 % -[11] Char د: 3.1756195082406444 % -[12] Char س: 2.5401383631379715 % -[13] Char ف: 2.3899587109007343 % -[14] Char ق: 2.010868445097864 % -[15] Char أ: 1.8763525443291036 % -[16] Char ه: 1.8663222473361318 % -[17] Char ك: 1.8573911609725264 % -[18] Char ح: 1.8431014227907585 % -[19] Char ج: 1.3270220323030524 % -[20] Char ط: 1.0305099650313618 % -[21] Char ش: 0.9638703206260004 % -[22] Char إ: 0.8946200509758929 % -[23] Char ص: 0.8509264284585631 % -[24] Char ى: 0.7726076711161797 % -[25] Char خ: 0.717097534333157 % -[26] Char ز: 0.6687322666410184 % -[27] Char ث: 0.6549921337739336 % -[28] Char ض: 0.5409490309771295 % -[29] Char غ: 0.4574090231452538 % -[30] Char ذ: 0.44765352880962356 % -[31] Char ئ: 0.39269299734128427 % -[32] Char ء: 0.295138053984982 % -[33] Char ظ: 0.2397653185306302 % -[34] Char آ: 0.12324899181775088 % -[35] Char ؤ: 0.08491402111858422 % -[36] Char ـ: 0.047678261048784344 % -[37] Char a: 0.03311372020967443 % -[38] Char e: 0.029403884335561525 % -[39] Char i: 0.027205463076827956 % -[40] Char o: 0.02432003517474014 % -[41] Char t: 0.02349562720271505 % -[42] Char r: 0.02294602188803166 % -[43] Char n: 0.020472797971956388 % -[44] Char s: 0.01799957405588112 % -[45] Char l: 0.012915724895059736 % -[46] Char h: 
0.011816514265692949 % -[47] Char d: 0.011129507622338709 % -[48] Char پ: 0.010717303636326163 % -[49] Char c: 0.009480691678288529 % -[50] Char u: 0.007969277062909199 % -[51] Char m: 0.007694474405567502 % -[52] Char A: 0.006870066433542411 % -[53] Char گ: 0.006595263776200715 % -[54] Char f: 0.006183059790188171 % -[55] Char S: 0.005770855804175626 % -[56] Char y: 0.0054960531468339294 % -[57] Char T: 0.0049464478321505365 % -[58] Char b: 0.0048090465034796885 % -[59] Char G: 0.0046716451748088405 % -[60] Char I: 0.004396842517467144 % -[61] Char C: 0.0042594411887962955 % -[62] Char p: 0.0039846385314545995 % -[63] Char k: 0.003709835874112903 % +[ 0] Char ا: 14.550206611434364 % +[ 1] Char ل: 11.41772340046314 % +[ 2] Char ي: 7.748043883308441 % +[ 3] Char م: 6.294036055553254 % +[ 4] Char و: 5.778148711564124 % +[ 5] Char ن: 5.249304992413615 % +[ 6] Char ر: 4.93203822196441 % +[ 7] Char ت: 4.071261378755622 % +[ 8] Char ب: 3.8685631643019636 % +[ 9] Char ة: 3.2951942371095577 % +[10] Char ع: 3.235344991440243 % +[11] Char د: 2.921103567475421 % +[12] Char س: 2.615806646160079 % +[13] Char ف: 2.609032500771135 % +[14] Char ه: 2.300841769775079 % +[15] Char ق: 2.1174794654607374 % +[16] Char أ: 2.0117238753110023 % +[17] Char ك: 1.952006166445356 % +[18] Char ح: 1.788966298297848 % +[19] Char ج: 1.297939410202126 % +[20] Char ط: 0.9575221623072082 % +[21] Char ص: 0.8946475701535099 % +[22] Char ش: 0.8939898861351658 % +[23] Char إ: 0.8849796150838514 % +[24] Char ى: 0.8706421034839494 % +[25] Char خ: 0.7856693283138889 % +[26] Char ث: 0.6599201440064926 % +[27] Char ز: 0.6011231927665281 % +[28] Char ذ: 0.5680416866438187 % +[29] Char ض: 0.5665290134016273 % +[30] Char غ: 0.5086528197873444 % +[31] Char ئ: 0.3490329085352259 % +[32] Char ء: 0.29898315473923814 % +[33] Char ظ: 0.20197476203348005 % +[34] Char آ: 0.15396382869435912 % +[35] Char ؤ: 0.09148384695166746 % +[36] Char a: 0.05748158320327632 % +[37] Char e: 0.045972112882254175 % +[38] Char i: 
0.042946766397871206 % +[39] Char t: 0.042223313977692675 % +[40] Char ـ: 0.03972411470798501 % +[41] Char r: 0.035778010597920275 % +[42] Char s: 0.034988789775907324 % +[43] Char n: 0.031437296076849065 % +[44] Char l: 0.030319233245664053 % +[45] Char o: 0.029661549227319933 % +[46] Char c: 0.0209143517833431 % +[47] Char m: 0.01861245771913867 % +[48] Char d: 0.018086310504463375 % +[49] Char y: 0.015060964020080407 % +[50] Char h: 0.01479789041274276 % +[51] Char p: 0.01479789041274276 % +[52] Char u: 0.014732122010908347 % +[53] Char f: 0.010194102284333902 % +[54] Char C: 0.008221050229301533 % +[55] Char b: 0.007892208220129471 % +[56] Char g: 0.007431829407288587 % +[57] Char v: 0.007234524201785348 % +[58] Char S: 0.007168755799950937 % +[59] Char E: 0.006905682192613288 % +[60] Char I: 0.006445303379772402 % +[61] Char T: 0.006379534977937989 % +[62] Char A: 0.005853387763262692 % +[63] Char B: 0.005458777352256218 % -The first 64 characters have an accumulated ratio of 0.999523217389512. +The first 64 characters have an accumulated ratio of 0.9992864128400966. -1479 sequences found. +1820 sequences found. 
-First 512 (typical positive ratio): 0.9696025116913417 -Next 512 (512-1024): 1.3740132867084825e-06 -Rest: 0.0012305764497782395 +First 512 (typical positive ratio): 0.9644868613755061 +Next 512 (512-1024): 0.0774804388330844 +Rest: 0.0019191680534433112 -- Processing end: 2015-12-13 18:33:59.193909 +- Processing end: 2021-03-16 11:42:49.142159 diff --git a/script/BuildLangModelLogs/LangDanishModel.log b/script/BuildLangModelLogs/LangDanishModel.log index 14bf65d..2408080 100644 --- a/script/BuildLangModelLogs/LangDanishModel.log +++ b/script/BuildLangModelLogs/LangDanishModel.log @@ -1,256 +1,156 @@ = Logs of language model for Danish (da) = - Generated by BuildLangModel.py -- Started: 2022-11-30 19:37:01.097250 -- Maximum depth: 2 -- Max number of pages: 200 +- Started: 2021-03-16 01:32:17.684746 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == Forside (revision 10000691) -Hans Magnus Enzensberger (revision 11341046) -28. november (revision 9410945) -Golfkrigen (revision 11144370) -29. november (revision 6877900) -8. december (revision 10277754) -Det Konservative Folkeparti (revision 11313857) -1990 (revision 11340072) -1940 (revision 11263756) -Angolas håndboldlandshold (damer) (revision 11331888) -Skjoldvulkan (revision 10870812) -Casper & Mandrilaftalen (revision 11221713) -26. november (revision 10617630) -Døde i 2022 (revision 11343986) -Vikingetidens rustning og våben (revision 11332607) -Middelaldercentret (revision 11339897) -Ruslands invasion af Ukraine 2022 (revision 11335164) -Saddam Hussein (revision 11002258) -The Jimi Hendrix Experience (revision 10497780) -Færøerne (revision 11333678) -27. 
november (revision 9745974) -Thomas Vinterberg (revision 11234643) -Anwar Ibrahim (revision 11342876) -Mandatområdet i Palæstina (revision 11341286) -Kunst (revision 11336917) -Afrikamesterskabet i håndbold 2022 (kvinder) (revision 11341917) -Dansk (sprog) (revision 11313509) -Sergej Sjojgu (revision 11309097) -Fernando Gomes (revision 11340427) -Folketinget (revision 11330485) -15. januar (revision 10515606) -Rock and Roll Hall of Fame (revision 8408189) -Thomas Edison (revision 11052704) -Ukraine (revision 11334630) -1947 (revision 11252357) -1937 (revision 11303923) -IC4 (revision 11317878) -Jimi Hendrix (revision 11341476) -Ismail Sabri Yaakob (revision 11105534) -Okipa-ceremonien (revision 11340589) -SI-præfiks (revision 11332802) -Sporvejsmuseet Skjoldenæsholms historie (revision 11338275) -Irak (revision 11255676) -Woodstockfestivalen (revision 11226413) -Nikolaj Lie Kaas (revision 11322663) -Torben Rechendorff (revision 11342962) -Folketingsvalget 2022 (revision 11339557) -Kherson (revision 11314559) -Keltere (revision 11318773) -Little Richard (revision 11226619) -Invasion (revision 10307980) -Tate Gallery (revision 8312688) -24. januar (revision 10441562) -Hans Christian Ægidius (revision 9773029) -Slaget ved Irpin (1321) (revision 11230064) -Auschwitz (revision 11310714) -Jazz fusion (revision 11223082) -Lutsk (revision 11248429) -Planetarium (revision 11266837) -Bibliothèque nationale de France (revision 11055813) -Digtsamling (revision 10585337) -Kenneth Gøtterup (revision 11027437) -Straf (revision 11007456) -1716 (revision 11339928) -Kamel (revision 11285016) -Amnesti (revision 10831621) -Zulu Royal (revision 10969220) -Stephen Roche (revision 11239346) -13. december (revision 10768225) -Enhed (politisk parti) (revision 10158693) -The Everly Brothers (revision 10865882) -3. 
november (revision 9423371) -Annelise Gotfredsen (revision 11306090) -Virtual International Authority File (revision 8702589) -Europæiske Fællesskab (revision 10868689) -Væringer (revision 11331002) -Rom (revision 11341285) -Decentralisering (revision 11154770) -Kreml (Moskva) (revision 11045482) -Folketingsvalget 1994 (revision 11266325) -28. december (revision 6878014) -Østjyllands Storkreds (revision 11201505) -Bruxelles (revision 10802416) -Erik Haunstrup Clemmensen (revision 10627614) -Hviderussere (revision 10750673) -Hvidmelet Gåsefod (revision 11317723) -Mario Draghi (revision 11302527) -Folketingsvalget 1968 (revision 11300317) -Skudår (revision 10360386) -1921 (revision 11303917) -Rundkørsel (revision 11103019) -Valerij Zaluzjnyj (revision 11335164) -Angrebet på Pearl Harbor (revision 11309782) -Folketingsmedlemmer valgt i 2007 (revision 11187293) -Ingeniørvidenskab (revision 9816520) -Vikinger (revision 11327511) -Martin Luther King (revision 11320659) -1757 (revision 11186195) -Dieseltogsæt (revision 8177984) -El-værk (revision 11334293) -Soul (revision 11283982) -John McVie (revision 11040471) -Botswanas håndboldlandshold (herrer) (revision 11333322) -1971 (revision 11243510) -Rana Hussein (revision 11266594) -DR (revision 11342995) -Ewan McGregor (revision 11331681) -Eliane Paulo (revision 10589121) -Zepto- (revision 11332802) -København (revision 11336925) -Gallien (revision 9984925) -Augustoprøret (revision 11234324) -1991 (revision 11250037) -Afledte SI-enheder (revision 11097802) -Gemeinsame Normdatei (revision 11281765) -Litteraturvidenskab (revision 10931878) -Thorvald Stauning (revision 11107677) -Afrikamesterskabet i håndbold 2018 (mænd) (revision 11131830) -Folkeforbundet (revision 11315450) -Readymades (revision 10932287) -Al Anbar (revision 9458175) -2007 (revision 11250033) -Varieté (revision 10934358) -Damaskus (revision 11030795) -Palæstina (revision 11311424) -1569 (revision 10832219) -Pædagog (revision 11251603) -Carina Christensen 
(revision 11073847) -Vest-Tyskland (revision 10580737) -20. november (revision 6877846) -Tessa Jowell (revision 11225831) -Hillerød (revision 11317306) -Påskekrisen (1920) (revision 11287865) -Grad (vinkelmål) (revision 9624298) -Kvinde (revision 11333939) -1931 (revision 11236350) -Afrikaans (revision 11080347) -Den Store Danske Encyklopædi (revision 11301417) -22. juni (revision 10375853) -Automatic Train Control (revision 10619401) -Luc Montagnier (revision 11162267) -Reprise Records (revision 11081843) -1966 (revision 11336105) -Prosa (skriveform) (revision 11236012) -Michael af Rumænien (revision 10819975) -Mykolajiv (revision 11236676) -Khmelnytskyj oblast (revision 11188686) -Sierra Leones håndboldlandshold (herrer) (revision 11333322) -1969 (revision 11340081) -H. Edvard Hass (revision 10348478) -Københavns Idrætspark (revision 9400386) -Sanna Nielsen (revision 11315712) -19. maj (revision 7148596) -Patricia Schumann (revision 10952761) -Torstenssonfejden (revision 11326728) -International Standard Name Identifier (revision 10880739) -Bent Mejding (revision 11335462) -Afdeling Q (revision 11279134) -Alfred Bindslev (revision 10398140) -Sakser (revision 9042633) -Folketingsmedlemmer valgt i 1998 (revision 11213304) -1996 (revision 11229565) -1 (tal) (revision 9378579) -Farrah Fawcett (revision 10977527) -Google+ (revision 10469085) -1530 (revision 10865231) -De største helte (revision 10737852) -Afrikamesterskabet i håndbold 1974 (mænd) (revision 11018946) -1902 (revision 11217211) -ISO 639-3 (revision 10880691) -1974 (revision 11336110) -Dansk fonologi (revision 11226101) -Europa (revision 11149054) -Sovemedicin (revision 11327388) -Slotsbryggen (Nykøbing Falster) (revision 11005548) -Olieraffinaderi (revision 11322152) -Slaget ved Stiklestad (revision 11261889) -Rolling Stone (revision 11267586) -Jørgen Hald (revision 10296412) -Nikolaj Coster-Waldau (revision 11228953) -Aserbajdsjan (revision 11297538) -Kultstatus (revision 7820159) -Al Kut (revision 
9425606) -Library of Congress Control Number (revision 8316539) -Rwandas håndboldlandshold (herrer) (revision 11333322) -Levon Helm (revision 11317127) -Howard Hughes (revision 11040881) -Wim Kieft (revision 10910953) -Afrikamesterskabet i håndbold 2016 (mænd) (revision 11018957) -24. februar (revision 10755036) -Iværksætter (revision 10972242) -1992 (revision 11303945) -Internationalt Standardbognummer (revision 11037702) -Afrikamesterskabet i håndbold 2014 (mænd) (revision 11018956) -En mand kommer hjem (revision 10737861) -Jamaica (revision 11243987) -Henitjesk (revision 11328921) -August (revision 11210562) +1. symfoni (Beethoven) (revision 10648993) +15. marts (revision 8172123) +1917 (revision 10645384) +1930 (revision 10645389) +1940 (revision 10648721) +1951 (revision 10640371) +1972 (revision 10641861) +2. marts (revision 9423344) +2003 (revision 10654209) +44 f.Kr. (revision 7242128) +7. marts (revision 9423388) +9. marts (revision 10601197) +Abdikation (revision 10197388) +Afsnit af Badehotellet (revision 10654331) +Agnes Slott-Møller (revision 10648962) +Australian Open-mesterskabet i damesingle 2021 (revision 10630904) +Australian Open-mesterskabet i herresingle 2021 (revision 10630887) +Australian Open 2021 (revision 10630544) +Casper & Mandrilaftalen (revision 10444147) +Coronaviruspandemien (revision 10652415) +Cykling under sommer-OL 2012 – Linjeløb (kvinder) (revision 10651872) +Dansk (sprog) (revision 10633727) +Den danske Treårsekspedition til Østgrønland 1931-34 (revision 10654093) +Dnepr (revision 10635465) +Donald Trump (revision 10653185) +Døde i 2021 (revision 10653976) +Encyklopædi (revision 10590147) +Eurovision Song Contest 2014 (revision 10592331) +Folkerepublikken Kina (revision 10634829) +Folketinget (revision 10643927) +Fram-ekspeditionen 1910-1912 (revision 10630146) +Frankrig (revision 10648749) +Frankrigs præsidenter (revision 10477099) +Geologi (revision 10631000) +Geoteknik (revision 10603548) +Greater London (revision 10380043) 
+Hortus Botanicus Amsterdam (revision 8854568) +Hu Jintao (revision 10610855) +IC4 (revision 10577458) +Idus martius (revision 10652897) +Inger Støjberg (revision 10643259) +Italiens premierministre (revision 10625575) +John Polkinghorne (revision 10654447) +Julius Cæsar (revision 10653812) +Korruption (revision 10401686) +Lars Göran Petrov (revision 10650013) +London Underground (revision 10635531) +Marge Simpson (revision 10640942) +Mario Draghi (revision 10652699) +Matilde af Skotland (revision 10648200) +Metrosystemer i verden (revision 10510595) +Middelaldercentret (revision 10574228) +Naomi Osaka (revision 10478959) +Nederlandene (revision 10642742) +Nicolas Sarkozy (revision 10639376) +Nikolaj 2. af Rusland (revision 10639924) +Novak Djokovic (revision 10479710) +Outlaw Gentlemen & Shady Ladies (revision 10492201) +Paris-Nice 2021 (revision 10653019) +Rigsretssagen mod Donald Trump 2021 (revision 10653875) +Rigsretssagen mod Inger Støjberg (revision 10643260) +Rusland (revision 10631140) +Sanja Ilić (revision 10645645) +Senat (revision 10429780) +Senatet (USA) (revision 10624834) +Shu-bi-dua (revision 10630614) +Svend Johansen (skuespiller) (revision 10643631) +Tennis (revision 10651841) +Tommy Troelsen (revision 10648382) +Træsko (revision 10626215) +USA's præsidenter (revision 10639768) +Undergrundsbane (revision 10541653) +Vilhelm Erobreren (revision 10631208) +Wikimedia (revision 10260889) +Wikipedia (revision 10627445) +Zar (revision 10557166) +1800 (revision 10645359) +2. 
april (revision 9568657) +Burgtheater (revision 9296862) +C-dur (revision 10513719) +Cello (revision 10641506) +Coda (revision 9298442) +Dominant (revision 9513277) +Dynamik (musik) (revision 9504157) +F-dur (revision 8135200) +Fagot (revision 10578018) +Fløjte (revision 10329382) +Harmonik (revision 10577145) +International Music Score Library Project (revision 10115839) +Italienske og franske musikudtryk (revision 10352094) +Johann Georg Albrechtsberger (revision 10289540) +Joseph Haydn (revision 10289602) +Klarinet (revision 10490230) +Klassicisme (musik) (revision 10436811) +Kontrabas (revision 10147393) +Kontrapunkt (musikteori) (revision 10184029) +Leipzig (revision 10611798) +Ludwig van Beethoven (revision 10642134) == End of Parsed pages == -- Wikipedia parsing ended at: 2022-11-30 19:41:17.518631 +- Wikipedia parsing ended at: 2021-03-16 01:36:49.098009 -60 characters appeared 1532370 times. +57 characters appeared 1058523 times. -First 31 characters: -[ 0] Char e: 15.035728968852169 % -[ 1] Char r: 8.617892545534042 % -[ 2] Char n: 7.618264518360449 % -[ 3] Char t: 6.856503324915001 % -[ 4] Char a: 6.475133290262796 % -[ 5] Char i: 6.3714377076032545 % -[ 6] Char s: 6.279488635251278 % -[ 7] Char d: 5.919523352715076 % -[ 8] Char l: 5.094722553952375 % -[ 9] Char o: 4.86860223053179 % -[10] Char g: 3.8343872563414845 % -[11] Char k: 3.3303314473658454 % -[12] Char m: 3.2096034247603384 % -[13] Char f: 2.608247355403721 % -[14] Char v: 2.342188896937424 % -[15] Char u: 1.9602967951604378 % -[16] Char b: 1.9047619047619049 % -[17] Char p: 1.5793183108518178 % -[18] Char h: 1.45728512043436 % -[19] Char ø: 0.8954103773892728 % -[20] Char æ: 0.7449897870618715 % -[21] Char å: 0.7295235484902471 % -[22] Char y: 0.6777736447463732 % -[23] Char j: 0.666418684782396 % -[24] Char c: 0.5946344551250677 % -[25] Char w: 0.12248999915164091 % -[26] Char z: 0.06571519933175407 % -[27] Char x: 0.045354581465311905 % -[28] Char é: 0.021926819240783886 % -[29] Char ó: 
0.009592983417842949 % -[30] Char q: 0.009397208246050236 % +First 30 characters: +[ 0] Char e: 15.118707859914238 % +[ 1] Char r: 8.552388564065213 % +[ 2] Char n: 7.6833474567864855 % +[ 3] Char t: 7.125305732610439 % +[ 4] Char a: 6.351302711419591 % +[ 5] Char i: 6.265806222443915 % +[ 6] Char s: 6.152629654716997 % +[ 7] Char d: 5.90341447469729 % +[ 8] Char o: 5.144999211164992 % +[ 9] Char l: 5.1253491893893655 % +[10] Char g: 3.907992551885977 % +[11] Char m: 3.3046990948708723 % +[12] Char k: 3.0474538578755492 % +[13] Char f: 2.586434116216653 % +[14] Char v: 2.2680659749481116 % +[15] Char u: 1.9654745338551927 % +[16] Char b: 1.7524418458550264 % +[17] Char p: 1.6338804163915193 % +[18] Char h: 1.5844719481768466 % +[19] Char ø: 0.7598323324103491 % +[20] Char æ: 0.7542585281566863 % +[21] Char å: 0.728278932059105 % +[22] Char y: 0.6751860847615027 % +[23] Char c: 0.6527963964883143 % +[24] Char j: 0.5847770903419198 % +[25] Char w: 0.17241004682940286 % +[26] Char z: 0.0783166733268904 % +[27] Char x: 0.05602145631223884 % +[28] Char é: 0.019177665482941794 % +[29] Char q: 0.016626941502452003 % -The first 31 characters have an accumulated ratio of 0.9994694492844417. +The first 30 characters have an accumulated ratio of 0.9997184756495605. -1065 sequences found. +936 sequences found. 
-First 512 (typical positive ratio): 0.9958348814328518 -Next 512 (512-1024): 2.6103356239028435e-06 -Rest: 3.268948339453948e-05 +First 512 (typical positive ratio): 0.9962304038307248 +Next 512 (512-1024): 0.007598323324103491 +Rest: -5.2909066017292616e-17 -- Processing end: 2022-11-30 19:41:17.605842 +- Processing end: 2021-03-16 01:36:49.182013 diff --git a/script/BuildLangModelLogs/LangFrenchModel.log b/script/BuildLangModelLogs/LangFrenchModel.log index ff6d14a..6328353 100644 --- a/script/BuildLangModelLogs/LangFrenchModel.log +++ b/script/BuildLangModelLogs/LangFrenchModel.log @@ -1,116 +1,159 @@ = Logs of language model for French (fr) = - Generated by BuildLangModel.py -- Started: 2015-12-03 21:07:37.508739 -- Maximum depth: 2 -- Max number of pages: 50 +- Started: 2021-03-16 01:17:58.545030 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Wikipédia:Accueil_principal (revision 115957655) -Bœuf (animal) (revision 115500130) -1500 av. J.-C. (revision 110583603) -1898 dans les chemins de fer (revision 106801806) -1913 dans les chemins de fer (revision 112852042) -1974 dans les chemins de fer (revision 90170756) -1er décembre (revision 121012781) -2009 dans les chemins de fer (revision 107042206) -2011 dans les chemins de fer (revision 109560866) -24 novembre (revision 120782024) -26 novembre (revision 120833172) -29 novembre (revision 120918160) -2 décembre (revision 121025437) -30 novembre (revision 120947714) -3 décembre (revision 121030621) -Amphibien (revision 120332329) -Angleterre (revision 120784240) -Anne-Josèphe Théroigne de Méricourt (revision 121009789) -Années 1930 (revision 120558236) -Antonio Troyo Calderón (revision 121028881) -António Costa (revision 120993829) -Attentat du 24 novembre 2015 à Tunis (revision 121015161) -Balard (métro de Paris) (revision 118979088) -Bois de Vincennes (revision 120822909) -Buse à tête blanche (revision 121009499) -Californie (revision 120922479) -Charenton-le-Pont (revision 120210025) 
-Charenton - Écoles (métro de Paris) (revision 108644873) -Chronique médiévale (revision 100253272) -Concorde (métro de Paris) (revision 120856751) -Conférence de Paris de 2015 sur le climat (revision 121029398) -Crise de la dette publique grecque (revision 120905208) -Crise entre la Colombie et le Venezuela de 2015 (revision 120857143) -Crise migratoire en Europe (revision 121002308) -Crise russo-turque de 2015 (revision 121030214) -Créteil (revision 120684618) -Créteil - Préfecture (métro de Paris) (revision 113486387) -Deuxième guerre civile libyenne (revision 121027704) -Devise (monnaie) (revision 121015771) -Droits de tirage spéciaux (revision 121009135) -Décembre 2015 (revision 121010045) -Département français (revision 120993190) -Eldar Riazanov (revision 120996396) -Enfants verts de Woolpit (revision 121002303) -Ernst Larsen (revision 121026772) -Fatima Mernissi (revision 120992271) -Fejervarya cancrivora (revision 120353807) -Fonds monétaire international (revision 120754406) -Français (revision 120883858) -Freyja (revision 121028677) -Fusillade du 2 décembre 2015 en Californie (revision 121030353) +Wikipédia:Accueil_principal (revision 164303621) +Bœuf (animal) (revision 178255345) +10 mars (revision 180841287) +12 mars (revision 180798998) +13 mars (revision 180904703) +1493 (revision 163870551) +14 mars (revision 180901488) +15 mars (revision 180904428) +1891 (revision 180890066) +1917 (revision 178369116) +1939 (revision 178458019) +2011 (revision 176114496) +45e parallèle nord (revision 180910832) +6 mars (revision 180750121) +7 mars (revision 180750121) +Absolutisme (revision 179767600) +Alassane Ouattara (revision 180842696) +Ambassadeur (revision 180674153) +Amiral de France (revision 177268292) +Amirautés de Bretagne (revision 175194082) +Aurora Cornu (revision 180901231) +Bata (Guinée équatoriale) (revision 180763894) +Bob Walkup (revision 180908319) +Bourgogne-Franche-Comté (revision 180662628) +Centre de données (revision 180741567) 
+Championnats du monde de ski acrobatique 2021 (revision 180882257) +Christophe Colomb (revision 180494940) +Claude Debussy (revision 179962158) +Couronne solaire (revision 180875717) +Crise présidentielle depuis 2019 au Venezuela (revision 180336636) +Critique musical (revision 174352172) +Côte d'Ivoire (revision 180838790) +Daniel Vachez (revision 180915214) +Degré Celsius (revision 179948881) +Deuxième République (Tchécoslovaquie) (revision 180896689) +Deuxième guerre civile libyenne (revision 180269091) +Empire romain (revision 180843240) +Empire russe (revision 179593986) +Excommunication (revision 178073962) +Explosions de Bata (revision 180862772) +Fatima Aziz (revision 180862495) +Fort du Lomont (revision 180886100) +Frankie de la Cruz (revision 180903250) +GINK (revision 179590111) +Giovanni Gastel (revision 180881061) +Goodwill Zwelithini kaBhekuzulu (revision 180806403) +Gouvernement de l'Église catholique (revision 176961659) +Guerre civile syrienne (revision 180897321) +Guerre civile yéménite (revision 180691885) +Guerre du Tigré (revision 180793174) +Guinée équatoriale (revision 180759310) +Hamed Bakayoko (revision 180904779) +Helena Fuchsová (revision 180909783) +Henri-Charles de Beaumanoir de Lavardin (revision 180903071) +Henry Darrow (revision 180905848) +Heure en France (revision 180854115) +Incendie du centre de données d'OVHcloud à Strasbourg (revision 180901025) +Innocent XI (revision 180108629) +Ivo Trumbić (revision 180827381) +Jean-Claude Fasquelle (revision 180871354) +Jean-Jacques Viton (revision 180889491) +Jean Frydman (revision 180909934) +Le Mans (revision 180520548) +Lieutenant général (revision 180899945) +Liste des ambassadeurs de France près le Saint-Siège (revision 180150184) +Manifestation des agriculteurs indiens de 2020-2021 (revision 180901643) +Manifestations de 2020-2021 en Arménie (revision 180901656) +Manifestations de 2020-2021 en Biélorussie (revision 180901634) +Manifestations de 2021 au Sénégal (revision 180900196) 
+Manifestations de 2021 en Birmanie (revision 180901671) +Manifestations de 2021 en Russie (revision 180897927) +Manifestations de Deraa (revision 180914771) +Mars 1891 (revision 155220626) +Mars 2021 (revision 180914744) +Marvin Hagler (revision 180908678) +Militaire (revision 178062901) +Murray Walker (revision 180862148) +OVHcloud (revision 180900746) +Obren Joksimović (revision 180901629) +Palais Farnèse (revision 180885444) +Pandémie de Covid-19 (revision 180845115) +Pays-Bas (revision 180853920) +Photosphère (revision 179722426) +Premier ministre ivoirien (revision 180838804) +Province de Bretagne (revision 176523092) +Président de la république de Côte d'Ivoire (revision 180747416) +Pôle Nord (revision 178839482) +Querelle des Franchises (revision 180092394) +Raoul Casadei (revision 180910155) +Rassemblement des houphouëtistes pour la démocratie et la paix (revision 180912125) +Roi des Français (revision 180882393) +Ronald DeFeo Jr. (revision 180915749) +Royaume de France (revision 180809662) +Révolte du Papier timbré (revision 180903105) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-03 21:10:27.682316 +- Wikipedia parsing ended at: 2021-03-16 01:24:27.092152 -56 characters appeared 728239 times. +57 characters appeared 1900431 times. 
First 38 characters: -[ 0] Char e: 14.339660468609894 % -[ 1] Char s: 7.954806045817375 % -[ 2] Char a: 7.864176458552756 % -[ 3] Char n: 7.572102015959047 % -[ 4] Char i: 7.34154583866011 % -[ 5] Char r: 7.020222756540091 % -[ 6] Char t: 6.833608197308851 % -[ 7] Char l: 5.9446143367768 % -[ 8] Char o: 5.386418469760614 % -[ 9] Char u: 5.024861343597363 % -[10] Char d: 4.169235649285468 % -[11] Char c: 3.4240132703686568 % -[12] Char p: 2.8882001650557028 % -[13] Char m: 2.803063280049544 % -[14] Char é: 2.498355622261373 % -[15] Char g: 1.277739862874688 % -[16] Char v: 1.1729665672945284 % -[17] Char f: 1.1614318925517584 % -[18] Char b: 0.9925312981040565 % -[19] Char h: 0.8580974103282026 % -[20] Char q: 0.7740590657737364 % -[21] Char x: 0.43570860665248634 % -[22] Char y: 0.41044217626356183 % -[23] Char è: 0.4100302235941771 % -[24] Char à: 0.363479571953713 % -[25] Char j: 0.29591933417463223 % -[26] Char k: 0.1359443808969308 % -[27] Char ç: 0.11685724054877589 % -[28] Char ê: 0.11218844362908331 % -[29] Char z: 0.10738232915292918 % -[30] Char w: 0.08239053387692777 % -[31] Char ô: 0.04792382720507965 % -[32] Char â: 0.03364280133307884 % -[33] Char î: 0.029385957082770905 % -[34] Char û: 0.024854477719539875 % -[35] Char œ: 0.021146903695078125 % -[36] Char ï: 0.017851282340001016 % -[37] Char ù: 0.015242248767231636 % +[ 0] Char e: 14.210092342210793 % +[ 1] Char a: 8.0327567799094 % +[ 2] Char s: 7.818647454182762 % +[ 3] Char i: 7.531554684174274 % +[ 4] Char n: 7.491616375443256 % +[ 5] Char r: 7.05650455080979 % +[ 6] Char t: 6.771779664718161 % +[ 7] Char l: 5.854461435327039 % +[ 8] Char o: 5.412772155368966 % +[ 9] Char u: 5.014546700195903 % +[10] Char d: 4.239248886173716 % +[11] Char c: 3.238896860764742 % +[12] Char m: 2.8875028875028876 % +[13] Char p: 2.787104609428072 % +[14] Char é: 2.546790701688196 % +[15] Char v: 1.3356443880361877 % +[16] Char g: 1.1728392138414918 % +[17] Char f: 1.1096956427252553 % +[18] Char b: 1.084859171419536 
% +[19] Char h: 0.9054261901642312 % +[20] Char q: 0.7540920980556516 % +[21] Char y: 0.42858698895145364 % +[22] Char x: 0.4087493836924361 % +[23] Char à: 0.39127966235027745 % +[24] Char è: 0.3704422838819194 % +[25] Char j: 0.35176231076003284 % +[26] Char k: 0.17332910271406854 % +[27] Char z: 0.11539487621492178 % +[28] Char ê: 0.10397641377140239 % +[29] Char ç: 0.09292628882606103 % +[30] Char ô: 0.07540394784130547 % +[31] Char w: 0.06340666932922058 % +[32] Char î: 0.031729644485908724 % +[33] Char û: 0.029309140926453 % +[34] Char â: 0.02504694987610705 % +[35] Char ï: 0.019942844544211285 % +[36] Char ù: 0.016259469562430837 % +[37] Char œ: 0.010839646374953892 % -The first 38 characters have an accumulated ratio of 0.999621003544166. +The first 38 characters have an accumulated ratio of 0.9996521841624343. -914 sequences found. +1049 sequences found. -First 512 (typical positive ratio): 0.997057879992383 -Next 512 (512-1024): 1.3731755646154627e-06 -Rest: 3.8163916471489756e-17 +First 512 (typical positive ratio): 0.997006678170155 +Next 512 (512-1024): 0.00010839646374953892 +Rest: 1.646491655585584e-05 -- Processing end: 2015-12-03 21:10:27.987730 +- Processing end: 2021-03-16 01:24:27.266283 diff --git a/script/BuildLangModelLogs/LangGermanModel.log b/script/BuildLangModelLogs/LangGermanModel.log index 9115d29..8d52ec6 100644 --- a/script/BuildLangModelLogs/LangGermanModel.log +++ b/script/BuildLangModelLogs/LangGermanModel.log @@ -1,159 +1,150 @@ = Logs of language model for German (de) = - Generated by BuildLangModel.py -- Started: 2015-12-03 22:42:29.154759 -- Maximum depth: 3 +- Started: 2021-03-16 01:05:29.301622 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Wikipedia:Hauptseite (revision 140459035) -1740 (revision 145584733) -1890 (revision 148575121) -1925 (revision 148682812) -1965 (revision 148411693) -3. 
Dezember (revision 148684818) -Bundeswehreinsatz in Syrien (revision 148714599) -Clara Klabunde (revision 148697193) -Day Tripper (revision 145956669) -Dezember 2015 (revision 148713161) -Edwar al-Charrat (revision 148656295) -Enzyklika (revision 148704406) -Enzyklopädie (revision 148364925) -Facebook Inc. (revision 148280344) -Franz Neubauer (CSU) (revision 148710968) -Freie Inhalte (revision 148123311) -Gabriele Ferzetti (revision 148715582) -Georg von Waldburg zu Zeil und Trauchburg (revision 148710609) -Jim Loscutoff (revision 148690370) -Katarina Witt (revision 148713884) -Klavierkonzert (Gershwin) (revision 143900338) -Ludolf Camphausen (revision 145088962) -Mark Zuckerberg (revision 148714452) -Montenegro (revision 148692773) -NATO (revision 148697872) -NATO-Osterweiterung (revision 148697354) -Nekrolog 2015 (revision 148711617) -Peter-Ulrich-Haus (revision 148654149) -Philanthropie (revision 145561255) -Präsidentschaftswahl in Burkina Faso 2015 (revision 148677453) -Québec (Stadt) (revision 148716893) -Rivka Zohar (revision 148708850) -Roch Marc Kaboré (revision 148673951) -Rubber Soul (revision 148665720) -Salve Regina (Latry) (revision 148713279) -Schießerei in San Bernardino (revision 148711974) -Single (Musik) (revision 146450210) -The Giving Pledge (revision 148711856) -Ubi primum (Benedikt XIV.) (revision 136691297) -VTech (revision 148704025) -Walter Damrosch (revision 148716127) -We Can Work It Out (revision 148706519) -1. August (revision 148089156) -1. Januar (revision 148659041) -1. Juni (revision 148375663) -1. November (revision 147888516) -10. August (revision 148079904) -10. November (revision 148658709) -10. September (revision 148201788) -11. August (revision 148315737) -11. Oktober (revision 148087353) -12. Januar (revision 147377586) -12. September (revision 148359994) -13. Dezember (revision 148614781) -13. September (revision 148320520) -14. August (revision 148513270) -14. Dezember (revision 147968142) -15. 
April (revision 146544147) -15. August (revision 147827975) -16. April (revision 148712866) -16. Dezember (revision 148392316) -16. Februar (revision 148221712) -16. Jahrhundert (revision 147390194) -16. Juli (revision 147928181) -1652 (revision 142931287) -1654 (revision 145531451) -1656 (revision 144194148) -1657 (revision 147492859) -1662 (revision 147548355) -1665 (revision 147757128) -1666 (revision 147843417) -1667 (revision 148566099) -1668 (revision 145304760) -1670 (revision 147643990) -1672 (revision 145296252) -1673 (revision 147879655) -1674 (revision 146784434) -1679 (revision 146069377) -1685 (revision 148596629) -1688 (revision 140370621) -1692 (revision 146892539) -1693 (revision 147464373) -17. August (revision 148288443) -17. Februar (revision 145814425) -17. Jahrhundert (revision 147869798) -17. Oktober (revision 148327370) -1700er (revision 127393249) -1707 (revision 148288721) -1710er (revision 134739897) -1720er (revision 127302296) -1730 (revision 148694277) -1730er (revision 127393280) -1731 (revision 147730204) -1735 (revision 145436596) -1736 (revision 145680122) -1737 (revision 146645905) -1738 (revision 145094942) -1739 (revision 147843445) -1740er (revision 127393296) -1741 (revision 146530178) -1742 (revision 147010984) +Wikipedia:Hauptseite (revision 201839754) +1021 (revision 209824844) +1521 (revision 209838003) +16. 
März (revision 209315535) +1861 (revision 209842356) +1946 (revision 209524711) +1951 (revision 209835290) +Beyoncé (revision 209832932) +Bolivien (revision 209448707) +Bund der Schweizerinnen gegen das Frauenstimmrecht (revision 209693790) +Bundesgrenzschutz (revision 208691250) +Clara Weaver Parrish (revision 209287165) +Dornmühle (Fränkisch-Crumbach) (revision 209842366) +Edmund Weiskopf (revision 209843848) +Enrico Letta (revision 209811620) +Enzyklopädie (revision 209393223) +Ferdinand Magellan (revision 209566955) +Freie Inhalte (revision 207460431) +Geschichte der Bundesrepublik Deutschland (bis 1990) (revision 209662112) +Giovanni Gastel (revision 209840651) +Henry Darrow (revision 209836134) +Heribert von Köln (revision 208577962) +Homonhon (revision 207392862) +Internationales Olympisches Komitee (revision 209815926) +Jeanine Áñez (revision 209843969) +Jeanne d’Arc Mujawamariya (revision 209842628) +Kommunalwahlen in Hessen 2021 (revision 209834340) +Landtagswahl in Baden-Württemberg 2021 (revision 209842530) +Mark Lubotsky (revision 209830272) +Marvelous Marvin Hagler (revision 209843820) +Max Blokzijl (revision 209843982) +Molly Pitcher (revision 209843994) +Murray Walker (revision 209841073) +März 2021 (revision 209804897) +Nekrolog 2021 (revision 207237920) +Oscarverleihung 2021 (revision 209715006) +Thomas Bach (revision 209739384) +1. Dezember (revision 209839074) +1. Januar (revision 209777781) +1. November (revision 209796293) +10. Februar (revision 209675106) +10. Mai (revision 208810425) +10. März (revision 209821650) +11. Juli (revision 209510718) +11. März (revision 209819434) +11. November (revision 209630921) +12. Dezember (revision 209724301) +12. Mai (revision 208883973) +12. März (revision 209795040) +12. September (revision 209262794) +13. Dezember (revision 209710424) +13. Januar (revision 209629276) +13. März (revision 209795132) +13. Oktober (revision 209183744) +14. Februar (revision 209414444) +14. 
September (revision 209562392) +16. April (revision 209621904) +19. August (revision 208018991) +1920 (revision 209819215) +1921 (revision 209733600) +1923 (revision 209799201) +1924 (revision 209534204) +1925 (revision 209632533) +1926 (revision 209684778) +1927 (revision 209374750) +1929 (revision 209747684) +1930 (revision 209715589) +1931 (revision 209767120) +1933 (revision 209704894) +1934 (revision 209767120) +1936 (revision 209834629) +1939 (revision 209524711) +1940 (revision 209524711) +1941 (revision 209524711) +1942 (revision 209524711) +1944 (revision 209505481) +1945 (revision 209524711) +1947 (revision 209505481) +1948 (revision 209767120) +1950 (revision 209655464) +1952 (revision 209572541) +1954 (revision 209187815) +1955 (revision 209259419) +1957 (revision 209842142) +1965 (revision 209593366) +1980er (revision 209258403) +1990er (revision 209258403) +2. März (revision 209835819) +2. September (revision 209803579) +20. April (revision 209655478) +20. Jahrhundert (revision 207914301) +20. Januar (revision 209517100) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-03 22:50:46.517106 +- Wikipedia parsing ended at: 2021-03-16 01:10:34.749053 -59 characters appeared 1746165 times. +59 characters appeared 3848604 times. 
First 31 characters: -[ 0] Char e: 14.27997926885489 % -[ 1] Char r: 8.696257226550754 % -[ 2] Char n: 8.464091308667852 % -[ 3] Char i: 8.258784250056554 % -[ 4] Char s: 6.690833913175444 % -[ 5] Char a: 6.370703799469123 % -[ 6] Char t: 5.925728668253001 % -[ 7] Char h: 4.540979804314025 % -[ 8] Char d: 4.367284878576767 % -[ 9] Char l: 4.083634708060234 % -[10] Char u: 3.899917819908199 % -[11] Char o: 3.6450163644329145 % -[12] Char c: 3.392405643223865 % -[13] Char m: 2.578565026787274 % -[14] Char g: 2.543631329227192 % -[15] Char b: 1.9455206123132693 % -[16] Char k: 1.7604292836014925 % -[17] Char f: 1.6422273954637734 % -[18] Char p: 1.519329502080273 % -[19] Char w: 1.0273370500496803 % -[20] Char z: 1.0037997554641171 % -[21] Char v: 0.9010603236234834 % -[22] Char ä: 0.4926224039538073 % -[23] Char j: 0.4661644231787947 % -[24] Char ü: 0.4094687500894818 % -[25] Char y: 0.34229296773214446 % -[26] Char ö: 0.3044958523392692 % -[27] Char ß: 0.14477440562604335 % -[28] Char x: 0.09918879372796958 % -[29] Char é: 0.07633871942227682 % -[30] Char q: 0.06099079983850323 % +[ 0] Char e: 13.62925362027374 % +[ 1] Char r: 9.404189155340482 % +[ 2] Char i: 8.18457809636949 % +[ 3] Char n: 7.829540269666611 % +[ 4] Char s: 6.804155480792516 % +[ 5] Char a: 6.737923673103287 % +[ 6] Char t: 5.6408765360115 % +[ 7] Char h: 4.424695292111114 % +[ 8] Char u: 4.194118178955279 % +[ 9] Char l: 4.1823216937881895 % +[10] Char d: 4.112010484840737 % +[11] Char o: 3.6970808116397533 % +[12] Char c: 3.4451453046351355 % +[13] Char m: 2.8236732072200725 % +[14] Char g: 2.3015618130626065 % +[15] Char b: 2.0475736137051253 % +[16] Char k: 1.9373258459431004 % +[17] Char p: 1.6796479970399656 % +[18] Char f: 1.6060368902594293 % +[19] Char z: 1.0385064298639195 % +[20] Char w: 0.9370410673584499 % +[21] Char v: 0.7894031186373033 % +[22] Char j: 0.6687879553209424 % +[23] Char ä: 0.5280616036360197 % +[24] Char y: 0.35885739348605367 % +[25] Char ü: 0.33731711550473886 % 
+[26] Char ö: 0.27194276158316105 % +[27] Char ß: 0.13979094757475696 % +[28] Char x: 0.09044838076351841 % +[29] Char é: 0.04185933392991329 % +[30] Char q: 0.02814007364748361 % -The first 31 characters have an accumulated ratio of 0.9993385504806246. +The first 31 characters have an accumulated ratio of 0.9991186414606439. -1188 sequences found. +1337 sequences found. -First 512 (typical positive ratio): 0.9934041448127945 -Next 512 (512-1024): 1.1453671331174316e-06 -Rest: 0.0001130256702826099 +First 512 (typical positive ratio): 0.9936565191798025 +Next 512 (512-1024): 0.0033731711550473885 +Rest: 0.00017862552962171364 -- Processing end: 2015-12-03 22:50:46.681265 +- Processing end: 2021-03-16 01:10:34.853392 diff --git a/script/BuildLangModelLogs/LangItalianModel.log b/script/BuildLangModelLogs/LangItalianModel.log index 6b539c0..fa16c78 100644 --- a/script/BuildLangModelLogs/LangItalianModel.log +++ b/script/BuildLangModelLogs/LangItalianModel.log @@ -1,162 +1,162 @@ = Logs of language model for Italian (it) = - Generated by BuildLangModel.py -- Started: 2016-09-21 18:43:12.831409 -- Maximum depth: 5 +- Started: 2021-03-16 01:25:53.681909 +- Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Pieve Ligure (revision 83186252) -010 (prefisso) (revision 76157203) -1000 (revision 83185341) -1143 (revision 70627567) -1162 (revision 70627612) -118 - Emergenza sanitaria (revision 83267411) -1201 (revision 77523243) -1202 (revision 76764411) -1374 (revision 78259457) -1404 (revision 70628069) -1520 (revision 76854924) -1537 (revision 70628296) -1582 (revision 80626188) -1584 (revision 76837051) -1600 (revision 76869356) -1619 (revision 70628455) -1742 (revision 70628675) -1748 (revision 70628682) -1749 (revision 70628684) -1750 (revision 70628690) -1754 (revision 70628697) -1775 (revision 70628734) -1797 (revision 78338823) -1798 (revision 82047236) -1803 (revision 77502534) -1805 (revision 79369853) -1809 (revision 70628789) -1810 (revision 82930218) 
-1814 (revision 78338825) -1815 (revision 82669615) -1816 (revision 83185384) -1818 (revision 72407239) -1823 (revision 74880156) -1859 (revision 83185401) -1860 (revision 83185403) -1861 (revision 83185412) -1868 (revision 83185430) -1874 (revision 83185441) -1897 (revision 83185267) -1908 (revision 83185631) -1909 (revision 83185630) -1913 (revision 83185626) -1915 (revision 83185625) -1917 (revision 83185270) -1920 (revision 83185621) -1921 (revision 83185619) -1923 (revision 83185616) -1925 (revision 83185614) -1926 (revision 83185612) -1928 (revision 83185610) -1929 (revision 83185609) -1939 (revision 83185598) -1946 (revision 83185590) -1947 (revision 83185589) -1948 (revision 83185587) -1951 (revision 83185584) -1956 (revision 83185478) -1960 (revision 83185487) -1964 (revision 83185493) -1965 (revision 83185494) -1969 (revision 83185500) -1970 (revision 83185503) -1971 (revision 83185505) -1975 (revision 83185510) -1976 (revision 83185513) -1977 (revision 83185514) -1980 (revision 83185518) -1981 (revision 83308867) -1983 (revision 83185524) -1985 (revision 83185526) -1988 (revision 83185280) -1990 (revision 83185531) -1995 (revision 83185538) -1999 (revision 83326325) -2000 (revision 83185544) -2001 (revision 83309058) -2002 (revision 83185545) -2003 (revision 83185546) -2004 (revision 83185283) -2005 (revision 83185285) -2006 (revision 83185547) -2007 (revision 83185549) -2008 (revision 83185551) -2009 (revision 83185552) -2010 (revision 83185287) -2012 (revision 83185289) -712 (revision 70630167) -749 (revision 78272323) -ATP (Provincia di Genova) (revision 82754117) -Abbazia di San Colombano (revision 83062997) -Abbazia di San Fruttuoso (revision 83288120) -Acacia dealbata (revision 83036867) -Acquedotto (revision 82973825) -Affresco (revision 82000422) -Agricoltura (revision 82578266) -Allevamento (revision 82971452) -Altitudine (revision 82971213) -Angelo (revision 82333116) -Anni 1960 (revision 83161222) -Anni 1970 (revision 81663175) -Antica Roma 
(revision 83125874) +Pieve Ligure (revision 118508492) +010 (prefisso) (revision 94383168) +AMT (Genova) (revision 118888771) +Abbazia di San Colombano (revision 119100076) +Abbazia di San Fruttuoso (revision 119098176) +Acacia dealbata (revision 118537500) +Affresco (revision 119234348) +Agenzia nazionale per le nuove tecnologie, l'energia e lo sviluppo economico sostenibile (revision 119261985) +Agricoltura (revision 119211593) +Altitudine (revision 118983270) +Antica Roma (revision 118468482) +Anton Maria Maragliano (revision 116868790) +Appennino Ligure (revision 117194376) +Arcidiocesi di Genova (revision 119158953) +Area (revision 118021697) +Area naturale marina protetta Portofino (revision 117836953) +Arenzano (revision 118507675) +Austria (revision 119220244) +Avegno (revision 118656626) +Bargagli (revision 118656627) +Batteria di Punta Chiappa (revision 118356835) +Battesimo (revision 118993799) +Bogliasco (revision 118656629) +Bogliasco Pieve (revision 118656629) +Borzonasca (revision 118854360) +Busalla (revision 118656635) +Calcio (sport) (revision 118995232) +Calcio a 5 (revision 118431165) +Camogli (revision 118850151) +Campo Ligure (revision 119083085) +Campomorone (revision 119226877) +Cantiere navale (revision 115540115) +Carabinieri (revision 119285803) +Carasco (revision 118801735) +Caravella (revision 118751709) +Casarza Ligure (revision 118656643) +Casella (Italia) (revision 118797269) +Castello della Dragonara (revision 108868054) +Castiglione Chiavarese (revision 118656646) +Centrismo (revision 117397211) +Centro-destra (revision 117992364) +Centrolabrus melanocercus (revision 116914326) +Ceranesi (revision 118656648) +Cesare Lanza (revision 115376996) +Chiavari (revision 119146951) +Chiesa di San Michele Arcangelo (Pieve Ligure) (revision 119097578) +Chiesa di Santa Croce (Pieve Ligure) (revision 119097599) +Chilometro quadrato (revision 116585233) +Cicagna (revision 118656655) +Circondario di Genova (revision 113691033) +Città dell'olio 
(revision 118165836) +Città metropolitana di Genova (revision 119014943) +Città metropolitane d'Italia (revision 119240923) +Classificazione climatica dei comuni italiani (revision 118213893) +Classificazione sismica dell'Italia (revision 118461862) +Claudio Burlando (revision 119123207) +Codice catastale (revision 116588085) +Codice postale (revision 105346722) +Cogoleto (revision 118508042) +Cogorno (revision 118962627) +Compagnia di Gesù (revision 119271066) +Comune (Italia) (revision 118913656) +Comune medievale (revision 113420512) +Comuni d'Italia (revision 119120484) +Comuni della Liguria (revision 113527316) +Comunità montana Fontanabuona (revision 105560751) +Concilio di Trento (revision 118571991) +Congresso di Vienna (revision 118881415) +Coordinate geografiche (revision 118353691) +Corallo (revision 117035534) +Coreglia Ligure (revision 118656657) +Corona (copricapo) (revision 117780990) +Cristo degli abissi (revision 117435230) +Cristoforo Colombo (revision 119014639) +Croce (revision 117653124) +Crocefieschi (revision 118656658) +Crêuza (revision 119275449) +Davagna (revision 118656659) +Decreto del presidente della Repubblica (revision 119120849) +Democrazia Cristiana (revision 119162011) +Densità di popolazione (revision 119143170) +Dipartimento di Genova (revision 118450361) +Ebano (revision 116535223) +Erba sintetica (revision 114157150) +Etnico (onomastica) (revision 117289144) +Fascia (Italia) (revision 118955929) +Favale di Malvaro (revision 118656662) +Federico Barbarossa (revision 118793984) +Fermata ferroviaria (revision 119085486) +Ferrovia Genova-Pisa (revision 119025272) +Flora (revision 110652725) +Floricoltura (revision 113487805) +Fontanigorda (revision 118803588) +Francesco Bossi (vescovo) (revision 117422608) +Frazione (geografia) (revision 119001222) +Fuso orario (revision 119022172) +Galleria (ingegneria) (revision 115407813) +Gas (revision 117414169) +Genova (revision 119208791) +Germania nazista (revision 119177156) +Giacomo il 
Maggiore (revision 118986303) == End of Parsed pages == -- Wikipedia parsing ended at: 2016-09-21 18:46:08.840718 +- Wikipedia parsing ended at: 2021-03-16 01:31:12.602302 -59 characters appeared 823241 times. +54 characters appeared 1487235 times. First 34 characters: -[ 0] Char i: 11.823147778111148 % -[ 1] Char a: 11.252112078965942 % -[ 2] Char e: 10.910170897707962 % -[ 3] Char o: 8.936386793174782 % -[ 4] Char n: 7.317055394471364 % -[ 5] Char l: 6.931263141655967 % -[ 6] Char r: 6.521784021932824 % -[ 7] Char t: 6.386708145002497 % -[ 8] Char s: 4.572415610981475 % -[ 9] Char c: 4.116291584116923 % -[10] Char d: 3.9770856893667834 % -[11] Char u: 2.8944136650142545 % -[12] Char m: 2.762860450342002 % -[13] Char p: 2.6809889206198427 % -[14] Char g: 2.1493098618751985 % -[15] Char v: 1.5369739845318686 % -[16] Char b: 1.2855287819727153 % -[17] Char f: 0.9932692856648295 % -[18] Char z: 0.9664241698360504 % -[19] Char h: 0.7159507361756764 % -[20] Char q: 0.2416060424590126 % -[21] Char k: 0.18876610858788617 % -[22] Char à: 0.15596890825408355 % -[23] Char y: 0.12462936126844994 % -[24] Char è: 0.11600491229178332 % -[25] Char w: 0.10628722330398996 % -[26] Char x: 0.10312897438295712 % -[27] Char j: 0.07555503188009344 % -[28] Char ù: 0.05575524056746445 % -[29] Char ò: 0.03304014255849745 % -[30] Char é: 0.021014502436103158 % -[31] Char ì: 0.0191924357508919 % -[32] Char á: 0.004737373381549267 % -[33] Char ó: 0.003644133370422513 % +[ 0] Char i: 11.700840822062418 % +[ 1] Char e: 11.23655642854021 % +[ 2] Char a: 11.108197426768466 % +[ 3] Char o: 9.061513479712351 % +[ 4] Char n: 7.150383093458666 % +[ 5] Char l: 7.047440384337378 % +[ 6] Char t: 6.5587482812064 % +[ 7] Char r: 6.521363469794619 % +[ 8] Char s: 4.669067094305877 % +[ 9] Char c: 4.495120139049982 % +[10] Char d: 3.939861555167811 % +[11] Char u: 2.7531627483215497 % +[12] Char p: 2.6924460492121285 % +[13] Char m: 2.5125820734450173 % +[14] Char g: 1.9460273594959776 % +[15] Char v: 
1.64123356429885 % +[16] Char f: 1.1068862688142762 % +[17] Char b: 1.0097933413347588 % +[18] Char z: 0.9880079476343685 % +[19] Char h: 0.7280624783574889 % +[20] Char q: 0.27574660359660713 % +[21] Char à: 0.2058854182425777 % +[22] Char è: 0.14859790147488458 % +[23] Char ò: 0.10186688721015845 % +[24] Char ù: 0.07302141221797497 % +[25] Char x: 0.06501998675394272 % +[26] Char k: 0.05291699025372587 % +[27] Char y: 0.04471384818135668 % +[28] Char w: 0.04115018810073727 % +[29] Char ì: 0.041015710361845974 % +[30] Char é: 0.024474948478216286 % +[31] Char j: 0.019028600053118707 % +[32] Char ö: 0.006791125814010562 % +[33] Char ó: 0.004505004252858493 % -The first 34 characters have an accumulated ratio of 0.9997947138201325. +The first 34 characters have an accumulated ratio of 0.9997202863031062. -872 sequences found. +921 sequences found. -First 512 (typical positive ratio): 0.9989484485502651 -Next 512 (512-1024): 1.214711123474171e-06 -Rest: -4.336808689942018e-17 +First 512 (typical positive ratio): 0.9992462827093448 +Next 512 (512-1024): 0.0007302141221797497 +Rest: -2.0166160408230382e-17 -- Processing end: 2016-09-21 18:46:08.920456 +- Processing end: 2021-03-16 01:31:12.679004 diff --git a/script/BuildLangModelLogs/LangSpanishModel.log b/script/BuildLangModelLogs/LangSpanishModel.log index 1fd5d83..3b3611c 100644 --- a/script/BuildLangModelLogs/LangSpanishModel.log +++ b/script/BuildLangModelLogs/LangSpanishModel.log @@ -1,109 +1,161 @@ = Logs of language model for Spanish (es) = - Generated by BuildLangModel.py -- Started: 2015-12-12 18:37:37.085123 -- Maximum depth: 2 -- Max number of pages: 50 +- Started: 2021-03-16 11:26:55.275471 +- Maximum depth: 4 +- Max number of pages: 100 == Parsed pages == -Wikipedia:Portada (revision 84894710) -11 de diciembre (revision 87735970) -12 de diciembre (revision 87742023) -13 de diciembre (revision 87697780) -1474 (revision 66715698) -1915 (revision 86935345) -2000 (revision 87686385) -2015 (revision 87743360) 
-Actuación (revision 87459085) -Akiyuki Nosaka (revision 87726149) -Alberto Podestá (revision 87729965) -Alejandro Magno (revision 87717064) -Argentina (revision 87742018) -Arnold Peralta (revision 87733100) -Atentados del 11 de diciembre de 2007 (revision 87720544) -Cantante (revision 86761085) -Canto (revision 87664585) -Carlo Furno (revision 87726011) -Ciencia ficción (revision 87662615) -Copa Mundial de Clubes de la FIFA 2015 (revision 87734956) -Corona de Castilla (revision 87209578) -Crisis migratoria en Europa (revision 87609406) -Dictadura de Primo de Rivera (revision 87371131) -Dionisio Miguel Recio (revision 87724426) -Disneyland (revision 87665192) -Dolph Schayes (revision 87730770) -Día Internacional de las Montañas (revision 87739490) -El discurso del rey (revision 87570241) -Elecciones regionales de Francia de 2015 (revision 87744011) -Estados Unidos (revision 87510736) -Fiction House (revision 87732511) -Filoxeno de Eretria (revision 83958621) -Frank Sinatra (revision 87742871) -Fundación Wikimedia (revision 87703852) -Geoffrey Marcy (revision 87706505) -Gheorghe Gruia (revision 87737327) -Grupo de Acción Republicana (revision 87739104) -Guerra contra el Estado Islámico (revision 87648946) -Here We Go Again (canción) (revision 87680365) -Isaac Asimov (revision 87591711) -Isabel I de Castilla (revision 87743713) -John "Hot Rod" Williams (revision 87730438) -José Subirà-Puig (revision 87740413) -Julio Terrazas Sandoval (revision 87736542) -Libertad Lamarque (revision 87508996) -Mosaico de Issos (revision 87731652) -Museo Arqueológico Nacional de Nápoles (revision 87302262) -Philip K. 
Dick (revision 87725371) -Planet Comics (revision 86698920) +Wikipedia:Portada (revision 123425818) +15 de marzo (revision 134002604) +16 de marzo (revision 133912948) +17 de marzo (revision 134014922) +1971 (revision 133880223) +1996 (revision 133952177) +2021 (revision 134007556) +Accipitridae (revision 132476150) +Alan Tudyk (revision 133512405) +América del Norte (revision 133943336) +Aquila (animal) (revision 117806396) +Aquila chrysaetos (revision 131580419) +Aranjuez (revision 134014704) +Aurora Cornu (revision 134003763) +Beatlemanía en el Reino Unido (revision 127586904) +Bebe Daniels (revision 134008877) +Bob Walkup (revision 134014429) +Carlos IV de España (revision 133996847) +Carlos Velasco Carballo (revision 133836906) +Centre Bell (revision 121340657) +Chemancheri Kunhiraman Nair (revision 134003097) +Comunidad de Madrid (revision 133999674) +Copa Libertadores 2021 (revision 134005909) +Copa Libertadores Femenina 2020 (revision 134010574) +Derecho del consumo (revision 133897891) +Elecciones generales de los Países Bajos de 2021 (revision 133978338) +España (revision 133914408) +Especie (revision 133121989) +Estados Unidos (revision 134015635) +Eurasia (revision 133581203) +Familia (biología) (revision 132469743) +Fernando VII de España (revision 133996527) +Fernando VI de España (revision 133038817) +Frankie de la Cruz (revision 134001053) +Fundación Wikimedia (revision 133870365) +Género (biología) (revision 132578267) +Henry Darrow (revision 134014606) +Hueso oracular (revision 133943486) +Hungría (revision 133720525) +Incendios en la Patagonia argentina de 2021 (revision 134013731) +Iztapalapa (revision 133879018) +Jim Dornan (revision 134003749) +Josep Anton Codina Olivé (revision 134001289) +Laxman Pai (revision 134003882) +Lorenzo I Suárez de Figueroa (revision 130396052) +Manuel Godoy (revision 133790185) +Mark Lubotski (revision 134012323) +Marvin Hagler (revision 133992695) +Motín de Aranjuez (revision 133995861) +Orden de Santiago 
(revision 132833929) +Pandemia de COVID-19 (revision 133965486) +Partido judicial de Aranjuez (revision 119111968) +Pirámide de Mayo (revision 134001660) +Problema de los puentes de Königsberg (revision 133982384) +Protestas en Birmania de 2021 (revision 134001180) +Protestas en Paraguay de 2021 (revision 133946792) +Raoul Casadei (revision 134004118) +Reconquista (revision 134015214) +Reserva del Regajal-Mar de Ontígola (revision 130660000) +Reserva natural del Carrizal de Villamejor (revision 133997932) +Revolución de Octubre (revision 133949459) +Reyes Católicos (revision 133817736) +Río Jarama (revision 133989542) +Sitio de Osaka (revision 133841594) +Tajo (revision 133599349) +The Beatles (revision 133923045) +The Beatles en los Estados Unidos (revision 132489522) +Thomas E. Dewey (revision 133282206) +Tirreno-Adriático 2021 (revision 133997271) +Torneo de Acapulco 2021 (revision 134007806) +Torneo de Dubái 2021 (revision 133993636) +Wikipedia en español (revision 133678639) +Zona especial de conservación (revision 125067184) +África (revision 133842601) +Águila (revision 133882643) +0 de marzo (revision 124578320) +1086 (revision 131546903) +10 de marzo (revision 133880871) +1190 (revision 133725464) +11 de marzo (revision 133917865) +12 Horas de Sebring (revision 130945879) +12 de marzo (revision 133976376) +1311 (revision 130818429) +13 de febrero (revision 133955522) +13 de marzo (revision 133955664) +1416 (revision 130880976) +1455 (revision 130905583) +1493 (revision 130905628) +14 de marzo (revision 133988159) +1530 (revision 130937867) +1552 (revision 133149262) +1575 (revision 130983277) +1583 (revision 130984233) +1591 (revision 130984579) +1594 (revision 130984689) +15 de abril (revision 134002206) +15 de febrero (revision 133748957) +1638 (revision 131037338) +1657 (revision 131045532) +1660 (revision 131045617) +1666 (revision 132768900) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-12 18:39:02.288858 +- Wikipedia parsing ended 
at: 2021-03-16 11:33:00.148262 -52 characters appeared 991829 times. +54 characters appeared 1548023 times. First 33 characters: -[ 0] Char e: 12.571925200815867 % -[ 1] Char a: 11.81988024145291 % -[ 2] Char o: 8.07941691561751 % -[ 3] Char n: 7.234513207417812 % -[ 4] Char s: 7.042242160695039 % -[ 5] Char i: 7.040528155559072 % -[ 6] Char r: 6.8208330266608455 % -[ 7] Char l: 5.722559029832763 % -[ 8] Char d: 5.275707808503281 % -[ 9] Char t: 4.668647518876742 % -[10] Char c: 4.466999855821921 % -[11] Char u: 3.673717949364255 % -[12] Char m: 2.710547886782903 % -[13] Char p: 2.4541528832086983 % -[14] Char b: 1.3867309788280036 % -[15] Char g: 1.2748165258325779 % -[16] Char f: 0.925058654263991 % -[17] Char y: 0.9045914164639268 % -[18] Char v: 0.8877538365988492 % -[19] Char ó: 0.8641610600214351 % -[20] Char h: 0.7369213846338432 % -[21] Char q: 0.5913317719082624 % -[22] Char í: 0.5612862701130941 % -[23] Char j: 0.43283670874717317 % -[24] Char z: 0.38071078784750195 % -[25] Char á: 0.37587124393418625 % -[26] Char é: 0.29632124085905936 % -[27] Char k: 0.2001353055819098 % -[28] Char x: 0.18743150280945606 % -[29] Char ñ: 0.17462687620547493 % -[30] Char ú: 0.12865120902897575 % -[31] Char w: 0.0972949974239511 % -[32] Char ü: 0.004436248587206061 % +[ 0] Char e: 12.61415366567551 % +[ 1] Char a: 11.863648020733542 % +[ 2] Char o: 8.178496055937154 % +[ 3] Char n: 7.268238262609793 % +[ 4] Char i: 6.931486160089352 % +[ 5] Char s: 6.9263182782167965 % +[ 6] Char r: 6.65558586661826 % +[ 7] Char l: 5.899266354569667 % +[ 8] Char d: 5.353731824397958 % +[ 9] Char t: 4.62951777848262 % +[10] Char c: 4.489080588595906 % +[11] Char u: 3.50666624462298 % +[12] Char m: 2.6851022239333653 % +[13] Char p: 2.477159577086387 % +[14] Char b: 1.394552923309279 % +[15] Char g: 1.3049547713438365 % +[16] Char v: 0.9327380794729794 % +[17] Char f: 0.9320274957155029 % +[18] Char y: 0.9299603429664806 % +[19] Char ó: 0.786745416573268 % +[20] Char h: 0.7480509010525037 % 
+[21] Char í: 0.5318396432094356 % +[22] Char q: 0.49553527305472855 % +[23] Char z: 0.47085863711327286 % +[24] Char j: 0.408844054642599 % +[25] Char á: 0.39095026365887325 % +[26] Char é: 0.305228023097848 % +[27] Char ñ: 0.23759336909076934 % +[28] Char x: 0.19883425504659816 % +[29] Char k: 0.19489374511877408 % +[30] Char ú: 0.13029522171182212 % +[31] Char w: 0.10923610308115578 % +[32] Char ü: 0.0067182464343230035 % -The first 33 characters have an accumulated ratio of 0.9999263986029848. +The first 33 characters have an accumulated ratio of 0.9998830766726332. -897 sequences found. +1002 sequences found. -First 512 (typical positive ratio): 0.9970385677528184 -Next 512 (512-1024): 1.0082383152741046e-06 -Rest: 4.597017211338539e-17 +First 512 (typical positive ratio): 0.9966074680689881 +Next 512 (512-1024): 6.718246434323004e-05 +Rest: 3.209238430557093e-17 -- Processing end: 2015-12-12 18:39:02.460105 +- Processing end: 2021-03-16 11:33:00.247475 diff --git a/script/header-template.cpp b/script/header-template.cpp index d7c8217..286078a 100644 --- a/script/header-template.cpp +++ b/script/header-template.cpp @@ -36,3 +36,4 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" diff --git a/src/LangModels/LangArabicModel.cpp b/src/LangModels/LangArabicModel.cpp index 0a6d654..89157f6 100644 --- a/src/LangModels/LangArabicModel.cpp +++ b/src/LangModels/LangArabicModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Arabic *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-13 18:33:58.848027 + * On: 2021-03-16 11:42:48.952857 **/ /* Character Mapping Table: @@ -67,17 +68,17 @@ static const unsigned char Iso_8859_6_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ 
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */ - 64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */ - 62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */ + 66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */ + 51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,ILL,ILL,ILL,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,SYM,ILL,ILL, /* AX */ ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,ILL,ILL,ILL,SYM, /* BX */ - ILL, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */ - 30, 5, 26, 12, 21, 23, 28,SYM, 33, 10, 29,ILL,ILL,ILL,ILL,ILL, /* DX */ - 36, 13, 14, 17, 1, 3, 6, 16, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */ + ILL, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */ + 28, 6, 27, 12, 22, 21, 29,SYM, 33, 10, 30,ILL,ILL,ILL,ILL,ILL, /* DX */ + 40, 13, 15, 17, 1, 3, 5, 14, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */ SYM,SYM,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,159 +89,172 @@ static const unsigned char Windows_1256_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */ - 64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 
37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */ - 62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 95,SYM, 96, 92, 97, 98, /* 8X */ - 53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 84,SYM, 99,SYM,100,SYM,SYM,101, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */ + 66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */ + 51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 97,SYM,101, 93, 98,102, /* 8X */ + 82,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 94,SYM,103,SYM,104,SYM,SYM,105, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 99,SYM,SYM,SYM,SYM,SYM, /* AX */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 103, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */ - 30, 5, 26, 12, 21, 23, 28,SYM, 20, 33, 10, 29, 36, 13, 14, 17, /* DX */ - 104, 1, 93, 3, 6, 16, 4,105,106, 94,107,108, 24, 2,109,110, /* EX */ - SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,112,SYM,113,114,SYM,SYM,115, /* FX */ + 106, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */ + 28, 6, 27, 12, 22, 21, 29,SYM, 20, 33, 10, 30, 40, 13, 15, 17, /* DX */ + 107, 1, 96, 3, 5, 14, 4,108,109, 89,110,111, 24, 2,100,112, /* EX */ + SYM,SYM,SYM,SYM, 95,SYM,SYM,SYM,SYM,113,SYM,114, 92,SYM,SYM,115, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 64; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 62, 66, 63, 67, 54, 69, 59, 73, 60, 83, 58, 84, 61, 97, 36, + 98, 55, 99, 46, 100, 48, 101, 37, 102, 53, 103, 56, 104, 50, 105, 38, + 108, 44, 109, 47, 110, 43, 111, 45, 112, 51, 114, 41, 115, 42, 116, 39, + 117, 52, 118, 57, 121, 49, 1569, 32, 1570, 34, 1571, 16, 1572, 35,1573, 23, 
+ 1574, 31, 1575, 0, 1576, 8, 1577, 9, 1578, 7, 1579, 26, 1580, 19,1581, 18, + 1582, 25, 1583, 11, 1584, 28, 1585, 6, 1586, 27, 1587, 12, 1588, 22,1589, 21, + 1590, 29, 1591, 20, 1592, 33, 1593, 10, 1594, 30, 1600, 40, 1601, 13,1602, 15, + 1603, 17, 1604, 1, 1605, 3, 1606, 5, 1607, 14, 1608, 4, 1609, 24,1610, 2, +}; + /* Model Table: - * Total sequences: 1479 - * First 512 sequences: 0.9696025116913417 - * Next 512 sequences (512-1024): 0.029166911858880054 - * Rest: 0.0012305764497782395 + * Total sequences: 1820 + * First 512 sequences: 0.9644868613755061 + * Next 512 sequences (512-1024): 0.03359397057105059 + * Rest: 0.0019191680534433112 * Negative sequences: TODO */ static const PRUint8 ArabicLangModel[] = { - 2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,1,3,3,3,3,2,2,3, - 3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,1,3,3,3,3,3,3,3, + 3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 1,2,3,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,1,3,3,3,3,2,2,3, - 2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,2,3,3,3,2,2,2,2, - 0,2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,3,2,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,2,3,2,2, + 2,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,2,3,3,2,3,2,3,2, + 0,2,2,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,3,2,3,3,2,3, - 1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 
3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,0,3,2,2,3,2,2,2,3,2, - 0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,2,2, - 0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,0,0,0,0,0,1,3,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,1, + 3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,2,2,2,3,3,2,3,2, + 0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1,3,2,2,3,2,3,3,3, + 2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,3,3,2, + 0,2,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,2,2, + 1,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,2,1,2,2,2,2,2,2,2, - 1,2,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,2,3,0,3,2,0,3,3,3,0,2,0, - 0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,0,2,0,0,3,3,2,3,0,2,0,2, - 2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,3,3,1,0,0,2,2,0,1,0,1,0,1, - 0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,3,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,2,2,1,3,2,2,2, - 1,3,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,0,2,1,3,2,0,3,2,0,2,0,3,0,2,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,2,3,2,3,2,3,2,2, + 
3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,0,2,0,2,2,3,3,0,2,0,3,3,2,3,0,0, + 0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,0,0,2,1,3,3,3,2,0,0,2,2, + 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,2,3,0,0,0,2,2,1,1,0,0,1,1, + 0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,2,3,2,3,2,2,1,2,2,3,2,2, + 1,3,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,3,0,3,2,2,2,2,0,2,0,3,2,0,2,0,2,0,0,2,3,2,0,0, + 0,1,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,0,1,2,1,3,3,2,0,2,0,1,2,2,2,0,0, + 0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,2,3,3,2,2,2,3,2, 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,1,3,2,1,2,0,2,2,0,3,2,2,0,0,2,0,2,1,2,0,3,0, - 0,1,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,2,2,2,1,0,2,3,3,0,1,3,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,3,2,1,3,3,3,3,0,2,3,0,3,2,2,0,3,2,0,3,2,3,0,2,0, - 0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,1,2,1,0,1,0,0,1,0,3,2,0,2,2,2, - 0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,2,0,2,1,2,1,1,0,2,1,0,0,0,1,0,2, - 1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,3,3,3,2,3,2,3,3,2,1,2,2,2,3,3,2,2,2,0,0,0,2,3,1,0,0,2,1,2, - 0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,3,3,3,1,2,3,2,0,2,3,3,3,2,3,0,2,2,2,3,2,2,0,3,0,2,2,2,3,2,3,1, + 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,1,1,2,1,1,2,3,3,3,1,2,1, + 0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 
3,3,3,3,3,2,3,3,3,3,0,3,3,3,2,3,0,3,1,3,2,3,2,0,2,0,2,2,2,3,0,0, + 0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,1,2,1,0,0,1,0,1,0,1,3,2,0,2,2, + 0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,2,3,3,3,2,2,2,2,3,2,0,2,0,2,0,1,0,2,1,0,1,0,0,2,2, + 1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,0,1,3,0,2,2,0,0,2,2,0,0,0,0,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,3,2,3,3,3,2,3,2,1,3,3,2,2,3,2,2,2,0,1,0,2,3,0,0,2,0,2,2, + 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,3,3,1,3,2,2,2,0,3,2,3,2,2,3,0,2,2,2,2,2,2,0,0,2,2,2,3,3,2,0, 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,1,3,0,2,0,0,2,2,2,0,0,0,2,0,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0, - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,3,2,3,2,3,2,2,0,0,2,0,0,1,3,2,0,3,0,1,2,0,2,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,3,2,2,0,2,2,1,2,2,2,2,0,0,0,0,1,2,2,0,0,1,0,2, - 2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,2,1,1,2,3,1,2,2,0,0,0,0,0,0,1,0,0,2,0,1,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,2,3,2,0,2,0,1,2,0,2,1,2,0,0,0,2,2,0,0,0,2,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,2,1,2,2,2,0,0,2,0,0,2,2,1,0,2,1,0,2,0,2,0,2,0, - 0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,2,2,2,2,2,2,0,0,0,2,2,0,3,3,0,2,0,0,0,0,2,2,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,2,2,3,2,2,2,2,2,2,0,2,2,2,2,2,2,0,1,0,1,2,0,1,1,1,0, - 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,2,3,2,2,1,2,3,2,0,0,0,2,0,0,3,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,3,2,2,2,3,2,2,0,2,0,2,2,2,2,0,1,2,1,1,0,2,0,1,0,3,1,2,0,1,2,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,2,1,2,1,1,0,2,2,0,2,0,2,2,0,0,0,2,0,0,2,2,1,2,0,0,0,0, + 0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,2,1,3,2,2,2,0,0,0,0,1,3,3,2,0,0,1,0,2,3,2,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,3,2,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0, + 0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,2,1,2,2,2,0,0,0,0,2,2,0,1,0,0,1,2, + 2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,2,2,2,2,2,2,2,0,0,2,3,1,0,3,1,2,0,0,0,0,2,1,0,0,0,0,0,1, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0, - 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,2,1,2,2,2,2,2,1,2,0,2,1,2,0,0,1,0,1,0,1,0,0,0,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,1,1,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,2,0,0,0,1,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,1,2,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,1,0,0,0,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,2,2,2,2,2,1,1,2,0,2,2,2,0,0,2,0,0,0,1,0,0,0,2,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,2,2,2,2,2,2,1,2,2,2,1,0,1,1,1,0,0,0,0,2,0,2,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,1,2,2,2,0,1,0,2,1,2,0,0,0,0,2,0,1,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,0,1,2,1,1,2,0,2,1,0,0,0,1,0,1,0,0,0,0,0,0,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,1,2,0,0,2,1,2,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,1,0,0,1,2,0,2,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0, - 2,2,1,0,2,2,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,3,3,1,3,2,2,2,3,2,0,2,2,0,0,0,2,2,2,0,0,0,2,2,0,0,0,0,2,1, + 0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,0,0,1,0,1,2,1,2,0,1,1,0,2,2,2,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,2,2,2,3,2,2,3,2,3,2,2,2,2,0,2,2,2,2,2,2,0,1,0,0,2,2,1,1,0, + 0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,0,1,1,0,0,2,0,2,0,0,1,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,2,3,2,2,3,2,1,2,1,0,0,2,3,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,2,2,2,3,2,2,2,1,0,2,2,2,1,0,0,2,2,1,0,0,2,0,0,3,2,2,2,0,1,1, + 1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,2,2,2,2,0,1,0,2,2,0,2,2,0,2,0,1,0,0,2,0,1,2,2,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,2,2,2,2,2,1,0,0,1,2,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,1, + 0,0,0,0,1,2,2,2,0,2,2,2,2,1,2,2,2,2,1,1,1,1,1,2,2,1,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,1,1,0,2,2,2,2,1,0,2,0,1,0,2,0,0,0,0,0,0,2,0,0,0,0,1, + 0,0,0,0,2,1,1,2,0,2,2,2,2,2,2,2,2,1,1,2,2,2,0,2,2,2,0,0,1,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,2,2,2,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,2,0,0,0,0,0,1,0, + 0,0,0,0,2,2,1,2,0,2,2,2,2,2,2,2,2,1,0,1,1,1,0,2,2,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,1,0,2,2,1,1,2,1,1,0,2,2,1,2,1,1,0,0,1,0,0,1,0,0,0, + 1,1,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1, + 0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,2,0,1,2,2,1,2,1,2,2,1,2,1,2,0,1,1,1,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,2,0,0,2,1,1,2,1,1,0,1,1,2,1,0,0,0,1,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,2,0,1,2,2,1,2,2,1,2,1,0,0,2,1,1,0,2,1,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,1,0,0,1,0,2,2,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,2,0,2,2,2,2,1,1,2,2,1,1,2,2,2,0,1,2,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,2,0,1,0,0,1,2,1,0,2,1,2,0,2,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,0,0,0,1,1,0,2,1,2,0,1,1,1,2,0,0,2,0,1,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,0,0,1,1,0,0,2,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
0,0,0,0,0,0,0,0,0,2,2,2,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,1, - 2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,1,0,0,0,1,2,1,2,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,2,2,0,2,1,1,0,2,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,1,0,2,2,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,1,0,0,1,2,2,0, + 0,0,0,0,2,2,1,1,0,2,2,1,2,2,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,2,2,0,2,2,2,2,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,2,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, + 0,0,0,0,1,1,1,2,0,2,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0, + 0,0,0,0,1,1,1,0,0,1,0,0,1,2,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,2,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0, + 0,0,0,0,2,1,1,0,0,1,1,1,2,1,1,0,1,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,1,0,0,2,0,1,0,2,0,0,0,0, + 0,0,0,0,2,2,1,0,0,1,1,1,1,1,0,0,0,1,2,0,1,0,0,1,1,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 0,0,0,0,1,2,2,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0, + 
0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,2,0,0,0,0,1,1,1,1,0,1,1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,1,0,1,1,2,0,0,1,1,0,0,0,0,0,0,1,1,0,1,2,1,1,1,1,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,2,0,1,0,1,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1, }; @@ -249,7 +263,7 @@ const SequenceModel Iso_8859_6ArabicModel = Iso_8859_6_CharToOrderMap, ArabicLangModel, 64, - (float)0.9696025116913417, + (float)0.9644868613755061, PR_FALSE, "ISO-8859-6", "ar" @@ -260,8 +274,18 @@ const SequenceModel Windows_1256ArabicModel = Windows_1256_CharToOrderMap, ArabicLangModel, 64, - (float)0.9696025116913417, + (float)0.9644868613755061, PR_FALSE, "WINDOWS-1256", "ar" }; + +const LanguageModel ArabicModel = +{ + "ar", + Unicode_CharOrder, + 64, + ArabicLangModel, + 64, + (float)0.9644868613755061, +}; diff --git a/src/LangModels/LangDanishModel.cpp b/src/LangModels/LangDanishModel.cpp index a4544a4..1ce75cb 100644 --- a/src/LangModels/LangDanishModel.cpp +++ b/src/LangModels/LangDanishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Danish *********/ /** * Generated by BuildLangModel.py - * On: 2022-11-30 19:41:17.519380 + * On: 2021-03-16 01:36:49.098484 **/ /* Character Mapping Table: @@ -67,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 
1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 50,SYM, 50,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 60, 57,SYM,SYM, 61,SYM,SYM,SYM, 43, 43, 62,SYM, /* BX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 63, 46, 64, 35, 47, 52, /* CX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 65, 37, 66, 33, 40, 55, 41, /* DX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 67, 46, 68, 35, 47, 52, /* EX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 69, 37, 70, 33, 40, 55, 71, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 40,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 56, 52,SYM,SYM, 56,SYM,SYM,SYM, 57, 58, 59,SYM, /* BX */ + 41, 32, 48, 60, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */ + 49, 54, 47, 35, 42, 61, 30,SYM, 19, 55, 38, 62, 31, 51, 50, 44, /* DX */ + 41, 32, 48, 63, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */ + 49, 54, 47, 35, 42, 64, 30,SYM, 19, 55, 38, 65, 31, 51, 50, 66, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ 
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 57,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 72, 46, 73, 35, 47, 52, /* CX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 74, 37, 75, 33, 40, 55, 41, /* DX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 76, 46, 77, 35, 47, 52, /* EX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 78, 37, 79, 33, 40, 55, 80, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 41, 32, 48, 67, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */ + 49, 54, 47, 35, 42, 68, 30,SYM, 19, 55, 38, 69, 31, 51, 50, 44, /* DX */ + 41, 32, 48, 70, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */ + 49, 54, 47, 35, 42, 71, 30,SYM, 19, 55, 38, 72, 31, 51, 50, 73, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,83 +110,74 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ 
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 50,SYM, 43,ILL, 82,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 50,SYM, 43,ILL, 83, 84, /* 9X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */ + 17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 75,ILL, 56,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 76,ILL, 56, 77, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 57,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 85, 46, 86, 35, 47, 52, /* CX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 87, 37, 88, 33, 40, 55, 41, /* DX */ - 39, 32, 44, 53, 36, 21, 20, 42, 38, 28, 89, 46, 90, 35, 47, 52, /* EX */ - 31, 48, 58, 29, 49, 59, 34,SYM, 19, 91, 37, 92, 33, 40, 55, 93, /* FX */ + SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 41, 32, 48, 78, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */ + 49, 54, 47, 35, 42, 79, 30,SYM, 19, 55, 38, 80, 31, 51, 50, 44, /* DX */ + 41, 32, 48, 81, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */ + 49, 54, 47, 35, 42, 82, 30,SYM, 19, 55, 38, 83, 31, 51, 50, 84, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Ibm865_CharToOrderMap[] = +static const int Unicode_Char_size = 60; +static const unsigned int Unicode_CharOrder[] = { - 
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 4, 16, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */ - 17, 30, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 42, 33, 28, 44, 36, 39, 21, 42, 94, 46, 38, 52, 47, 95, 36, 21, /* 8X */ - 28, 20, 20, 49, 34, 58, 96, 97, 98, 34, 33, 19,SYM, 19,SYM,SYM, /* 9X */ - 32, 35, 29, 37, 48, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */ - 45, 41, 99, 56,100,101, 57, 54,102,103,104,105,106,107, 51,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */ + 65, 4, 66, 16, 67, 23, 68, 7, 69, 0, 70, 13, 71, 10, 72, 18, + 73, 5, 74, 24, 75, 12, 76, 9, 77, 11, 78, 2, 79, 8, 80, 17, + 81, 29, 82, 1, 83, 6, 84, 3, 85, 15, 86, 14, 87, 25, 88, 27, + 89, 22, 90, 26, 97, 4, 98, 16, 99, 23, 100, 7, 101, 0,102, 13, + 103, 10, 104, 18, 105, 5, 106, 24, 107, 12, 108, 9, 109, 11,110, 2, + 111, 8, 112, 17, 113, 29, 114, 1, 115, 6, 116, 3, 117, 15,118, 14, + 119, 25, 120, 27, 121, 22, 122, 26, 197, 21, 198, 20, 201, 28,216, 19, + 229, 21, 230, 20, 233, 28, 248, 19, }; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ /* Model Table: - * Total sequences: 1065 - * First 512 sequences: 0.9958348814328518 - * Next 512 sequences (512-1024): 0.0041324290837536455 - * Rest: 3.268948339453948e-05 + * Total sequences: 936 + * First 512 sequences: 0.9962304038307248 + * 
Next 512 sequences (512-1024): 0.003769596169275244 + * Rest: -5.2909066017292616e-17 * Negative sequences: TODO */ static const PRUint8 DanishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,3,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2, - 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,2,1, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,2,2,3,3,3,3,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,1,2,3,3,3,3,2,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,0,0,2,0,0, - 3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,2,2,3,3,3,3,3,3,1,2,2,1,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,0,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,2,2,3,3,3,2,3,2,0,0,0,1,1,0, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,2,0,2,2,3,2,2,3,0,0,2, - 3,3,3,3,3,3,3,2,3,3,2,2,2,2,2,3,3,2,2,3,3,3,3,3,2,0,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,3,3,3,2,2,0,1,0,2,2,0, - 3,3,3,3,3,3,3,2,3,3,1,2,3,2,3,3,2,2,2,3,3,3,3,3,2,3,0,0,2,2,1, - 3,3,3,3,0,2,3,3,3,2,3,3,3,2,3,2,3,2,2,0,0,0,2,3,0,2,1,0,0,0,0, - 2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0,0,0,0,2,0,0,0,0,0,0, - 3,3,3,3,0,0,3,3,3,2,2,3,2,2,3,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,3,2,0,0,2,2,2,2,2,0,0,0,0, - 3,3,3,2,3,3,3,3,3,3,2,2,2,2,2,3,2,2,2,3,3,2,3,0,2,0,0,0,0,2,0, - 3,3,2,3,3,3,2,2,3,3,2,3,2,2,2,3,2,2,3,0,2,0,3,2,3,0,2,2,2,2,2, - 3,2,2,2,3,3,2,2,2,3,0,2,2,2,0,2,2,0,2,0,2,0,2,2,2,2,2,0,0,0,2, - 3,2,2,2,3,3,2,2,2,3,2,2,2,2,0,2,2,2,2,0,0,0,2,2,0,2,3,0,0,0,0, - 3,2,1,2,2,2,2,2,2,2,0,2,1,2,2,0,0,2,0,0,0,0,2,0,2,2,0,2,0,0,0, - 
2,2,3,2,2,0,2,2,2,2,2,0,2,2,2,2,2,1,2,0,0,0,0,0,1,0,2,0,0,0,0, - 0,3,2,2,2,0,2,0,2,0,2,2,0,2,2,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0, - 0,0,0,0,2,1,0,0,0,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,3,3,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,2,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,0, + 3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,2,2,3,3,3,3,3,2,2,2,0,0,2,0, + 3,3,2,3,3,3,3,2,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,3,2,0, + 3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,3,2,3,3,2,0,3,0,2,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,2,3,2,2,2,2,0,2, + 3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,3,3,2,2,3,3,3,3,2,3,2,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,3,3,2,2,2,0,2,0,2,0, + 3,3,3,3,3,3,3,2,3,2,2,3,2,2,3,3,2,2,2,3,3,3,3,2,3,2,0,0,2,0, + 3,3,3,3,2,2,3,3,0,3,3,3,3,3,3,2,3,2,2,0,0,0,2,2,3,0,0,0,0,0, + 2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0,0,0,2,0,0,0,0,0,0, + 3,3,3,3,0,0,3,3,2,3,2,2,3,2,3,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,0,0,2,2,2,2,0,0,0, + 3,3,2,3,3,3,2,2,3,3,2,2,3,2,2,3,2,2,3,0,3,0,3,3,0,0,2,0,2,2, + 3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,3,3,2,3,0,0,2,2,0,2,0, + 3,2,2,2,3,3,2,2,3,2,0,2,2,2,0,2,2,0,3,0,2,0,2,2,0,2,0,0,0,0, + 3,2,2,2,3,3,2,2,3,0,2,2,2,0,2,2,2,2,2,0,0,0,2,2,2,2,2,2,0,0, + 3,2,2,2,3,3,2,0,2,2,0,0,0,2,2,2,2,2,0,0,0,0,0,2,0,2,0,2,0,0, + 
2,2,3,2,2,0,2,2,2,2,2,2,0,0,2,0,2,2,0,0,0,0,0,2,0,0,2,0,0,0, + 0,2,0,0,2,2,0,2,2,2,0,0,2,2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,2, }; @@ -193,8 +185,8 @@ const SequenceModel Iso_8859_15DanishModel = { Iso_8859_15_CharToOrderMap, DanishLangModel, - 31, - (float)0.9958348814328518, + 30, + (float)0.9962304038307248, PR_TRUE, "ISO-8859-15", "da" @@ -204,8 +196,8 @@ const SequenceModel Iso_8859_1DanishModel = { Iso_8859_1_CharToOrderMap, DanishLangModel, - 31, - (float)0.9958348814328518, + 30, + (float)0.9962304038307248, PR_TRUE, "ISO-8859-1", "da" @@ -215,19 +207,19 @@ const SequenceModel Windows_1252DanishModel = { Windows_1252_CharToOrderMap, DanishLangModel, - 31, - (float)0.9958348814328518, + 30, + (float)0.9962304038307248, PR_TRUE, "WINDOWS-1252", "da" }; -const SequenceModel Ibm865DanishModel = +const LanguageModel DanishModel = { - Ibm865_CharToOrderMap, + "da", + Unicode_CharOrder, + 60, DanishLangModel, - 31, - (float)0.9958348814328518, - PR_TRUE, - "IBM865" + 30, + (float)0.9962304038307248, }; diff --git a/src/LangModels/LangFrenchModel.cpp b/src/LangModels/LangFrenchModel.cpp index 5baaf31..ffea661 100644 --- a/src/LangModels/LangFrenchModel.cpp +++ b/src/LangModels/LangFrenchModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: French *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-03 21:10:27.685575 + * On: 2021-03-16 01:24:27.092782 **/ /* Character Mapping Table: @@ -61,24 +62,24 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. 
*/ -static const unsigned char Windows_1252_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 57,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 58, 59, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 24, 38, 32, 46, 49, 61, 47, 27, 23, 14, 28, 41, 62, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 63, /* DX */ - 24, 38, 32, 46, 49, 64, 47, 27, 23, 14, 28, 41, 65, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 66, /* FX */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 57, 58,SYM,SYM, 59,SYM,SYM,SYM, 37, 37, 53,SYM, /* BX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 60, 41, 32, 35, /* CX */ + 54, 45, 51, 38, 30, 61, 
44,SYM, 50, 36, 46, 33, 42, 52, 62, 56, /* DX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 63, 41, 32, 35, /* EX */ + 54, 45, 51, 38, 30, 64, 44,SYM, 50, 36, 46, 33, 42, 52, 65, 53, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,101 +89,116 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 24, 38, 32, 46, 49, 68, 47, 27, 23, 14, 28, 41, 69, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 70, /* DX */ - 24, 38, 32, 46, 49, 71, 47, 27, 23, 14, 28, 41, 72, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 73, /* FX */ + SYM,SYM,SYM,SYM,SYM, 66,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 67, 41, 32, 35, /* CX */ + 54, 45, 51, 38, 30, 68, 44,SYM, 50, 36, 46, 33, 42, 52, 69, 56, /* DX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 70, 
41, 32, 35, /* EX */ + 54, 45, 51, 38, 30, 71, 44,SYM, 50, 36, 46, 33, 42, 52, 72, 53, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_15_CharToOrderMap[] = +static const unsigned char Windows_1252_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */ - 12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 74, 75,SYM,SYM, 76,SYM,SYM,SYM, 35, 35, 77,SYM, /* BX */ - 24, 38, 32, 46, 49, 78, 47, 27, 23, 14, 28, 41, 79, 39, 33, 36, /* CX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 80, /* DX */ - 24, 38, 32, 46, 49, 81, 47, 27, 23, 14, 28, 41, 82, 39, 33, 36, /* EX */ - 48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 83, /* FX */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */ + 13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 73,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 74,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 75, 53, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 
76,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 77, 41, 32, 35, /* CX */ + 54, 45, 51, 38, 30, 78, 44,SYM, 50, 36, 46, 33, 42, 52, 79, 56, /* DX */ + 23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 80, 41, 32, 35, /* EX */ + 54, 45, 51, 38, 30, 81, 44,SYM, 50, 36, 46, 33, 42, 52, 82, 53, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 76; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 18, 67, 11, 68, 10, 69, 0, 70, 17, 71, 16, 72, 19, + 73, 3, 74, 25, 75, 26, 76, 7, 77, 12, 78, 4, 79, 8, 80, 13, + 81, 20, 82, 5, 83, 2, 84, 6, 85, 9, 86, 15, 87, 31, 88, 22, + 89, 21, 90, 27, 97, 1, 98, 18, 99, 11, 100, 10, 101, 0,102, 17, + 103, 16, 104, 19, 105, 3, 106, 25, 107, 26, 108, 7, 109, 12,110, 4, + 111, 8, 112, 13, 113, 20, 114, 5, 115, 2, 116, 6, 117, 9,118, 15, + 119, 31, 120, 22, 121, 21, 122, 27, 192, 23, 194, 34, 199, 29,200, 24, + 201, 14, 202, 28, 206, 32, 207, 35, 212, 30, 217, 36, 219, 33,224, 23, + 226, 34, 231, 29, 232, 24, 233, 14, 234, 28, 238, 32, 239, 35,244, 30, + 249, 36, 251, 33, 338, 37, 339, 37, +}; + /* Model Table: - * Total sequences: 914 - * First 512 sequences: 0.997057879992383 - * Next 512 sequences (512-1024): 0.002942120007616917 - * Rest: 3.8163916471489756e-17 + * Total sequences: 1049 + * First 512 sequences: 0.997006678170155 + * Next 512 sequences (512-1024): 0.0029768569132891634 + * Rest: 1.646491655585584e-05 * Negative sequences: TODO */ static const PRUint8 FrenchLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,0,0,0,2,0,2,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,0,0,3,0,0,2,3,0,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,2,2,3,0,0,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,0,2,3,2,0,0,0,0,0,0,0, - 
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,2,3,3,3,0,2,0,0,0, - 3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,3,2,0,2,0,3,3,2,3,2,0,0,0,0,0, - 3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,3,0,0,2,2,2,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,3,0,0,2,3,0,3,3, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,3,3,2,0,0,0,0,0,2,0, - 3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,0,0,3,3,0,3,0,0,2,2,3,2,2,2,3,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,3,3,0,3,3,0,0,3,0,2,2,2,3,2,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,2,3,2,3,0,0,2,2,3,0,0,3,3,0,0,2,2,3,2,2,3,2,0,0,0,0,0, - 3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,0,2,3,2,0,0,3,3,0,2,2,0,3,0,2,2,3,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,2,2,0,3,0,0,2,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,0,2,3,3,0,0,0,0,2,0,2,0,2,0,0,0,0,0, - 3,2,3,2,3,3,0,2,3,3,0,0,0,2,3,0,2,2,0,0,0,0,2,3,0,0,2,0,3,0,0,0,0,0,0,2,0,0, - 3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,0,3,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,2,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,0,3,2,0,0,3,2,0,3,0,0,0,0,0,0,3,2,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,0,0,2,2,0,0,0,3,3,0,2,2,0,2,2,2,3,3,0,0,2,0,0, - 0,0,2,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,0,3,0,3,2,3,2,2,3,3,2,3,0,3,2,2,2,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,2,2,0,3,2,0,0,2,2,0,0,0,0,0,0,0, - 0,3,0,3,0,3,3,3,0,0,3,3,2,3,0,3,3,2,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,0,0,0,0,0,0,0,0,0,3,2,0,0,0,0,0,0,2,0,0,0,0,0, - 3,3,3,2,3,3,2,3,3,3,0,0,2,3,2,2,2,2,2,3,0,0,3,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, - 0,0,3,0,0,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,3,2,0,0,0,3,0,3,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,0,3,2,3,2,0,2,3,3,0,2,0,2,2,2,0,0,2,2,2,0,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0, - 
3,2,3,2,3,2,2,2,3,2,0,2,0,0,2,0,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, - 0,2,0,3,0,0,3,3,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,2,2,0,3,3,0,0,0,3,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,0,3,3,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,0,0,2,0,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,0,2,2,3,0,0,2,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,0,3,3,3,0,3,0,3,0,2,0,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,2,3,3,0,0,3,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,0,2,3,2,3,2,0,0,0,2,0,2,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,1,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,3,3,3,2,0,2,3,0,0,0, + 3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,3,3,0,3,0,2,3,2,2,3,3,0,3,2,0,0,2,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,0,2,2,0,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,3,2,1,0,0,3,2,3,0,2,3,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,2,2,0,0,2,2,0,0, + 3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,2,2,3,0,3,0,0,3,3,2,2,0,0,2,2,2,2,0,0,0,2, + 3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,3,3,0,0,3,0,3,2,2,0,3,2,0,0,2,0,0,2, + 3,3,3,3,3,2,3,2,3,3,2,2,3,3,3,2,2,2,3,2,0,3,0,0,3,2,2,2,3,0,2,2,0,2,2,0,0,2, + 3,3,3,3,2,3,3,3,3,3,2,2,2,3,3,2,3,2,2,3,0,3,0,0,3,2,2,0,3,2,3,2,0,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,0,0,3,2,3,0,2,0,2,0,0,3,2,0,0, + 3,3,3,3,2,3,2,2,3,3,2,2,2,2,3,1,2,0,0,3,0,2,0,0,3,0,2,0,3,0,2,0,0,0,2,0,0,2, + 3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,3,2,3,3,2,3,0,0,3,2,2,1,2,0,0,2,0,0,2,0,0,0, + 3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,3,2,0,0,2,0,0,3,0,2,0,3,0,0,0,0,2,0,0,0,0, + 
3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,2,3,2,0,2,3,2,2,2,0,0,2,1,2,3,0,0,2, + 3,3,2,3,3,3,3,3,3,3,2,2,3,2,3,2,2,2,2,2,2,3,0,0,3,1,2,2,2,0,3,2,0,0,2,0,0,2, + 2,3,2,2,0,2,0,0,2,3,1,2,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,0,2,2,0,2,2,2,2,0,2,2,2,0,0,2,0,0,0, + 3,3,2,3,0,1,3,2,2,2,1,3,0,3,3,3,0,2,2,2,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,3,1,3,3,3,3,0,0,3,3,3,2,0,3,3,0,2,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,2,2,2,0,3,3,2,2,2,2,3,0,0,1,2,1,0,0,0,3,0,2,2,2,0,0,0,0,0,0,2,0,0,0, + 3,3,3,3,2,3,2,3,3,3,2,2,3,2,3,0,2,0,2,3,0,3,0,0,0,2,2,1,0,0,0,2,0,0,0,0,0,2, + 3,3,2,3,2,2,2,2,3,3,2,2,2,1,3,2,2,2,3,2,2,2,0,0,2,0,2,3,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,2,2,3,2,0,0,0,3,3,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,2,2,0,3,0,3,3,0,0,0,0,3,3,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,2,2,2,2,3,3,2,2,2,0,2,0,2,2,2,2,0,2,0,0,0,0,2,1,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,3,0,3,3,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,3,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,3,2,0,0,0,3,2,2,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,0,2,2,2,2,0,2,3,2,2,0,0,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,2,2,2,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Windows_1252FrenchModel = +const SequenceModel Iso_8859_15FrenchModel = { - Windows_1252_CharToOrderMap, + Iso_8859_15_CharToOrderMap, FrenchLangModel, 38, - (float)0.997057879992383, + (float)0.997006678170155, PR_TRUE, - "WINDOWS-1252", + "ISO-8859-15", "fr" }; @@ -191,19 +207,29 @@ const SequenceModel Iso_8859_1FrenchModel = Iso_8859_1_CharToOrderMap, FrenchLangModel, 38, - 
(float)0.997057879992383, + (float)0.997006678170155, PR_TRUE, "ISO-8859-1", "fr" }; -const SequenceModel Iso_8859_15FrenchModel = +const SequenceModel Windows_1252FrenchModel = { - Iso_8859_15_CharToOrderMap, + Windows_1252_CharToOrderMap, FrenchLangModel, 38, - (float)0.997057879992383, + (float)0.997006678170155, PR_TRUE, - "ISO-8859-15", + "WINDOWS-1252", "fr" }; + +const LanguageModel FrenchModel = +{ + "fr", + Unicode_CharOrder, + 76, + FrenchLangModel, + 38, + (float)0.997006678170155, +}; diff --git a/src/LangModels/LangGermanModel.cpp b/src/LangModels/LangGermanModel.cpp index dd4228c..164da12 100644 --- a/src/LangModels/LangGermanModel.cpp +++ b/src/LangModels/LangGermanModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: German *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-03 22:50:46.518374 + * On: 2021-03-16 01:10:34.750155 **/ /* Character Mapping Table: @@ -61,110 +62,133 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. 
*/ -static const unsigned char Windows_1252_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42, 56, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 61, 24, 45, 62, 27, /* DX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 63, 24, 45, 64, 56, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - static const unsigned char Iso_8859_1_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */ - 18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 
9, 13, 3, 11, /* 4X */ + 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */ + 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 66, 24, 45, 67, 27, /* DX */ - 41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */ - 53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 68, 24, 45, 69, 56, /* FX */ + SYM,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */ + 50, 37, 53, 33, 49, 58, 26,SYM, 39, 60, 38, 61, 25, 44, 54, 27, /* DX */ + 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */ + 50, 37, 53, 33, 49, 58, 26,SYM, 39, 62, 38, 63, 25, 44, 54, 57, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const unsigned char Windows_1252_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 4X */ + 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */ + 17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43,ILL, /* 8X */ + 
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43, 57, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */ + 50, 37, 53, 33, 49, 58, 26,SYM, 39, 66, 38, 67, 25, 44, 54, 27, /* DX */ + 48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */ + 50, 37, 53, 33, 49, 58, 26,SYM, 39, 68, 38, 69, 25, 44, 54, 57, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const int Unicode_Char_size = 61; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 5, 66, 15, 67, 12, 68, 10, 69, 0, 70, 18, 71, 14, 72, 7, + 73, 2, 74, 22, 75, 16, 76, 9, 77, 13, 78, 3, 79, 11, 80, 17, + 81, 30, 82, 1, 83, 4, 84, 6, 85, 8, 86, 21, 87, 20, 88, 28, + 89, 24, 90, 19, 97, 5, 98, 15, 99, 12, 100, 10, 101, 0,102, 18, + 103, 14, 104, 7, 105, 2, 106, 22, 107, 16, 108, 9, 109, 13,110, 3, + 111, 11, 112, 17, 113, 30, 114, 1, 115, 4, 116, 6, 117, 8,118, 21, + 119, 20, 120, 28, 121, 24, 122, 19, 196, 23, 201, 29, 214, 26,220, 25, + 223, 27, 228, 23, 233, 29, 246, 26, 252, 25, +}; + /* Model Table: - * Total sequences: 1188 - * First 512 sequences: 0.9934041448127945 - * Next 512 sequences (512-1024): 0.006482829516922903 - * Rest: 0.0001130256702826099 + * Total sequences: 1337 + * First 512 sequences: 0.9936565191798025 + * Next 512 sequences (512-1024): 0.00616485529057582 + * Rest: 0.00017862552962171364 * Negative sequences: TODO */ static const PRUint8 GermanLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,3,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,3,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,2, - 
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,3,3,2,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,3,2,3,3,2,0,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2, - 3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,1,2, - 3,3,2,3,2,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,1,3,2,0,1,2,3, - 3,3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,2,2,3,2,3,3,3,0,0,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,2,3,2,3,3,2,0,2,2,1, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,2,3,2,3,3,3,0,0,2,0, - 3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,2,3,3,3,2,2,2,3,2,3,3,3,0,1,2,1, - 3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,2,0, - 3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,3,2,2,2,2,0,0,2,0, - 3,3,3,3,3,3,2,2,2,2,3,3,1,2,2,2,2,2,2,2,2,2,3,3,3,2,3,0,0,0,0, - 3,2,2,3,3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,3,3,2,2,2,3,3,3,0,0,2,2, - 3,2,2,3,2,3,2,0,2,2,2,3,1,2,2,2,2,2,2,2,2,2,2,1,0,2,3,0,0,2,1, - 2,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,0,0,0,0,0,2,0,0,2, - 3,2,2,3,2,3,2,2,2,2,3,3,2,2,2,1,2,1,2,0,2,0,3,2,3,2,2,0,0,2,0, - 2,3,3,0,3,1,3,3,3,3,0,0,3,2,3,3,2,2,2,1,1,0,0,0,0,0,0,2,0,0,0, - 3,3,3,2,3,3,2,2,2,3,2,3,3,3,2,2,3,2,3,2,2,2,0,2,2,2,1,0,0,1,0, - 2,3,3,2,3,0,3,3,2,3,0,1,3,3,3,2,2,3,2,2,2,2,0,0,0,0,1,3,1,0,0, - 3,2,2,3,2,2,3,2,1,2,2,2,0,2,2,3,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0, - 3,1,2,3,1,3,3,2,1,2,2,2,2,0,0,2,2,2,3,2,0,2,0,0,0,2,0,0,2,2,0, - 2,3,2,0,2,2,2,2,2,2,2,2,2,2,2,3,2,2,2,1,2,2,0,2,0,0,0,0,0,0,2, - 0,1,0,2,0,2,0,0,0,0,3,2,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,1,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,2,2, + 
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,1,2,2, + 3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,3,2,2,3,3,3,3,0,0,2,2, + 3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,2,1,2,1,1,3,3,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,0,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,2,3,3,2,2,0,0,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,3,3,1,2, + 3,3,3,2,2,3,3,3,3,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,3,1,1,0,0,2,2, + 3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,3,0,1,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,3,3,2,2,2,3,3,2,0,1,2,1, + 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,2,2,2,2,3,3,3,3,0,0,2,0, + 3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3,3,3,3,0,2,2,0, + 3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,2,3,2,2,2,1,0,2,1, + 3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,2,2,2,2,3,2,3,2,0,0,2,1, + 3,2,3,2,3,3,3,2,3,3,2,3,2,3,2,3,2,2,2,3,3,2,2,2,3,2,3,0,0,2,2, + 3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,2,2,2,0,2,0,3,3,2,3,2,0,0,1,0, + 3,2,3,2,2,3,2,2,2,2,2,3,2,1,2,1,2,2,2,1,2,2,0,2,2,0,3,0,0,2,0, + 3,2,3,2,2,3,2,1,3,2,2,3,2,2,1,1,2,0,1,2,0,1,2,3,2,3,2,0,0,2,0, + 2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,2,2,3,2,0,2,0,1,1,0,0,2,0,0,2, + 3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,2,3,3,2,2,2,2,2,0,2,2,2,1,1,1,0, + 2,3,1,3,3,1,3,3,0,3,3,0,3,2,3,3,2,2,2,1,1,0,0,0,0,1,0,2,1,0,0, + 2,3,2,3,3,0,3,3,0,3,2,1,2,3,2,2,2,2,3,2,2,2,0,0,1,0,1,3,1,0,0, + 3,2,3,2,2,2,3,2,2,2,2,2,0,2,2,3,2,1,2,1,2,2,0,0,0,0,0,0,0,0,0, + 3,1,3,1,2,3,3,2,2,2,1,3,2,1,1,2,2,3,2,1,2,2,0,0,2,0,0,0,2,0,1, + 2,2,1,2,2,2,2,1,0,2,2,2,2,2,2,2,2,2,1,2,0,2,2,0,0,0,0,0,0,0,2, + 1,0,2,0,0,2,1,0,3,1,0,1,0,1,1,0,0,1,0,0,1,2,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Windows_1252GermanModel = -{ - Windows_1252_CharToOrderMap, - GermanLangModel, - 31, - (float)0.9934041448127945, - PR_TRUE, - "WINDOWS-1252", - "de" -}; - const SequenceModel Iso_8859_1GermanModel = { Iso_8859_1_CharToOrderMap, GermanLangModel, 31, - (float)0.9934041448127945, + (float)0.9936565191798025, PR_TRUE, "ISO-8859-1", "de" }; + +const 
SequenceModel Windows_1252GermanModel = +{ + Windows_1252_CharToOrderMap, + GermanLangModel, + 31, + (float)0.9936565191798025, + PR_TRUE, + "WINDOWS-1252", + "de" +}; + +const LanguageModel GermanModel = +{ + "de", + Unicode_CharOrder, + 61, + GermanLangModel, + 31, + (float)0.9936565191798025, +}; diff --git a/src/LangModels/LangItalianModel.cpp b/src/LangModels/LangItalianModel.cpp index 4bb5dc5..c9fd4f7 100644 --- a/src/LangModels/LangItalianModel.cpp +++ b/src/LangModels/LangItalianModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Italian *********/ /** * Generated by BuildLangModel.py - * On: 2016-09-21 18:46:08.841217 + * On: 2021-03-16 01:31:12.602629 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. */ +static const unsigned char Iso_8859_1_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 37, 46, 38, 35, 55, 39, 41, 22, 30, 40, 45, 29, 36, 48, 56, /* 
CX */ + 57, 42, 23, 33, 49, 58, 32,SYM, 52, 24, 43, 59, 34, 60, 61, 44, /* DX */ + 21, 37, 46, 38, 35, 62, 39, 41, 22, 30, 40, 45, 29, 36, 48, 63, /* EX */ + 64, 42, 23, 33, 49, 65, 32,SYM, 52, 24, 43, 66, 34, 67, 68, 69, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + static const unsigned char Iso_8859_3_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 59,SYM,SYM,SYM,ILL, 60,SYM,SYM, 61, 48, 47, 62,SYM,ILL, 58, /* AX */ - SYM, 63,SYM,SYM,SYM,SYM, 64,SYM,SYM, 46, 48, 47, 65,SYM,ILL, 58, /* BX */ - 22, 32, 50,ILL, 39, 66, 67, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */ - ILL, 44, 29, 33, 51, 68, 34,SYM, 69, 28, 45, 70, 36, 71, 72, 73, /* DX */ - 22, 32, 50,ILL, 39, 74, 75, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */ - ILL, 44, 29, 33, 51, 76, 34,SYM, 77, 28, 45, 78, 36, 79, 80,SYM, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - -static const unsigned char Iso_8859_15_CharToOrderMap[] = -{ - 
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 35,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 41, 81,SYM,SYM, 41,SYM,SYM,SYM, 52, 52, 82,SYM, /* BX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */ - 56, 44, 29, 33, 51, 83, 34,SYM, 57, 28, 45, 84, 36, 85, 86, 87, /* DX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */ - 56, 44, 29, 33, 51, 88, 34,SYM, 57, 28, 45, 89, 36, 90, 91, 92, /* FX */ + SYM, 70,SYM,SYM,SYM,ILL, 71,SYM,SYM, 72, 73, 53, 74,SYM,ILL, 50, /* AX */ + SYM, 75,SYM,SYM,SYM,SYM, 76,SYM,SYM, 77, 78, 53, 79,SYM,ILL, 50, /* BX */ + 21, 37, 46,ILL, 35, 80, 81, 41, 22, 30, 40, 45, 29, 36, 48, 82, /* CX */ + ILL, 42, 23, 33, 49, 83, 32,SYM, 84, 24, 43, 85, 34, 86, 87, 44, /* DX */ + 21, 37, 46,ILL, 35, 88, 89, 41, 22, 30, 40, 45, 29, 36, 48, 90, /* EX */ + ILL, 42, 23, 33, 49, 91, 32,SYM, 92, 24, 43, 93, 34, 94, 95,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,39 +110,39 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ 
- SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 93,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */ - 47, 44, 29, 33, 51, 94, 34,SYM, 57, 28, 45, 95, 36, 96, 48, 97, /* DX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */ - 47, 44, 29, 33, 51, 98, 34,SYM, 57, 28, 45, 99, 36, 46, 48,100, /* FX */ + SYM,SYM,SYM,SYM,SYM, 96,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 37, 46, 38, 35, 97, 39, 41, 22, 30, 40, 45, 29, 36, 48, 98, /* CX */ + 53, 42, 23, 33, 49, 99, 32,SYM, 52, 24, 43,100, 34,101,102, 44, /* DX */ + 21, 37, 46, 38, 35,103, 39, 41, 22, 30, 40, 45, 29, 36, 48,104, /* EX */ + 53, 42, 23, 33, 49,105, 32,SYM, 52, 24, 43,106, 34,107,108,109, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const unsigned char Iso_8859_1_CharToOrderMap[] = +static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, 
/* 3X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */ - 56, 44, 29, 33, 51,102, 34,SYM, 57, 28, 45,103, 36,104,105,106, /* DX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */ - 56, 44, 29, 33, 51,107, 34,SYM, 57, 28, 45,108, 36,109,110,111, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,110,111,SYM,SYM,112,SYM,SYM,SYM, 47, 47,113,SYM, /* BX */ + 21, 37, 46, 38, 35,114, 39, 41, 22, 30, 40, 45, 29, 36, 48,115, /* CX */ + 116, 42, 23, 33, 49,117, 32,SYM, 52, 24, 43,118, 34,119,120, 44, /* DX */ + 21, 37, 46, 38, 35,121, 39, 41, 22, 30, 40, 45, 29, 36, 48,122, /* EX */ + 123, 42, 23, 33, 49,124, 32,SYM, 52, 24, 43,125, 34,126,127,128, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -151,109 +152,123 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 
16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */ - 13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,112,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 52,ILL, 41,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 52,ILL, 41,113, /* 9X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */ + 12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,129,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,130,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,131,132, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,114,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */ - 56, 44, 29, 33, 51,115, 34,SYM, 57, 28, 45,116, 36,117,118,119, /* DX */ - 22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */ - 56, 44, 29, 33, 51,120, 34,SYM, 57, 28, 45,121, 36,122,123,124, /* FX */ + SYM,SYM,SYM,SYM,SYM,133,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 21, 37, 46, 38, 35,134, 39, 41, 22, 30, 40, 45, 29, 36, 48,135, /* CX */ + 136, 42, 23, 33, 49,137, 32,SYM, 52, 24, 43,138, 34,139,140, 44, /* DX */ + 21, 37, 46, 38, 35,141, 39, 41, 22, 30, 40, 45, 29, 36, 48,142, /* EX */ + 143, 42, 23, 33, 49,144, 32,SYM, 52, 24, 43,145, 34,146,147,148, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 68; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 2, 66, 17, 67, 9, 68, 10, 69, 1, 70, 16, 71, 14, 72, 19, + 73, 0, 74, 31, 75, 26, 76, 5, 77, 13, 78, 4, 79, 3, 80, 12, + 81, 20, 82, 7, 83, 8, 84, 6, 85, 11, 
86, 15, 87, 28, 88, 25, + 89, 27, 90, 18, 97, 2, 98, 17, 99, 9, 100, 10, 101, 1,102, 16, + 103, 14, 104, 19, 105, 0, 106, 31, 107, 26, 108, 5, 109, 13,110, 4, + 111, 3, 112, 12, 113, 20, 114, 7, 115, 8, 116, 6, 117, 11,118, 15, + 119, 28, 120, 25, 121, 27, 122, 18, 192, 21, 200, 22, 201, 30,204, 29, + 210, 23, 211, 33, 214, 32, 217, 24, 224, 21, 232, 22, 233, 30,236, 29, + 242, 23, 243, 33, 246, 32, 249, 24, +}; + /* Model Table: - * Total sequences: 872 - * First 512 sequences: 0.9989484485502651 - * Next 512 sequences (512-1024): 0.0010515514497349433 - * Rest: -4.336808689942018e-17 + * Total sequences: 921 + * First 512 sequences: 0.9992462827093448 + * Next 512 sequences (512-1024): 0.0007537172906552294 + * Rest: -2.0166160408230382e-17 * Negative sequences: TODO */ static const PRUint8 ItalianLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,3,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,3,3,0,2,0,0,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,3,2,3,0,3,3,2,2,0, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,0,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,0,2,3,3,2,3,2,2,3,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,0,0,3,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,0,3,0,0,3,2,0,3,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,3,3,2,3,2,3,2,2,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,0,3,2,3,3,3,0,3,2,3,0,0, - 3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,0,2,0,0,0,3,0,2,3,0,0,3,2,2,2,2, - 3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,0,3,2,3,0,2,0,2,0,3,2,0,2,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,0,3,2,2,0,3,0,2,2,2,0,2,2,0,0,2, - 3,3,3,3,2,3,3,0,2,2,2,3,2,2,2,3,2,0,0,2,0,2,2,3,2,0,0,0,0,2,2,2,2,0, - 
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,3,0,2,0,3,0,3,0,2,2,2,2,3,2,0, - 3,3,3,3,0,3,3,3,2,3,0,3,2,2,3,2,2,3,0,2,0,2,0,0,2,2,2,2,2,0,2,0,0,0, - 3,3,3,3,3,2,2,2,2,0,2,3,0,2,3,0,3,2,3,3,0,3,0,3,0,2,0,2,0,3,2,0,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,0,2,0,3,0,3,0,3,0,2,0,0,3,0,3,0, - 2,3,0,2,0,0,2,0,2,0,0,3,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,2,2,0,3,0,3,0,3,0,2,2,2,0,0,0,0,2,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,0,0,0,2,0,2,0,2,2,2,0,0,0,0,0,0, - 2,0,0,0,2,0,3,0,2,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,3,2,0,2,0,2,0,0,2,2,0,3,0,0,0,2,0,3,0,0,0,0,0,0,0,0, - 3,3,3,3,0,3,0,3,2,3,0,2,0,3,0,3,0,0,0,0,0,2,0,2,0,2,3,0,0,0,0,0,0,0, - 3,3,3,3,2,2,2,2,0,2,2,3,2,0,0,0,0,0,0,2,0,3,0,2,0,2,0,2,0,0,0,0,0,2, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,2,3,2,2,3,3,2,3,2,3,0,2,2,0,2,3,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0, - 0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,3,2,2,0,2,2,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,0,0,0,3,2,2,0,0,2,2,0,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,0,2,3,0,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,3,3,3,2,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3,0,0,3,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,3,3,0,0,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,0,3,3,3,3,3,2,0,2, + 3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,3,3,0,2,3,3,3,3,0,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,3,3,3,3,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,3,3,2,0,3,2,0,3,3,2,3,2,0,0,0, + 
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,3,2,3,2,2,2,3,3,3,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,3,0,3,3,3,0,3,2,3,0,0, + 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,2,2,0,3,2,2,2,0,3,2, + 3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,3,3,0,2,3,2,2,3,0,2,2,3,3,0,3,2,2,0, + 3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,2,3,0,3,0,0,0,3,2,2,2,2,2,2,2,0,3,2, + 3,3,3,3,2,3,2,3,2,2,0,3,3,0,2,3,0,2,2,0,0,0,0,3,0,0,0,2,0,2,2,0,2,0, + 3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,3,2,0,2,0,2,2,2,0,0,2,0,0,0,2,0,2,0, + 3,3,3,3,3,3,2,3,3,2,2,3,0,2,0,0,0,3,3,2,0,2,2,2,2,0,2,3,2,3,2,0,2,0, + 3,3,3,3,2,2,2,0,2,2,2,3,2,2,2,0,0,3,3,2,2,0,0,3,0,0,2,2,2,0,0,2,2,3, + 3,3,3,3,3,3,3,3,3,0,0,3,2,3,0,0,2,2,2,0,0,2,0,0,0,0,2,3,3,2,3,2,3,0, + 2,0,2,0,0,0,0,2,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,2,3,2,0,3,2,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,2,0,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0, + 3,3,3,2,0,2,3,0,0,2,0,2,3,0,0,3,2,0,0,2,0,0,0,0,0,3,2,2,0,0,0,0,2,0, + 3,3,3,3,2,3,3,3,3,0,2,3,2,3,2,2,0,2,0,3,0,0,0,0,0,0,2,3,2,0,2,0,2,0, + 2,3,3,3,3,3,2,3,3,3,3,2,2,3,0,0,3,2,0,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0, + 3,3,3,3,2,2,2,3,3,0,0,2,2,0,2,0,2,2,0,2,0,0,0,0,0,0,2,2,3,0,0,0,0,0, + 0,0,2,0,0,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,2,2,3,2,3,2,2,3,2,0,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,0,0,0,2,0,2,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 2,0,0,0,3,3,2,3,3,2,2,0,2,0,2,3,2,0,0,3,0,0,0,0,0,0,2,0,2,0,0,0,0,0, + 2,0,2,0,3,0,0,2,2,2,0,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, }; +const SequenceModel Iso_8859_1ItalianModel = +{ + Iso_8859_1_CharToOrderMap, + ItalianLangModel, + 34, + (float)0.9992462827093448, + PR_TRUE, + "ISO-8859-1", + "it" +}; + const SequenceModel Iso_8859_3ItalianModel = { Iso_8859_3_CharToOrderMap, ItalianLangModel, 34, - 
(float)0.9989484485502651, + (float)0.9992462827093448, PR_TRUE, "ISO-8859-3", "it" }; -const SequenceModel Iso_8859_15ItalianModel = -{ - Iso_8859_15_CharToOrderMap, - ItalianLangModel, - 34, - (float)0.9989484485502651, - PR_TRUE, - "ISO-8859-15", - "it" -}; - const SequenceModel Iso_8859_9ItalianModel = { Iso_8859_9_CharToOrderMap, ItalianLangModel, 34, - (float)0.9989484485502651, + (float)0.9992462827093448, PR_TRUE, "ISO-8859-9", "it" }; -const SequenceModel Iso_8859_1ItalianModel = +const SequenceModel Iso_8859_15ItalianModel = { - Iso_8859_1_CharToOrderMap, + Iso_8859_15_CharToOrderMap, ItalianLangModel, 34, - (float)0.9989484485502651, + (float)0.9992462827093448, PR_TRUE, - "ISO-8859-1", + "ISO-8859-15", "it" }; @@ -262,8 +277,18 @@ const SequenceModel Windows_1252ItalianModel = Windows_1252_CharToOrderMap, ItalianLangModel, 34, - (float)0.9989484485502651, + (float)0.9992462827093448, PR_TRUE, "WINDOWS-1252", "it" }; + +const LanguageModel ItalianModel = +{ + "it", + Unicode_CharOrder, + 68, + ItalianLangModel, + 34, + (float)0.9992462827093448, +}; diff --git a/src/LangModels/LangSpanishModel.cpp b/src/LangModels/LangSpanishModel.cpp index 6c3f3a9..7e41230 100644 --- a/src/LangModels/LangSpanishModel.cpp +++ b/src/LangModels/LangSpanishModel.cpp @@ -36,12 +36,13 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" /********* Language model for: Spanish *********/ /** * Generated by BuildLangModel.py - * On: 2015-12-12 18:39:02.290370 + * On: 2021-03-16 11:33:00.157304 **/ /* Character Mapping Table: @@ -61,45 +62,45 @@ * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 * even though they are both used for French. Same for the euro sign. 
*/ -static const unsigned char Iso_8859_1_CharToOrderMap[] = -{ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ - NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ - CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 53, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 54, 34,SYM, 44, 51, 30, 55, 32, 42, 56, 57, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 58, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 59, 34,SYM, 44, 51, 30, 60, 32, 42, 61, 62, /* FX */ -}; -/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ - static const unsigned char Iso_8859_15_CharToOrderMap[] = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 
7, 12, 3, 2, /* 4X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM,SYM,SYM,SYM,SYM,SYM, 63,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM, 65, 66,SYM,SYM, 67,SYM,SYM,SYM, 68, 69, 70,SYM, /* BX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 71, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 72, 34,SYM, 44, 51, 30, 73, 32, 42, 74, 75, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 76, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 77, 34,SYM, 44, 51, 30, 78, 32, 42, 79, 80, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 38,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM, 51, 54,SYM,SYM, 51,SYM,SYM,SYM, 46, 46, 55,SYM, /* BX */ + 36, 25, 43, 45, 37, 56, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ + 49, 27, 44, 19, 42, 57, 34,SYM, 58, 59, 30, 60, 32, 40, 50, 61, /* DX */ + 36, 25, 43, 45, 37, 62, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ + 49, 27, 44, 19, 42, 63, 34,SYM, 64, 65, 30, 66, 32, 40, 50, 67, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Iso_8859_1_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 68,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 36, 25, 43, 45, 37, 69, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ + 49, 27, 44, 19, 42, 70, 34,SYM, 71, 72, 30, 73, 32, 40, 50, 74, /* DX */ + 36, 25, 43, 45, 37, 75, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ + 49, 27, 44, 19, 42, 76, 34,SYM, 77, 78, 30, 79, 32, 40, 50, 80, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -109,96 +110,120 @@ static const unsigned char Windows_1252_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */ - 13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM, 83,ILL, 84,ILL, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 85,SYM, 86,ILL, 87, 88, /* 9X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */ + 13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51,ILL, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51, 82, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ - 33, 25, 39, 46, 
37, 45, 47, 35, 36, 26, 48, 40, 90, 22, 41, 43, /* CX */ - 49, 29, 38, 19, 50, 91, 34,SYM, 44, 51, 30, 92, 32, 42, 93, 94, /* DX */ - 33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 95, 22, 41, 43, /* EX */ - 49, 29, 38, 19, 50, 96, 34,SYM, 44, 51, 30, 97, 32, 42, 98, 99, /* FX */ + SYM,SYM,SYM,SYM,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + 36, 25, 43, 45, 37, 84, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */ + 49, 27, 44, 19, 42, 85, 34,SYM, 86, 87, 30, 88, 32, 40, 50, 89, /* DX */ + 36, 25, 43, 45, 37, 90, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */ + 49, 27, 44, 19, 42, 91, 34,SYM, 92, 93, 30, 94, 32, 40, 50, 95, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ +static const int Unicode_Char_size = 66; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 1, 66, 14, 67, 10, 68, 8, 69, 0, 70, 17, 71, 15, 72, 20, + 73, 4, 74, 24, 75, 29, 76, 7, 77, 12, 78, 3, 79, 2, 80, 13, + 81, 22, 82, 6, 83, 5, 84, 9, 85, 11, 86, 16, 87, 31, 88, 28, + 89, 18, 90, 23, 97, 1, 98, 14, 99, 10, 100, 8, 101, 0,102, 17, + 103, 15, 104, 20, 105, 4, 106, 24, 107, 29, 108, 7, 109, 12,110, 3, + 111, 2, 112, 13, 113, 22, 114, 6, 115, 5, 116, 9, 117, 11,118, 16, + 119, 31, 120, 28, 121, 18, 122, 23, 193, 25, 201, 26, 205, 21,209, 27, + 211, 19, 218, 30, 220, 32, 225, 25, 233, 26, 237, 21, 241, 27,243, 19, + 250, 30, 252, 32, +}; + /* Model Table: - * Total sequences: 897 - * First 512 sequences: 0.9970385677528184 - * Next 512 sequences (512-1024): 0.0029614322471815486 - * Rest: 4.597017211338539e-17 + * Total sequences: 1002 + * First 512 sequences: 0.9966074680689881 + * Next 512 sequences (512-1024): 0.003392531931011823 + * Rest: 3.209238430557093e-17 * Negative sequences: TODO */ static const PRUint8 SpanishLangModel[] = { - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,3,0, - 
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,3,3,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,0,3,2,2, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,2,2,0, - 3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,2,3,3,3,3,2,3,2,2,3,3,2,0,0,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,2,3,2,3,3,0,3,2,2,3,3,0,0,0,2,2,2, - 3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,2,3,0,3,3,2,3,0,2,3,3,3,0,0,2,0,0, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0, - 3,3,3,3,3,3,2,2,2,2,2,3,3,3,3,2,2,3,0,3,2,0,3,2,0,3,3,2,2,0,3,2,2, - 3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,0,2,2,2,3,3,0,3,2,0,3,3,2,0,0,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,2,3,3,0,3,2,2,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,2,3,3,0,3,2,2,2,3,2,0,0,3,2,3, - 3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,2,3,2,0,3,0,0,3,2,0,2,2,2,0,0,3,2,0, - 3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,0,3,2,0,0,2,2,2,2,2,0,0,2,2,0, - 3,3,3,2,2,3,2,2,2,0,2,3,0,2,0,2,2,2,2,3,0,0,3,0,0,2,3,2,0,0,0,0,0, - 0,0,0,3,3,0,3,3,3,3,3,0,3,3,2,3,2,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0, - 3,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,0,3,2,2,2,3,0,2,0,2,2,2, - 2,3,2,0,2,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,0,2,2,3,3,3,2,3,2,3,3,3,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0, - 3,3,3,2,0,3,2,2,2,2,0,3,2,2,0,0,0,0,0,3,0,0,2,2,0,2,3,0,0,0,2,0,2, - 3,3,3,2,0,3,2,0,2,2,2,3,2,2,2,3,0,2,0,3,2,3,2,0,3,3,2,2,0,0,2,0,0, - 2,0,0,3,3,2,3,3,2,3,3,2,3,3,2,3,3,2,2,0,2,2,0,2,2,0,0,0,2,2,0,0,0, - 2,3,2,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,2,0,0,0,0,2,0,0,0,0,3,0,0,0,0, - 3,3,3,2,3,3,3,3,2,2,2,3,3,0,2,2,2,3,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0, - 3,3,3,2,2,3,2,2,2,3,3,3,2,3,2,0,2,2,3,2,2,2,0,2,0,2,2,2,3,0,0,2,0, - 3,3,3,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,3,0,0,2,0,0,0,0,0,0,0, - 2,3,2,3,3,0,2,3,2,3,2,0,3,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0, - 3,3,3,3,2,3,2,2,2,2,2,2,0,0,2,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,2,0, 
- 3,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,2,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,3,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,2,2,3,3,0,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,0,0,3,2,2,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,3,2,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2,2,2,2, + 3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,2,2,3,3,2,3,3,2,2,3,3,0,2,2,2,2,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,2,3,3,3,3,0,3,2,3,3,0,2,2,3,2,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,3,2,2,0,3,3,0,2,3,2,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,0,2,0, + 3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,2,2,2,3,3,2,3,0,2,2,3,3,0,2,2,3,2,2, + 3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,2,2,2,2,3,3,3,2,2,0,3,3,0,0,2,3,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,2,3,0,2,3,3,3,0,0,2,2,2,2, + 3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,0,2,3,3,3,3,0,2,0,2,3,0,2,2,3,2,3, + 3,3,3,2,3,2,2,2,2,2,2,3,0,0,2,2,2,0,2,3,2,3,0,0,0,3,3,0,0,2,0,0,0, + 3,3,3,2,3,2,3,3,2,3,2,3,2,2,2,2,0,3,2,3,0,3,0,0,0,2,3,0,0,2,3,0,0, + 3,3,3,3,3,3,2,3,2,2,2,3,2,2,2,2,2,2,2,3,2,0,0,2,2,2,2,0,2,2,2,2,0, + 2,2,0,3,2,3,3,3,3,2,3,0,3,3,2,3,3,2,2,0,2,0,2,0,2,0,0,2,2,2,0,0,0, + 3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,0,0,2,3,2,2,3,2,2,0,2,2,0,2,2,3,2,2, + 2,3,3,3,0,3,3,2,3,3,3,0,3,2,2,3,2,3,0,0,0,0,2,2,2,0,2,2,2,2,0,0,0, + 0,2,0,0,2,2,2,2,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,2,2,0,3,3,3,2,2,2,3,2,0,2,3,2,2,3,3,0,3,2,0,0,2,2,2,0, + 3,3,3,2,3,2,2,2,2,2,2,3,0,2,0,0,2,0,2,3,2,2,0,0,0,2,3,0,0,2,2,2,0, + 2,2,0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,0,2,2,2,0,0,2,2,2,0,0,0, + 2,3,2,3,2,3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,0,2,2,2,0,0,2,3,0,0,0,0, + 
3,3,3,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,2,0,0,0,2,2,0,0,0,0,0,0, + 3,3,3,0,3,2,2,2,2,3,3,2,2,3,2,2,3,2,2,2,2,2,2,0,0,2,2,0,3,0,0,2,0, + 3,3,3,2,3,3,3,3,0,2,2,3,3,2,2,2,2,0,3,2,2,2,0,2,0,2,3,0,2,2,2,2,2, + 2,3,2,3,0,3,2,3,2,3,3,0,3,2,3,0,0,2,0,0,2,0,0,2,2,0,0,2,0,0,0,0,0, + 3,3,3,2,3,2,2,2,2,2,2,2,2,2,2,0,0,0,2,2,3,0,0,2,2,2,2,0,0,2,0,2,0, + 3,0,0,2,2,0,0,2,0,2,0,0,2,2,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0, }; -const SequenceModel Iso_8859_1SpanishModel = -{ - Iso_8859_1_CharToOrderMap, - SpanishLangModel, - 33, - (float)0.9970385677528184, - PR_TRUE, - "ISO-8859-1", - "es" -}; - const SequenceModel Iso_8859_15SpanishModel = { Iso_8859_15_CharToOrderMap, SpanishLangModel, 33, - (float)0.9970385677528184, + (float)0.9966074680689881, PR_TRUE, "ISO-8859-15", "es" }; +const SequenceModel Iso_8859_1SpanishModel = +{ + Iso_8859_1_CharToOrderMap, + SpanishLangModel, + 33, + (float)0.9966074680689881, + PR_TRUE, + "ISO-8859-1", + "es" +}; + const SequenceModel Windows_1252SpanishModel = { Windows_1252_CharToOrderMap, SpanishLangModel, 33, - (float)0.9970385677528184, + (float)0.9966074680689881, PR_TRUE, "WINDOWS-1252", "es" }; + +const LanguageModel SpanishModel = +{ + "es", + Unicode_CharOrder, + 66, + SpanishLangModel, + 33, + (float)0.9966074680689881, +};