Rebuild a bunch of language models.

Adding a generic language model (see upcoming commit), which uses the same
data as the specific single-byte encoding statistics model, except that it
applies it to Unicode code points.
For this to work, instead of the CharToOrderMap, which mapped
directly from an encoded byte (always 256 values) to an order, we now add an
array of frequent characters, sorted by Unicode code point, which maps each
code point to its order of frequency (which can be used with the same
sequence mapping array).

This of course means that each prober in which we want to use these
generic models will have to implement its own byte-to-code-point
decoder, as this is per-encoding logic anyway. This will come in a
subsequent commit.
This commit is contained in:
Jehan 2021-03-15 10:20:14 +01:00
parent 82347030ba
commit 9518f4d7a2
14 changed files with 1666 additions and 1320 deletions

View File

@ -43,6 +43,7 @@ import unicodedata
import subprocess
import wikipedia
import importlib
import math
import optparse
import datetime
import operator
@ -433,6 +434,57 @@ for charset in charsets:
CTOM_str += ' */\n\n'
c_code += CTOM_str
## UNICODE frequency.
# Since we can't map the full character table from encoding to order,
# just create a list from the most common characters from the language.
# The list is ordered by unicode code points (hence can be used
# generically for various encoding scheme as it is not encoding
# specific), allowing efficient lookup by code point with a divide
# and conquer search algorithm.
# Each code point is immediately followed by its order.
# Keep only the freq_count most frequent characters.
sorted_chars = [(char, freq, order) for order, (char, freq) in
enumerate(sorted_ratios)][:freq_count]
max_order = len(sorted_chars)
# Add equivalency characters.
equivalent = []
if lang.case_mapping:
for char, ratio, order in sorted_chars:
uppercased = chr(char).upper()
try:
if char != ord(uppercased):
equivalent += [(ord(uppercased), ratio, order)]
except TypeError:
# This happens in cases such as 'SS' being the uppercase of 'ß'.
# Just ignore such cases.
sys.stderr.write("Ignoring '{}' as uppercase equivalent of '{}'.\n".format(uppercased, char))
sorted_chars += equivalent
# Order by code point.
sorted_chars = sorted(sorted_chars, key=operator.itemgetter(0))
CTOM_str = 'static const int Unicode_Char_size = {};\n'.format(len(sorted_chars))
CTOM_str += 'static const unsigned int Unicode_CharOrder[]'
CTOM_str += ' =\n{'
column = 0
max_char_width = math.floor(math.log10(sorted_chars[-1][0])) + 1
max_order_width = math.floor(math.log10(max_order)) + 1
for char, ratio, order in sorted_chars:
if column % 8 == 0:
CTOM_str += '\n '
column += 1
CTOM_str += '{}{:>{width}}, '.format('' if column % 8 == 0 else ' ', char, width=max_char_width)
CTOM_str += '{:>{width}},'.format(order, width=max_order_width)
CTOM_str += '\n};\n\n'
c_code += CTOM_str
########### SEQUENCES ###########
ratios = {}
@ -514,11 +566,22 @@ for charset in charsets:
SM_str += '\n {},'.format(freq_count)
SM_str += '\n (float){},'.format(ratio_512)
SM_str += '\n {},'.format('PR_TRUE' if lang.use_ascii else 'PR_FALSE')
SM_str += '\n "{},"'.format(charset)
SM_str += '\n "{}",'.format(charset)
SM_str += '\n "{}"'.format(lang.code)
SM_str += '\n};'
c_code += SM_str
SM_str = '\n\nconst LanguageModel {}Model ='.format(language_c)
SM_str += '\n{'
SM_str += '\n "{}",'.format(lang.code)
SM_str += '\n Unicode_CharOrder,'
SM_str += '\n {},'.format(len(sorted_chars)) # Order is wrong!
SM_str += '\n {}LangModel,'.format(language_c)
SM_str += '\n {},'.format(freq_count)
SM_str += '\n (float){},'.format(ratio_512)
SM_str += '\n};'
c_code += SM_str
c_code += '\n'
lang_model_file = current_dir + '/../src/LangModels/Lang{}Model.cpp'.format(language_c)

View File

@ -1,142 +1,192 @@
= Logs of language model for Arabic (ar) =
- Generated by BuildLangModel.py
- Started: 2015-12-13 18:31:12.817808
- Maximum depth: 2
- Max number of pages: 50
- Started: 2021-03-16 11:33:00.432776
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
الصفحة_الرئيسية (revision 17217037)
11 ديسمبر (revision 17699159)
12 ديسمبر (revision 17710194)
13 ديسمبر (revision 17722318)
1437 هـ (revision 17278274)
14 ديسمبر (revision 17432010)
15 ديسمبر (revision 17206233)
1645 (revision 17168144)
1954 (revision 17409780)
1955 (revision 16826533)
1972 (revision 17004868)
1988 (revision 17671285)
2003 (revision 17656994)
2011 (revision 17589601)
2015 (revision 17678287)
216 ق.م (revision 17586752)
25 يناير (revision 17325864)
2 ربيع الأول (revision 17722146)
6 (عدد) (revision 16972178)
آريز (revision 17466671)
آلهة اليونان (revision 17722617)
أثينا (revision 17642941)
أثينا (ميثولوجيا) (revision 17662932)
أزمة المهاجرين إلى أوروبا (revision 17718437)
أوروبا (revision 17713457)
إس سي إي سانتا مونيكا ستوديو (revision 17035439)
إسبارطة (revision 16733170)
إسماعيل الصفوي (revision 17194218)
إله الحرب (لعبة فيديو) (revision 17630201)
إمارة دبي (revision 17602037)
إيطاليا (revision 17586853)
اتفاق باريس (revision 17718086)
الأزمة الليبية (revision 17630232)
الإمارات العربية المتحدة (revision 17722077)
الإنتخابات البلدية السعودية 2015 (revision 17722004)
الاتحاد الأوروبي لكرة القدم (revision 17596822)
الاحتجاجات اللبنانية 2015 (revision 17315127)
الانتفاضة الفلسطينية (2015) (revision 17710414)
التمرد العراقي (revision 17708640)
الجمعية العامة للأمم المتحدة (revision 17304227)
الجمهورية الرومانية (revision 16472557)
الجيش اللبناني (revision 17516533)
الحرب الأهلية السورية (revision 17675300)
الحرب الأهلية اليمنية (2015) (revision 17686236)
الحرب في شمال غرب باكستان (revision 17490838)
الدولة الصفوية (revision 17031046)
الرياض (revision 17580586)
السعودية (revision 17711339)
السلطة الوطنية الفلسطينية (revision 17438123)
العراق (revision 17704602)
العلاقات الخارجية في تركيا (revision 17647409)
الصفحة_الرئيسية (revision 52017134)
1442 هـ (revision 53072582)
1521 (revision 51053075)
15 مارس (revision 53063546)
16 مارس (revision 53079323)
1775 (revision 50066071)
17 مارس (revision 52787393)
1977 (revision 52535026)
1988 (revision 52921343)
1989 (revision 52945821)
2021 (revision 53072089)
3 شعبان (revision 53076204)
آذار (revision 50305961)
آفة (كائن حي) (revision 50728417)
أبو الريحان البيروني (revision 52901629)
أبو موسى الأشعري (revision 52327088)
أتراك (revision 52923553)
أحلام الجريتلي (revision 53054581)
أستراليا المفتوحة 2021 (revision 52951662)
ألمان (revision 51707635)
أم (revision 52894160)
أمريكا الجنوبية (revision 52623681)
إسبان (revision 53023219)
إسبانيا (revision 52945464)
إسكندنافيا (revision 52901031)
إيران (revision 53077058)
اشتباكات الصحراء الغربية 2020 (revision 52776181)
اغتيال (revision 52605819)
الإسلام (revision 53061751)
الاحتجاجات الروسية 2021 (revision 52959948)
التفسير الموضوعي (تفسير) (revision 53063711)
الجزري (revision 52615628)
الحزب التقدمي الاشتراكي (revision 52719072)
الحضارة الغربية (revision 52663404)
الحملة الصليبية الثانية (revision 53028660)
الشمس (revision 53011313)
العراق (revision 53078113)
الفتح الإسلامي لفارس (revision 52960739)
الفتح الإسلامي للشام (revision 53000955)
الفتح الإسلامي للمغرب (revision 53015306)
الفتح الإسلامي لمصر (revision 52728321)
الفلبين (revision 53043940)
القرآن الكريم (revision 53047128)
القمر (revision 52920452)
القواعد الصاروخية الإيرانية تحت الأرض (revision 50043780)
اللغة العربية (revision 52929542)
المكثرون لرواية الحديث (revision 51989111)
الهجوم الكيماوي على حلبجة (revision 52723565)
انتهازية (revision 52279176)
انقراض العصر الطباشيري-الباليوجيني (revision 52688972)
باتا (revision 46639883)
باتريك أتشي (revision 53024512)
بحث علمي (revision 51195242)
برسفيرنس (مركبة جوالة) (revision 52965815)
برنامج أبولو (revision 52571274)
بعقلين (revision 48961465)
بلاد السند (revision 52279660)
بوتان (revision 52999635)
ترابط زوجي (revision 50219604)
تسمية ثنائية (revision 53076878)
تفجيرات باتا 2021 (revision 53043062)
تقويم هجري (revision 53077283)
توقيت عالمي منسق (revision 52887567)
تونس (revision 53047571)
جائحة فيروس كورونا 201920 (revision 52954613)
جائزة الملك فيصل العالمية (revision 52691751)
جائزة الملك فيصل العالمية في الدراسات الإسلامية (revision 53078767)
جواثم (revision 49526826)
جيرارد كايبر (revision 52265632)
جيفة (revision 52535861)
حامد باكايوكو (revision 53079223)
حديث نبوي (revision 53075245)
حرس الثورة الإسلامية (revision 53019030)
حرملة بن كاهل (revision 52891720)
حرية (revision 52761732)
حصار البصرة (1775) (revision 52323981)
خوسيه دي سان مارتين (revision 49958538)
خير الدين حسيب (revision 53076407)
دير دوريت (revision 32094684)
ذكاء (revision 52187723)
روما القديمة (revision 51009123)
ساحل العاج (revision 52429752)
ساعة الفيل (revision 52709413)
سالم بن عبد الله بن عمر بن الخطاب (revision 51752881)
سرب (revision 52828105)
سرعة الصوت (revision 52975385)
سرعة الضوء (revision 52244159)
سوسن ربيع (revision 53077510)
سيبيريا (revision 52919273)
سيمون بوليفار (revision 53011030)
شريعة إسلامية (revision 53070018)
شفق (revision 48963569)
صباح عبد الجليل (revision 52995683)
صحابة (revision 52201334)
صلاة الجمعة (revision 52577966)
صلاة العيد (revision 49726428)
طائر التعريشة الساتاني (revision 44913097)
طائر القيثارة (revision 52673886)
طابا (revision 53078953)
عبد الله الرضيع (revision 50505781)
عبد الله بن عباس (revision 52388329)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-12-13 18:33:58.846891
- Wikipedia parsing ended at: 2021-03-16 11:42:48.951707
95 characters appeared 727795 times.
101 characters appeared 1520487 times.
First 64 characters:
[ 0] Char ا: 14.933875610577156 %
[ 1] Char ل: 11.460782225764122 %
[ 2] Char ي: 8.30302489025069 %
[ 3] Char م: 6.3702003998378665 %
[ 4] Char و: 5.952637762007158 %
[ 5] Char ر: 4.9419135883043985 %
[ 6] Char ن: 4.900967992360486 %
[ 7] Char ت: 4.229625100474721 %
[ 8] Char ة: 3.6022506337636284 %
[ 9] Char ب: 3.5434428650925054 %
[10] Char ع: 3.3116468236247845 %
[11] Char د: 3.1756195082406444 %
[12] Char س: 2.5401383631379715 %
[13] Char ف: 2.3899587109007343 %
[14] Char ق: 2.010868445097864 %
[15] Char أ: 1.8763525443291036 %
[16] Char ه: 1.8663222473361318 %
[17] Char ك: 1.8573911609725264 %
[18] Char ح: 1.8431014227907585 %
[19] Char ج: 1.3270220323030524 %
[20] Char ط: 1.0305099650313618 %
[21] Char ش: 0.9638703206260004 %
[22] Char إ: 0.8946200509758929 %
[23] Char ص: 0.8509264284585631 %
[24] Char ى: 0.7726076711161797 %
[25] Char خ: 0.717097534333157 %
[26] Char ز: 0.6687322666410184 %
[27] Char ث: 0.6549921337739336 %
[28] Char ض: 0.5409490309771295 %
[29] Char غ: 0.4574090231452538 %
[30] Char ذ: 0.44765352880962356 %
[31] Char ئ: 0.39269299734128427 %
[32] Char ء: 0.295138053984982 %
[33] Char ظ: 0.2397653185306302 %
[34] Char آ: 0.12324899181775088 %
[35] Char ؤ: 0.08491402111858422 %
[36] Char ـ: 0.047678261048784344 %
[37] Char a: 0.03311372020967443 %
[38] Char e: 0.029403884335561525 %
[39] Char i: 0.027205463076827956 %
[40] Char o: 0.02432003517474014 %
[41] Char t: 0.02349562720271505 %
[42] Char r: 0.02294602188803166 %
[43] Char n: 0.020472797971956388 %
[44] Char s: 0.01799957405588112 %
[45] Char l: 0.012915724895059736 %
[46] Char h: 0.011816514265692949 %
[47] Char d: 0.011129507622338709 %
[48] Char پ: 0.010717303636326163 %
[49] Char c: 0.009480691678288529 %
[50] Char u: 0.007969277062909199 %
[51] Char m: 0.007694474405567502 %
[52] Char A: 0.006870066433542411 %
[53] Char گ: 0.006595263776200715 %
[54] Char f: 0.006183059790188171 %
[55] Char S: 0.005770855804175626 %
[56] Char y: 0.0054960531468339294 %
[57] Char T: 0.0049464478321505365 %
[58] Char b: 0.0048090465034796885 %
[59] Char G: 0.0046716451748088405 %
[60] Char I: 0.004396842517467144 %
[61] Char C: 0.0042594411887962955 %
[62] Char p: 0.0039846385314545995 %
[63] Char k: 0.003709835874112903 %
[ 0] Char ا: 14.550206611434364 %
[ 1] Char ل: 11.41772340046314 %
[ 2] Char ي: 7.748043883308441 %
[ 3] Char م: 6.294036055553254 %
[ 4] Char و: 5.778148711564124 %
[ 5] Char ن: 5.249304992413615 %
[ 6] Char ر: 4.93203822196441 %
[ 7] Char ت: 4.071261378755622 %
[ 8] Char ب: 3.8685631643019636 %
[ 9] Char ة: 3.2951942371095577 %
[10] Char ع: 3.235344991440243 %
[11] Char د: 2.921103567475421 %
[12] Char س: 2.615806646160079 %
[13] Char ف: 2.609032500771135 %
[14] Char ه: 2.300841769775079 %
[15] Char ق: 2.1174794654607374 %
[16] Char أ: 2.0117238753110023 %
[17] Char ك: 1.952006166445356 %
[18] Char ح: 1.788966298297848 %
[19] Char ج: 1.297939410202126 %
[20] Char ط: 0.9575221623072082 %
[21] Char ص: 0.8946475701535099 %
[22] Char ش: 0.8939898861351658 %
[23] Char إ: 0.8849796150838514 %
[24] Char ى: 0.8706421034839494 %
[25] Char خ: 0.7856693283138889 %
[26] Char ث: 0.6599201440064926 %
[27] Char ز: 0.6011231927665281 %
[28] Char ذ: 0.5680416866438187 %
[29] Char ض: 0.5665290134016273 %
[30] Char غ: 0.5086528197873444 %
[31] Char ئ: 0.3490329085352259 %
[32] Char ء: 0.29898315473923814 %
[33] Char ظ: 0.20197476203348005 %
[34] Char آ: 0.15396382869435912 %
[35] Char ؤ: 0.09148384695166746 %
[36] Char a: 0.05748158320327632 %
[37] Char e: 0.045972112882254175 %
[38] Char i: 0.042946766397871206 %
[39] Char t: 0.042223313977692675 %
[40] Char ـ: 0.03972411470798501 %
[41] Char r: 0.035778010597920275 %
[42] Char s: 0.034988789775907324 %
[43] Char n: 0.031437296076849065 %
[44] Char l: 0.030319233245664053 %
[45] Char o: 0.029661549227319933 %
[46] Char c: 0.0209143517833431 %
[47] Char m: 0.01861245771913867 %
[48] Char d: 0.018086310504463375 %
[49] Char y: 0.015060964020080407 %
[50] Char h: 0.01479789041274276 %
[51] Char p: 0.01479789041274276 %
[52] Char u: 0.014732122010908347 %
[53] Char f: 0.010194102284333902 %
[54] Char C: 0.008221050229301533 %
[55] Char b: 0.007892208220129471 %
[56] Char g: 0.007431829407288587 %
[57] Char v: 0.007234524201785348 %
[58] Char S: 0.007168755799950937 %
[59] Char E: 0.006905682192613288 %
[60] Char I: 0.006445303379772402 %
[61] Char T: 0.006379534977937989 %
[62] Char A: 0.005853387763262692 %
[63] Char B: 0.005458777352256218 %
The first 64 characters have an accumulated ratio of 0.999523217389512.
The first 64 characters have an accumulated ratio of 0.9992864128400966.
1479 sequences found.
1820 sequences found.
First 512 (typical positive ratio): 0.9696025116913417
Next 512 (512-1024): 1.3740132867084825e-06
Rest: 0.0012305764497782395
First 512 (typical positive ratio): 0.9644868613755061
Next 512 (512-1024): 0.0774804388330844
Rest: 0.0019191680534433112
- Processing end: 2015-12-13 18:33:59.193909
- Processing end: 2021-03-16 11:42:49.142159

View File

@ -1,158 +1,156 @@
= Logs of language model for Danish (da) =
- Generated by BuildLangModel.py
- Started: 2016-02-19 17:53:58.564190
- Started: 2021-03-16 01:32:17.684746
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Forside (revision 2692411)
16. februar (revision 6877446)
17. februar (revision 8454583)
1878 (revision 8280505)
19. februar (revision 8206479)
1922 (revision 8455105)
1926 (revision 8425271)
1942 (revision 8443554)
1945 (revision 8448461)
1948 (revision 8454392)
1985 (revision 8409096)
2. verdenskrig (revision 8433181)
23. oktober (revision 6877825)
26. oktober (revision 7849938)
3C 273 (revision 8443798)
A-bus (revision 8427319)
Aktuelle begivenheder (revision 8440596)
B-52 Stratofortress (revision 8422571)
Borgerkrigen i Syrien (revision 8447763)
Boutros Boutros-Ghali (revision 8453935)
Brasilien (revision 8452750)
Cusco (region) (revision 7693764)
Danmark (revision 8451178)
Danmark i Eurovision Song Contest (revision 8453514)
Dansk (sprog) (revision 8455750)
Dansk Melodi Grand Prix 2016 (revision 8452164)
Dobbeltmordet på Peter Bangs Vej (revision 8334648)
Encyklopædi (revision 8446641)
Eritrea-sagen (revision 8452285)
Eurovision Song Contest 2014 (revision 8445804)
Eurovision Song Contest 2016 (revision 8453588)
Flygtningekrisen i Europa 2015 (revision 8452286)
Fonograf (revision 8177165)
Formel 1 (revision 8450846)
Formel 1 2016 (revision 8456463)
Frederik 6. (revision 8438503)
Første observation af gravitationsbølger (revision 8451269)
Grammofon (revision 8375093)
Guadalcanal (revision 7796248)
Harper Lee (revision 8456583)
Hartkorn (revision 8437552)
IC4 (revision 8446402)
IC4-sagen (revision 8434463)
Islamisk Stat (revision 8439228)
Jonathan Leunbach (revision 8452603)
Juliane Marie af Braunschweig-Wolfenbüttel (revision 8437957)
Kaliumklorid (revision 8452216)
Kejserriget Japan (revision 8044942)
Kevin Magnussen (revision 8455302)
København (revision 8427847)
LIGO (revision 8451266)
Latinamerika (revision 7692181)
Leonid Hurwicz (revision 8445727)
Lighthouse X (revision 8452940)
Linkoban (revision 8455879)
Machu Picchu (revision 8406907)
Matador (tv-serie) (revision 8454648)
Middelaldercentret (revision 8449194)
Nobelprisen (revision 8409809)
Nykøbing Falster (revision 8452825)
Nyligt afdøde (revision 8456580)
Overvågning (revision 8455039)
Panorama (foto) (revision 8448393)
Peru (revision 8437485)
Peter Lauritsen (revision 8456097)
Professor (revision 8415451)
Renault F1 (revision 8450843)
S-bus (revision 8455589)
Salomonøerne (revision 8238961)
Slaget om Belgien (1940) (revision 8430013)
Slaget om Guadalcanal (revision 7762887)
Slaget om Henderson Field (revision 8445480)
Slaget om Iwo Jima (revision 8145239)
Soldiers of Love (Lighthouse X-sang) (revision 8452929)
Solen (revision 8276478)
Stillehavskrigen (revision 8430649)
Stockholm (revision 8358042)
Søslaget ved Guadalcanal (revision 7772812)
Thomas Edison (revision 8282441)
Togulykken ved Bad Aibling (revision 8455364)
Topografi (revision 6886168)
USA (revision 8448088)
United States Army (revision 8401635)
United States Marine Corps (revision 8401667)
Vestallierede (revision 6961443)
Wikimedia (revision 8263252)
Wikipedia (revision 8267051)
Zikavirus (revision 8454832)
1. februar (revision 8404985)
10. februar (revision 6877431)
11. februar (revision 6877433)
12. februar (revision 6877437)
13. februar (revision 6877438)
14. februar (revision 6877441)
1497 (revision 7369489)
15. februar (revision 7329463)
1560 (revision 7874693)
1568 (revision 7369703)
1620 (revision 7423903)
1688 (revision 7367090)
18. februar (revision 6877450)
Forside (revision 10000691)
1. symfoni (Beethoven) (revision 10648993)
15. marts (revision 8172123)
1917 (revision 10645384)
1930 (revision 10645389)
1940 (revision 10648721)
1951 (revision 10640371)
1972 (revision 10641861)
2. marts (revision 9423344)
2003 (revision 10654209)
44 f.Kr. (revision 7242128)
7. marts (revision 9423388)
9. marts (revision 10601197)
Abdikation (revision 10197388)
Afsnit af Badehotellet (revision 10654331)
Agnes Slott-Møller (revision 10648962)
Australian Open-mesterskabet i damesingle 2021 (revision 10630904)
Australian Open-mesterskabet i herresingle 2021 (revision 10630887)
Australian Open 2021 (revision 10630544)
Casper & Mandrilaftalen (revision 10444147)
Coronaviruspandemien (revision 10652415)
Cykling under sommer-OL 2012 Linjeløb (kvinder) (revision 10651872)
Dansk (sprog) (revision 10633727)
Den danske Treårsekspedition til Østgrønland 1931-34 (revision 10654093)
Dnepr (revision 10635465)
Donald Trump (revision 10653185)
Døde i 2021 (revision 10653976)
Encyklopædi (revision 10590147)
Eurovision Song Contest 2014 (revision 10592331)
Folkerepublikken Kina (revision 10634829)
Folketinget (revision 10643927)
Fram-ekspeditionen 1910-1912 (revision 10630146)
Frankrig (revision 10648749)
Frankrigs præsidenter (revision 10477099)
Geologi (revision 10631000)
Geoteknik (revision 10603548)
Greater London (revision 10380043)
Hortus Botanicus Amsterdam (revision 8854568)
Hu Jintao (revision 10610855)
IC4 (revision 10577458)
Idus martius (revision 10652897)
Inger Støjberg (revision 10643259)
Italiens premierministre (revision 10625575)
John Polkinghorne (revision 10654447)
Julius Cæsar (revision 10653812)
Korruption (revision 10401686)
Lars Göran Petrov (revision 10650013)
London Underground (revision 10635531)
Marge Simpson (revision 10640942)
Mario Draghi (revision 10652699)
Matilde af Skotland (revision 10648200)
Metrosystemer i verden (revision 10510595)
Middelaldercentret (revision 10574228)
Naomi Osaka (revision 10478959)
Nederlandene (revision 10642742)
Nicolas Sarkozy (revision 10639376)
Nikolaj 2. af Rusland (revision 10639924)
Novak Djokovic (revision 10479710)
Outlaw Gentlemen & Shady Ladies (revision 10492201)
Paris-Nice 2021 (revision 10653019)
Rigsretssagen mod Donald Trump 2021 (revision 10653875)
Rigsretssagen mod Inger Støjberg (revision 10643260)
Rusland (revision 10631140)
Sanja Ilić (revision 10645645)
Senat (revision 10429780)
Senatet (USA) (revision 10624834)
Shu-bi-dua (revision 10630614)
Svend Johansen (skuespiller) (revision 10643631)
Tennis (revision 10651841)
Tommy Troelsen (revision 10648382)
Træsko (revision 10626215)
USA's præsidenter (revision 10639768)
Undergrundsbane (revision 10541653)
Vilhelm Erobreren (revision 10631208)
Wikimedia (revision 10260889)
Wikipedia (revision 10627445)
Zar (revision 10557166)
1800 (revision 10645359)
2. april (revision 9568657)
Burgtheater (revision 9296862)
C-dur (revision 10513719)
Cello (revision 10641506)
Coda (revision 9298442)
Dominant (revision 9513277)
Dynamik (musik) (revision 9504157)
F-dur (revision 8135200)
Fagot (revision 10578018)
Fløjte (revision 10329382)
Harmonik (revision 10577145)
International Music Score Library Project (revision 10115839)
Italienske og franske musikudtryk (revision 10352094)
Johann Georg Albrechtsberger (revision 10289540)
Joseph Haydn (revision 10289602)
Klarinet (revision 10490230)
Klassicisme (musik) (revision 10436811)
Kontrabas (revision 10147393)
Kontrapunkt (musikteori) (revision 10184029)
Leipzig (revision 10611798)
Ludwig van Beethoven (revision 10642134)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2016-02-19 17:56:42.162636
- Wikipedia parsing ended at: 2021-03-16 01:36:49.098009
53 characters appeared 1301488 times.
57 characters appeared 1058523 times.
First 30 characters:
[ 0] Char e: 15.272749345364689 %
[ 1] Char r: 8.48482659847805 %
[ 2] Char n: 7.695652975670924 %
[ 3] Char t: 6.977014002434137 %
[ 4] Char a: 6.780469739252302 %
[ 5] Char i: 6.164636170291236 %
[ 6] Char s: 6.0942551909814 %
[ 7] Char d: 5.953493232361728 %
[ 8] Char l: 5.076650725938311 %
[ 9] Char o: 4.883026197706011 %
[10] Char g: 4.012253666572415 %
[11] Char k: 3.232607599916403 %
[12] Char m: 3.0863135119186653 %
[13] Char f: 2.701600014752345 %
[14] Char v: 2.13970470722742 %
[15] Char b: 1.982423195603801 %
[16] Char u: 1.8339777239590376 %
[17] Char p: 1.5789619266562582 %
[18] Char h: 1.3433085821767086 %
[19] Char ø: 0.8730775850411222 %
[20] Char y: 0.859938777768216 %
[21] Char å: 0.7699648402443973 %
[22] Char æ: 0.7208671920140639 %
[23] Char j: 0.644108896893402 %
[24] Char c: 0.5698093259407694 %
[25] Char w: 0.11087309295206717 %
[26] Char z: 0.05309307500338075 %
[27] Char x: 0.032424424965885205 %
[28] Char é: 0.032193919575132464 %
[29] Char q: 0.012139950579644223 %
[ 0] Char e: 15.118707859914238 %
[ 1] Char r: 8.552388564065213 %
[ 2] Char n: 7.6833474567864855 %
[ 3] Char t: 7.125305732610439 %
[ 4] Char a: 6.351302711419591 %
[ 5] Char i: 6.265806222443915 %
[ 6] Char s: 6.152629654716997 %
[ 7] Char d: 5.90341447469729 %
[ 8] Char o: 5.144999211164992 %
[ 9] Char l: 5.1253491893893655 %
[10] Char g: 3.907992551885977 %
[11] Char m: 3.3046990948708723 %
[12] Char k: 3.0474538578755492 %
[13] Char f: 2.586434116216653 %
[14] Char v: 2.2680659749481116 %
[15] Char u: 1.9654745338551927 %
[16] Char b: 1.7524418458550264 %
[17] Char p: 1.6338804163915193 %
[18] Char h: 1.5844719481768466 %
[19] Char ø: 0.7598323324103491 %
[20] Char æ: 0.7542585281566863 %
[21] Char å: 0.728278932059105 %
[22] Char y: 0.6751860847615027 %
[23] Char c: 0.6527963964883143 %
[24] Char j: 0.5847770903419198 %
[25] Char w: 0.17241004682940286 %
[26] Char z: 0.0783166733268904 %
[27] Char x: 0.05602145631223884 %
[28] Char é: 0.019177665482941794 %
[29] Char q: 0.016626941502452003 %
The first 30 characters have an accumulated ratio of 0.9997241618823994.
The first 30 characters have an accumulated ratio of 0.9997184756495605.
964 sequences found.
936 sequences found.
First 512 (typical positive ratio): 0.9968082796759031
Next 512 (512-1024): 7.68351302509128e-07
Rest: 3.903127820947816e-17
First 512 (typical positive ratio): 0.9962304038307248
Next 512 (512-1024): 0.007598323324103491
Rest: -5.2909066017292616e-17
- Processing end: 2016-02-19 17:56:42.304278
- Processing end: 2021-03-16 01:36:49.182013

View File

@ -1,116 +1,159 @@
= Logs of language model for French (fr) =
- Generated by BuildLangModel.py
- Started: 2015-12-03 21:07:37.508739
- Maximum depth: 2
- Max number of pages: 50
- Started: 2021-03-16 01:17:58.545030
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Wikipédia:Accueil_principal (revision 115957655)
Bœuf (animal) (revision 115500130)
1500 av. J.-C. (revision 110583603)
1898 dans les chemins de fer (revision 106801806)
1913 dans les chemins de fer (revision 112852042)
1974 dans les chemins de fer (revision 90170756)
1er décembre (revision 121012781)
2009 dans les chemins de fer (revision 107042206)
2011 dans les chemins de fer (revision 109560866)
24 novembre (revision 120782024)
26 novembre (revision 120833172)
29 novembre (revision 120918160)
2 décembre (revision 121025437)
30 novembre (revision 120947714)
3 décembre (revision 121030621)
Amphibien (revision 120332329)
Angleterre (revision 120784240)
Anne-Josèphe Théroigne de Méricourt (revision 121009789)
Années 1930 (revision 120558236)
Antonio Troyo Calderón (revision 121028881)
António Costa (revision 120993829)
Attentat du 24 novembre 2015 à Tunis (revision 121015161)
Balard (métro de Paris) (revision 118979088)
Bois de Vincennes (revision 120822909)
Buse à tête blanche (revision 121009499)
Californie (revision 120922479)
Charenton-le-Pont (revision 120210025)
Charenton - Écoles (métro de Paris) (revision 108644873)
Chronique médiévale (revision 100253272)
Concorde (métro de Paris) (revision 120856751)
Conférence de Paris de 2015 sur le climat (revision 121029398)
Crise de la dette publique grecque (revision 120905208)
Crise entre la Colombie et le Venezuela de 2015 (revision 120857143)
Crise migratoire en Europe (revision 121002308)
Crise russo-turque de 2015 (revision 121030214)
Créteil (revision 120684618)
Créteil - Préfecture (métro de Paris) (revision 113486387)
Deuxième guerre civile libyenne (revision 121027704)
Devise (monnaie) (revision 121015771)
Droits de tirage spéciaux (revision 121009135)
Décembre 2015 (revision 121010045)
Département français (revision 120993190)
Eldar Riazanov (revision 120996396)
Enfants verts de Woolpit (revision 121002303)
Ernst Larsen (revision 121026772)
Fatima Mernissi (revision 120992271)
Fejervarya cancrivora (revision 120353807)
Fonds monétaire international (revision 120754406)
Français (revision 120883858)
Freyja (revision 121028677)
Fusillade du 2 décembre 2015 en Californie (revision 121030353)
Wikipédia:Accueil_principal (revision 164303621)
Bœuf (animal) (revision 178255345)
10 mars (revision 180841287)
12 mars (revision 180798998)
13 mars (revision 180904703)
1493 (revision 163870551)
14 mars (revision 180901488)
15 mars (revision 180904428)
1891 (revision 180890066)
1917 (revision 178369116)
1939 (revision 178458019)
2011 (revision 176114496)
45e parallèle nord (revision 180910832)
6 mars (revision 180750121)
7 mars (revision 180750121)
Absolutisme (revision 179767600)
Alassane Ouattara (revision 180842696)
Ambassadeur (revision 180674153)
Amiral de France (revision 177268292)
Amirautés de Bretagne (revision 175194082)
Aurora Cornu (revision 180901231)
Bata (Guinée équatoriale) (revision 180763894)
Bob Walkup (revision 180908319)
Bourgogne-Franche-Comté (revision 180662628)
Centre de données (revision 180741567)
Championnats du monde de ski acrobatique 2021 (revision 180882257)
Christophe Colomb (revision 180494940)
Claude Debussy (revision 179962158)
Couronne solaire (revision 180875717)
Crise présidentielle depuis 2019 au Venezuela (revision 180336636)
Critique musical (revision 174352172)
Côte d'Ivoire (revision 180838790)
Daniel Vachez (revision 180915214)
Degré Celsius (revision 179948881)
Deuxième République (Tchécoslovaquie) (revision 180896689)
Deuxième guerre civile libyenne (revision 180269091)
Empire romain (revision 180843240)
Empire russe (revision 179593986)
Excommunication (revision 178073962)
Explosions de Bata (revision 180862772)
Fatima Aziz (revision 180862495)
Fort du Lomont (revision 180886100)
Frankie de la Cruz (revision 180903250)
GINK (revision 179590111)
Giovanni Gastel (revision 180881061)
Goodwill Zwelithini kaBhekuzulu (revision 180806403)
Gouvernement de l'Église catholique (revision 176961659)
Guerre civile syrienne (revision 180897321)
Guerre civile yéménite (revision 180691885)
Guerre du Tigré (revision 180793174)
Guinée équatoriale (revision 180759310)
Hamed Bakayoko (revision 180904779)
Helena Fuchsová (revision 180909783)
Henri-Charles de Beaumanoir de Lavardin (revision 180903071)
Henry Darrow (revision 180905848)
Heure en France (revision 180854115)
Incendie du centre de données d'OVHcloud à Strasbourg (revision 180901025)
Innocent XI (revision 180108629)
Ivo Trumbić (revision 180827381)
Jean-Claude Fasquelle (revision 180871354)
Jean-Jacques Viton (revision 180889491)
Jean Frydman (revision 180909934)
Le Mans (revision 180520548)
Lieutenant général (revision 180899945)
Liste des ambassadeurs de France près le Saint-Siège (revision 180150184)
Manifestation des agriculteurs indiens de 2020-2021 (revision 180901643)
Manifestations de 2020-2021 en Arménie (revision 180901656)
Manifestations de 2020-2021 en Biélorussie (revision 180901634)
Manifestations de 2021 au Sénégal (revision 180900196)
Manifestations de 2021 en Birmanie (revision 180901671)
Manifestations de 2021 en Russie (revision 180897927)
Manifestations de Deraa (revision 180914771)
Mars 1891 (revision 155220626)
Mars 2021 (revision 180914744)
Marvin Hagler (revision 180908678)
Militaire (revision 178062901)
Murray Walker (revision 180862148)
OVHcloud (revision 180900746)
Obren Joksimović (revision 180901629)
Palais Farnèse (revision 180885444)
Pandémie de Covid-19 (revision 180845115)
Pays-Bas (revision 180853920)
Photosphère (revision 179722426)
Premier ministre ivoirien (revision 180838804)
Province de Bretagne (revision 176523092)
Président de la république de Côte d'Ivoire (revision 180747416)
Pôle Nord (revision 178839482)
Querelle des Franchises (revision 180092394)
Raoul Casadei (revision 180910155)
Rassemblement des houphouëtistes pour la démocratie et la paix (revision 180912125)
Roi des Français (revision 180882393)
Ronald DeFeo Jr. (revision 180915749)
Royaume de France (revision 180809662)
Révolte du Papier timbré (revision 180903105)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-12-03 21:10:27.682316
- Wikipedia parsing ended at: 2021-03-16 01:24:27.092152
56 characters appeared 728239 times.
57 characters appeared 1900431 times.
First 38 characters:
[ 0] Char e: 14.339660468609894 %
[ 1] Char s: 7.954806045817375 %
[ 2] Char a: 7.864176458552756 %
[ 3] Char n: 7.572102015959047 %
[ 4] Char i: 7.34154583866011 %
[ 5] Char r: 7.020222756540091 %
[ 6] Char t: 6.833608197308851 %
[ 7] Char l: 5.9446143367768 %
[ 8] Char o: 5.386418469760614 %
[ 9] Char u: 5.024861343597363 %
[10] Char d: 4.169235649285468 %
[11] Char c: 3.4240132703686568 %
[12] Char p: 2.8882001650557028 %
[13] Char m: 2.803063280049544 %
[14] Char é: 2.498355622261373 %
[15] Char g: 1.277739862874688 %
[16] Char v: 1.1729665672945284 %
[17] Char f: 1.1614318925517584 %
[18] Char b: 0.9925312981040565 %
[19] Char h: 0.8580974103282026 %
[20] Char q: 0.7740590657737364 %
[21] Char x: 0.43570860665248634 %
[22] Char y: 0.41044217626356183 %
[23] Char è: 0.4100302235941771 %
[24] Char à: 0.363479571953713 %
[25] Char j: 0.29591933417463223 %
[26] Char k: 0.1359443808969308 %
[27] Char ç: 0.11685724054877589 %
[28] Char ê: 0.11218844362908331 %
[29] Char z: 0.10738232915292918 %
[30] Char w: 0.08239053387692777 %
[31] Char ô: 0.04792382720507965 %
[32] Char â: 0.03364280133307884 %
[33] Char î: 0.029385957082770905 %
[34] Char û: 0.024854477719539875 %
[35] Char œ: 0.021146903695078125 %
[36] Char ï: 0.017851282340001016 %
[37] Char ù: 0.015242248767231636 %
[ 0] Char e: 14.210092342210793 %
[ 1] Char a: 8.0327567799094 %
[ 2] Char s: 7.818647454182762 %
[ 3] Char i: 7.531554684174274 %
[ 4] Char n: 7.491616375443256 %
[ 5] Char r: 7.05650455080979 %
[ 6] Char t: 6.771779664718161 %
[ 7] Char l: 5.854461435327039 %
[ 8] Char o: 5.412772155368966 %
[ 9] Char u: 5.014546700195903 %
[10] Char d: 4.239248886173716 %
[11] Char c: 3.238896860764742 %
[12] Char m: 2.8875028875028876 %
[13] Char p: 2.787104609428072 %
[14] Char é: 2.546790701688196 %
[15] Char v: 1.3356443880361877 %
[16] Char g: 1.1728392138414918 %
[17] Char f: 1.1096956427252553 %
[18] Char b: 1.084859171419536 %
[19] Char h: 0.9054261901642312 %
[20] Char q: 0.7540920980556516 %
[21] Char y: 0.42858698895145364 %
[22] Char x: 0.4087493836924361 %
[23] Char à: 0.39127966235027745 %
[24] Char è: 0.3704422838819194 %
[25] Char j: 0.35176231076003284 %
[26] Char k: 0.17332910271406854 %
[27] Char z: 0.11539487621492178 %
[28] Char ê: 0.10397641377140239 %
[29] Char ç: 0.09292628882606103 %
[30] Char ô: 0.07540394784130547 %
[31] Char w: 0.06340666932922058 %
[32] Char î: 0.031729644485908724 %
[33] Char û: 0.029309140926453 %
[34] Char â: 0.02504694987610705 %
[35] Char ï: 0.019942844544211285 %
[36] Char ù: 0.016259469562430837 %
[37] Char œ: 0.010839646374953892 %
The first 38 characters have an accumulated ratio of 0.999621003544166.
The first 38 characters have an accumulated ratio of 0.9996521841624343.
914 sequences found.
1049 sequences found.
First 512 (typical positive ratio): 0.997057879992383
Next 512 (512-1024): 1.3731755646154627e-06
Rest: 3.8163916471489756e-17
First 512 (typical positive ratio): 0.997006678170155
Next 512 (512-1024): 0.00010839646374953892
Rest: 1.646491655585584e-05
- Processing end: 2015-12-03 21:10:27.987730
- Processing end: 2021-03-16 01:24:27.266283

View File

@ -1,159 +1,150 @@
= Logs of language model for German (de) =
- Generated by BuildLangModel.py
- Started: 2015-12-03 22:42:29.154759
- Maximum depth: 3
- Started: 2021-03-16 01:05:29.301622
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Wikipedia:Hauptseite (revision 140459035)
1740 (revision 145584733)
1890 (revision 148575121)
1925 (revision 148682812)
1965 (revision 148411693)
3. Dezember (revision 148684818)
Bundeswehreinsatz in Syrien (revision 148714599)
Clara Klabunde (revision 148697193)
Day Tripper (revision 145956669)
Dezember 2015 (revision 148713161)
Edwar al-Charrat (revision 148656295)
Enzyklika (revision 148704406)
Enzyklopädie (revision 148364925)
Facebook Inc. (revision 148280344)
Franz Neubauer (CSU) (revision 148710968)
Freie Inhalte (revision 148123311)
Gabriele Ferzetti (revision 148715582)
Georg von Waldburg zu Zeil und Trauchburg (revision 148710609)
Jim Loscutoff (revision 148690370)
Katarina Witt (revision 148713884)
Klavierkonzert (Gershwin) (revision 143900338)
Ludolf Camphausen (revision 145088962)
Mark Zuckerberg (revision 148714452)
Montenegro (revision 148692773)
NATO (revision 148697872)
NATO-Osterweiterung (revision 148697354)
Nekrolog 2015 (revision 148711617)
Peter-Ulrich-Haus (revision 148654149)
Philanthropie (revision 145561255)
Präsidentschaftswahl in Burkina Faso 2015 (revision 148677453)
Québec (Stadt) (revision 148716893)
Rivka Zohar (revision 148708850)
Roch Marc Kaboré (revision 148673951)
Rubber Soul (revision 148665720)
Salve Regina (Latry) (revision 148713279)
Schießerei in San Bernardino (revision 148711974)
Single (Musik) (revision 146450210)
The Giving Pledge (revision 148711856)
Ubi primum (Benedikt XIV.) (revision 136691297)
VTech (revision 148704025)
Walter Damrosch (revision 148716127)
We Can Work It Out (revision 148706519)
1. August (revision 148089156)
1. Januar (revision 148659041)
1. Juni (revision 148375663)
1. November (revision 147888516)
10. August (revision 148079904)
10. November (revision 148658709)
10. September (revision 148201788)
11. August (revision 148315737)
11. Oktober (revision 148087353)
12. Januar (revision 147377586)
12. September (revision 148359994)
13. Dezember (revision 148614781)
13. September (revision 148320520)
14. August (revision 148513270)
14. Dezember (revision 147968142)
15. April (revision 146544147)
15. August (revision 147827975)
16. April (revision 148712866)
16. Dezember (revision 148392316)
16. Februar (revision 148221712)
16. Jahrhundert (revision 147390194)
16. Juli (revision 147928181)
1652 (revision 142931287)
1654 (revision 145531451)
1656 (revision 144194148)
1657 (revision 147492859)
1662 (revision 147548355)
1665 (revision 147757128)
1666 (revision 147843417)
1667 (revision 148566099)
1668 (revision 145304760)
1670 (revision 147643990)
1672 (revision 145296252)
1673 (revision 147879655)
1674 (revision 146784434)
1679 (revision 146069377)
1685 (revision 148596629)
1688 (revision 140370621)
1692 (revision 146892539)
1693 (revision 147464373)
17. August (revision 148288443)
17. Februar (revision 145814425)
17. Jahrhundert (revision 147869798)
17. Oktober (revision 148327370)
1700er (revision 127393249)
1707 (revision 148288721)
1710er (revision 134739897)
1720er (revision 127302296)
1730 (revision 148694277)
1730er (revision 127393280)
1731 (revision 147730204)
1735 (revision 145436596)
1736 (revision 145680122)
1737 (revision 146645905)
1738 (revision 145094942)
1739 (revision 147843445)
1740er (revision 127393296)
1741 (revision 146530178)
1742 (revision 147010984)
Wikipedia:Hauptseite (revision 201839754)
1021 (revision 209824844)
1521 (revision 209838003)
16. März (revision 209315535)
1861 (revision 209842356)
1946 (revision 209524711)
1951 (revision 209835290)
Beyoncé (revision 209832932)
Bolivien (revision 209448707)
Bund der Schweizerinnen gegen das Frauenstimmrecht (revision 209693790)
Bundesgrenzschutz (revision 208691250)
Clara Weaver Parrish (revision 209287165)
Dornmühle (Fränkisch-Crumbach) (revision 209842366)
Edmund Weiskopf (revision 209843848)
Enrico Letta (revision 209811620)
Enzyklopädie (revision 209393223)
Ferdinand Magellan (revision 209566955)
Freie Inhalte (revision 207460431)
Geschichte der Bundesrepublik Deutschland (bis 1990) (revision 209662112)
Giovanni Gastel (revision 209840651)
Henry Darrow (revision 209836134)
Heribert von Köln (revision 208577962)
Homonhon (revision 207392862)
Internationales Olympisches Komitee (revision 209815926)
Jeanine Áñez (revision 209843969)
Jeanne dArc Mujawamariya (revision 209842628)
Kommunalwahlen in Hessen 2021 (revision 209834340)
Landtagswahl in Baden-Württemberg 2021 (revision 209842530)
Mark Lubotsky (revision 209830272)
Marvelous Marvin Hagler (revision 209843820)
Max Blokzijl (revision 209843982)
Molly Pitcher (revision 209843994)
Murray Walker (revision 209841073)
März 2021 (revision 209804897)
Nekrolog 2021 (revision 207237920)
Oscarverleihung 2021 (revision 209715006)
Thomas Bach (revision 209739384)
1. Dezember (revision 209839074)
1. Januar (revision 209777781)
1. November (revision 209796293)
10. Februar (revision 209675106)
10. Mai (revision 208810425)
10. März (revision 209821650)
11. Juli (revision 209510718)
11. März (revision 209819434)
11. November (revision 209630921)
12. Dezember (revision 209724301)
12. Mai (revision 208883973)
12. März (revision 209795040)
12. September (revision 209262794)
13. Dezember (revision 209710424)
13. Januar (revision 209629276)
13. März (revision 209795132)
13. Oktober (revision 209183744)
14. Februar (revision 209414444)
14. September (revision 209562392)
16. April (revision 209621904)
19. August (revision 208018991)
1920 (revision 209819215)
1921 (revision 209733600)
1923 (revision 209799201)
1924 (revision 209534204)
1925 (revision 209632533)
1926 (revision 209684778)
1927 (revision 209374750)
1929 (revision 209747684)
1930 (revision 209715589)
1931 (revision 209767120)
1933 (revision 209704894)
1934 (revision 209767120)
1936 (revision 209834629)
1939 (revision 209524711)
1940 (revision 209524711)
1941 (revision 209524711)
1942 (revision 209524711)
1944 (revision 209505481)
1945 (revision 209524711)
1947 (revision 209505481)
1948 (revision 209767120)
1950 (revision 209655464)
1952 (revision 209572541)
1954 (revision 209187815)
1955 (revision 209259419)
1957 (revision 209842142)
1965 (revision 209593366)
1980er (revision 209258403)
1990er (revision 209258403)
2. März (revision 209835819)
2. September (revision 209803579)
20. April (revision 209655478)
20. Jahrhundert (revision 207914301)
20. Januar (revision 209517100)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-12-03 22:50:46.517106
- Wikipedia parsing ended at: 2021-03-16 01:10:34.749053
59 characters appeared 1746165 times.
59 characters appeared 3848604 times.
First 31 characters:
[ 0] Char e: 14.27997926885489 %
[ 1] Char r: 8.696257226550754 %
[ 2] Char n: 8.464091308667852 %
[ 3] Char i: 8.258784250056554 %
[ 4] Char s: 6.690833913175444 %
[ 5] Char a: 6.370703799469123 %
[ 6] Char t: 5.925728668253001 %
[ 7] Char h: 4.540979804314025 %
[ 8] Char d: 4.367284878576767 %
[ 9] Char l: 4.083634708060234 %
[10] Char u: 3.899917819908199 %
[11] Char o: 3.6450163644329145 %
[12] Char c: 3.392405643223865 %
[13] Char m: 2.578565026787274 %
[14] Char g: 2.543631329227192 %
[15] Char b: 1.9455206123132693 %
[16] Char k: 1.7604292836014925 %
[17] Char f: 1.6422273954637734 %
[18] Char p: 1.519329502080273 %
[19] Char w: 1.0273370500496803 %
[20] Char z: 1.0037997554641171 %
[21] Char v: 0.9010603236234834 %
[22] Char ä: 0.4926224039538073 %
[23] Char j: 0.4661644231787947 %
[24] Char ü: 0.4094687500894818 %
[25] Char y: 0.34229296773214446 %
[26] Char ö: 0.3044958523392692 %
[27] Char ß: 0.14477440562604335 %
[28] Char x: 0.09918879372796958 %
[29] Char é: 0.07633871942227682 %
[30] Char q: 0.06099079983850323 %
[ 0] Char e: 13.62925362027374 %
[ 1] Char r: 9.404189155340482 %
[ 2] Char i: 8.18457809636949 %
[ 3] Char n: 7.829540269666611 %
[ 4] Char s: 6.804155480792516 %
[ 5] Char a: 6.737923673103287 %
[ 6] Char t: 5.6408765360115 %
[ 7] Char h: 4.424695292111114 %
[ 8] Char u: 4.194118178955279 %
[ 9] Char l: 4.1823216937881895 %
[10] Char d: 4.112010484840737 %
[11] Char o: 3.6970808116397533 %
[12] Char c: 3.4451453046351355 %
[13] Char m: 2.8236732072200725 %
[14] Char g: 2.3015618130626065 %
[15] Char b: 2.0475736137051253 %
[16] Char k: 1.9373258459431004 %
[17] Char p: 1.6796479970399656 %
[18] Char f: 1.6060368902594293 %
[19] Char z: 1.0385064298639195 %
[20] Char w: 0.9370410673584499 %
[21] Char v: 0.7894031186373033 %
[22] Char j: 0.6687879553209424 %
[23] Char ä: 0.5280616036360197 %
[24] Char y: 0.35885739348605367 %
[25] Char ü: 0.33731711550473886 %
[26] Char ö: 0.27194276158316105 %
[27] Char ß: 0.13979094757475696 %
[28] Char x: 0.09044838076351841 %
[29] Char é: 0.04185933392991329 %
[30] Char q: 0.02814007364748361 %
The first 31 characters have an accumulated ratio of 0.9993385504806246.
The first 31 characters have an accumulated ratio of 0.9991186414606439.
1188 sequences found.
1337 sequences found.
First 512 (typical positive ratio): 0.9934041448127945
Next 512 (512-1024): 1.1453671331174316e-06
Rest: 0.0001130256702826099
First 512 (typical positive ratio): 0.9936565191798025
Next 512 (512-1024): 0.0033731711550473885
Rest: 0.00017862552962171364
- Processing end: 2015-12-03 22:50:46.681265
- Processing end: 2021-03-16 01:10:34.853392

View File

@ -1,162 +1,162 @@
= Logs of language model for Italian (it) =
- Generated by BuildLangModel.py
- Started: 2016-09-21 18:43:12.831409
- Maximum depth: 5
- Started: 2021-03-16 01:25:53.681909
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Pieve Ligure (revision 83186252)
010 (prefisso) (revision 76157203)
1000 (revision 83185341)
1143 (revision 70627567)
1162 (revision 70627612)
118 - Emergenza sanitaria (revision 83267411)
1201 (revision 77523243)
1202 (revision 76764411)
1374 (revision 78259457)
1404 (revision 70628069)
1520 (revision 76854924)
1537 (revision 70628296)
1582 (revision 80626188)
1584 (revision 76837051)
1600 (revision 76869356)
1619 (revision 70628455)
1742 (revision 70628675)
1748 (revision 70628682)
1749 (revision 70628684)
1750 (revision 70628690)
1754 (revision 70628697)
1775 (revision 70628734)
1797 (revision 78338823)
1798 (revision 82047236)
1803 (revision 77502534)
1805 (revision 79369853)
1809 (revision 70628789)
1810 (revision 82930218)
1814 (revision 78338825)
1815 (revision 82669615)
1816 (revision 83185384)
1818 (revision 72407239)
1823 (revision 74880156)
1859 (revision 83185401)
1860 (revision 83185403)
1861 (revision 83185412)
1868 (revision 83185430)
1874 (revision 83185441)
1897 (revision 83185267)
1908 (revision 83185631)
1909 (revision 83185630)
1913 (revision 83185626)
1915 (revision 83185625)
1917 (revision 83185270)
1920 (revision 83185621)
1921 (revision 83185619)
1923 (revision 83185616)
1925 (revision 83185614)
1926 (revision 83185612)
1928 (revision 83185610)
1929 (revision 83185609)
1939 (revision 83185598)
1946 (revision 83185590)
1947 (revision 83185589)
1948 (revision 83185587)
1951 (revision 83185584)
1956 (revision 83185478)
1960 (revision 83185487)
1964 (revision 83185493)
1965 (revision 83185494)
1969 (revision 83185500)
1970 (revision 83185503)
1971 (revision 83185505)
1975 (revision 83185510)
1976 (revision 83185513)
1977 (revision 83185514)
1980 (revision 83185518)
1981 (revision 83308867)
1983 (revision 83185524)
1985 (revision 83185526)
1988 (revision 83185280)
1990 (revision 83185531)
1995 (revision 83185538)
1999 (revision 83326325)
2000 (revision 83185544)
2001 (revision 83309058)
2002 (revision 83185545)
2003 (revision 83185546)
2004 (revision 83185283)
2005 (revision 83185285)
2006 (revision 83185547)
2007 (revision 83185549)
2008 (revision 83185551)
2009 (revision 83185552)
2010 (revision 83185287)
2012 (revision 83185289)
712 (revision 70630167)
749 (revision 78272323)
ATP (Provincia di Genova) (revision 82754117)
Abbazia di San Colombano (revision 83062997)
Abbazia di San Fruttuoso (revision 83288120)
Acacia dealbata (revision 83036867)
Acquedotto (revision 82973825)
Affresco (revision 82000422)
Agricoltura (revision 82578266)
Allevamento (revision 82971452)
Altitudine (revision 82971213)
Angelo (revision 82333116)
Anni 1960 (revision 83161222)
Anni 1970 (revision 81663175)
Antica Roma (revision 83125874)
Pieve Ligure (revision 118508492)
010 (prefisso) (revision 94383168)
AMT (Genova) (revision 118888771)
Abbazia di San Colombano (revision 119100076)
Abbazia di San Fruttuoso (revision 119098176)
Acacia dealbata (revision 118537500)
Affresco (revision 119234348)
Agenzia nazionale per le nuove tecnologie, l'energia e lo sviluppo economico sostenibile (revision 119261985)
Agricoltura (revision 119211593)
Altitudine (revision 118983270)
Antica Roma (revision 118468482)
Anton Maria Maragliano (revision 116868790)
Appennino Ligure (revision 117194376)
Arcidiocesi di Genova (revision 119158953)
Area (revision 118021697)
Area naturale marina protetta Portofino (revision 117836953)
Arenzano (revision 118507675)
Austria (revision 119220244)
Avegno (revision 118656626)
Bargagli (revision 118656627)
Batteria di Punta Chiappa (revision 118356835)
Battesimo (revision 118993799)
Bogliasco (revision 118656629)
Bogliasco Pieve (revision 118656629)
Borzonasca (revision 118854360)
Busalla (revision 118656635)
Calcio (sport) (revision 118995232)
Calcio a 5 (revision 118431165)
Camogli (revision 118850151)
Campo Ligure (revision 119083085)
Campomorone (revision 119226877)
Cantiere navale (revision 115540115)
Carabinieri (revision 119285803)
Carasco (revision 118801735)
Caravella (revision 118751709)
Casarza Ligure (revision 118656643)
Casella (Italia) (revision 118797269)
Castello della Dragonara (revision 108868054)
Castiglione Chiavarese (revision 118656646)
Centrismo (revision 117397211)
Centro-destra (revision 117992364)
Centrolabrus melanocercus (revision 116914326)
Ceranesi (revision 118656648)
Cesare Lanza (revision 115376996)
Chiavari (revision 119146951)
Chiesa di San Michele Arcangelo (Pieve Ligure) (revision 119097578)
Chiesa di Santa Croce (Pieve Ligure) (revision 119097599)
Chilometro quadrato (revision 116585233)
Cicagna (revision 118656655)
Circondario di Genova (revision 113691033)
Città dell'olio (revision 118165836)
Città metropolitana di Genova (revision 119014943)
Città metropolitane d'Italia (revision 119240923)
Classificazione climatica dei comuni italiani (revision 118213893)
Classificazione sismica dell'Italia (revision 118461862)
Claudio Burlando (revision 119123207)
Codice catastale (revision 116588085)
Codice postale (revision 105346722)
Cogoleto (revision 118508042)
Cogorno (revision 118962627)
Compagnia di Gesù (revision 119271066)
Comune (Italia) (revision 118913656)
Comune medievale (revision 113420512)
Comuni d'Italia (revision 119120484)
Comuni della Liguria (revision 113527316)
Comunità montana Fontanabuona (revision 105560751)
Concilio di Trento (revision 118571991)
Congresso di Vienna (revision 118881415)
Coordinate geografiche (revision 118353691)
Corallo (revision 117035534)
Coreglia Ligure (revision 118656657)
Corona (copricapo) (revision 117780990)
Cristo degli abissi (revision 117435230)
Cristoforo Colombo (revision 119014639)
Croce (revision 117653124)
Crocefieschi (revision 118656658)
Crêuza (revision 119275449)
Davagna (revision 118656659)
Decreto del presidente della Repubblica (revision 119120849)
Democrazia Cristiana (revision 119162011)
Densità di popolazione (revision 119143170)
Dipartimento di Genova (revision 118450361)
Ebano (revision 116535223)
Erba sintetica (revision 114157150)
Etnico (onomastica) (revision 117289144)
Fascia (Italia) (revision 118955929)
Favale di Malvaro (revision 118656662)
Federico Barbarossa (revision 118793984)
Fermata ferroviaria (revision 119085486)
Ferrovia Genova-Pisa (revision 119025272)
Flora (revision 110652725)
Floricoltura (revision 113487805)
Fontanigorda (revision 118803588)
Francesco Bossi (vescovo) (revision 117422608)
Frazione (geografia) (revision 119001222)
Fuso orario (revision 119022172)
Galleria (ingegneria) (revision 115407813)
Gas (revision 117414169)
Genova (revision 119208791)
Germania nazista (revision 119177156)
Giacomo il Maggiore (revision 118986303)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2016-09-21 18:46:08.840718
- Wikipedia parsing ended at: 2021-03-16 01:31:12.602302
59 characters appeared 823241 times.
54 characters appeared 1487235 times.
First 34 characters:
[ 0] Char i: 11.823147778111148 %
[ 1] Char a: 11.252112078965942 %
[ 2] Char e: 10.910170897707962 %
[ 3] Char o: 8.936386793174782 %
[ 4] Char n: 7.317055394471364 %
[ 5] Char l: 6.931263141655967 %
[ 6] Char r: 6.521784021932824 %
[ 7] Char t: 6.386708145002497 %
[ 8] Char s: 4.572415610981475 %
[ 9] Char c: 4.116291584116923 %
[10] Char d: 3.9770856893667834 %
[11] Char u: 2.8944136650142545 %
[12] Char m: 2.762860450342002 %
[13] Char p: 2.6809889206198427 %
[14] Char g: 2.1493098618751985 %
[15] Char v: 1.5369739845318686 %
[16] Char b: 1.2855287819727153 %
[17] Char f: 0.9932692856648295 %
[18] Char z: 0.9664241698360504 %
[19] Char h: 0.7159507361756764 %
[20] Char q: 0.2416060424590126 %
[21] Char k: 0.18876610858788617 %
[22] Char à: 0.15596890825408355 %
[23] Char y: 0.12462936126844994 %
[24] Char è: 0.11600491229178332 %
[25] Char w: 0.10628722330398996 %
[26] Char x: 0.10312897438295712 %
[27] Char j: 0.07555503188009344 %
[28] Char ù: 0.05575524056746445 %
[29] Char ò: 0.03304014255849745 %
[30] Char é: 0.021014502436103158 %
[31] Char ì: 0.0191924357508919 %
[32] Char á: 0.004737373381549267 %
[33] Char ó: 0.003644133370422513 %
[ 0] Char i: 11.700840822062418 %
[ 1] Char e: 11.23655642854021 %
[ 2] Char a: 11.108197426768466 %
[ 3] Char o: 9.061513479712351 %
[ 4] Char n: 7.150383093458666 %
[ 5] Char l: 7.047440384337378 %
[ 6] Char t: 6.5587482812064 %
[ 7] Char r: 6.521363469794619 %
[ 8] Char s: 4.669067094305877 %
[ 9] Char c: 4.495120139049982 %
[10] Char d: 3.939861555167811 %
[11] Char u: 2.7531627483215497 %
[12] Char p: 2.6924460492121285 %
[13] Char m: 2.5125820734450173 %
[14] Char g: 1.9460273594959776 %
[15] Char v: 1.64123356429885 %
[16] Char f: 1.1068862688142762 %
[17] Char b: 1.0097933413347588 %
[18] Char z: 0.9880079476343685 %
[19] Char h: 0.7280624783574889 %
[20] Char q: 0.27574660359660713 %
[21] Char à: 0.2058854182425777 %
[22] Char è: 0.14859790147488458 %
[23] Char ò: 0.10186688721015845 %
[24] Char ù: 0.07302141221797497 %
[25] Char x: 0.06501998675394272 %
[26] Char k: 0.05291699025372587 %
[27] Char y: 0.04471384818135668 %
[28] Char w: 0.04115018810073727 %
[29] Char ì: 0.041015710361845974 %
[30] Char é: 0.024474948478216286 %
[31] Char j: 0.019028600053118707 %
[32] Char ö: 0.006791125814010562 %
[33] Char ó: 0.004505004252858493 %
The first 34 characters have an accumulated ratio of 0.9997947138201325.
The first 34 characters have an accumulated ratio of 0.9997202863031062.
872 sequences found.
921 sequences found.
First 512 (typical positive ratio): 0.9989484485502651
Next 512 (512-1024): 1.214711123474171e-06
Rest: -4.336808689942018e-17
First 512 (typical positive ratio): 0.9992462827093448
Next 512 (512-1024): 0.0007302141221797497
Rest: -2.0166160408230382e-17
- Processing end: 2016-09-21 18:46:08.920456
- Processing end: 2021-03-16 01:31:12.679004

View File

@ -1,109 +1,161 @@
= Logs of language model for Spanish (es) =
- Generated by BuildLangModel.py
- Started: 2015-12-12 18:37:37.085123
- Maximum depth: 2
- Max number of pages: 50
- Started: 2021-03-16 11:26:55.275471
- Maximum depth: 4
- Max number of pages: 100
== Parsed pages ==
Wikipedia:Portada (revision 84894710)
11 de diciembre (revision 87735970)
12 de diciembre (revision 87742023)
13 de diciembre (revision 87697780)
1474 (revision 66715698)
1915 (revision 86935345)
2000 (revision 87686385)
2015 (revision 87743360)
Actuación (revision 87459085)
Akiyuki Nosaka (revision 87726149)
Alberto Podestá (revision 87729965)
Alejandro Magno (revision 87717064)
Argentina (revision 87742018)
Arnold Peralta (revision 87733100)
Atentados del 11 de diciembre de 2007 (revision 87720544)
Cantante (revision 86761085)
Canto (revision 87664585)
Carlo Furno (revision 87726011)
Ciencia ficción (revision 87662615)
Copa Mundial de Clubes de la FIFA 2015 (revision 87734956)
Corona de Castilla (revision 87209578)
Crisis migratoria en Europa (revision 87609406)
Dictadura de Primo de Rivera (revision 87371131)
Dionisio Miguel Recio (revision 87724426)
Disneyland (revision 87665192)
Dolph Schayes (revision 87730770)
Día Internacional de las Montañas (revision 87739490)
El discurso del rey (revision 87570241)
Elecciones regionales de Francia de 2015 (revision 87744011)
Estados Unidos (revision 87510736)
Fiction House (revision 87732511)
Filoxeno de Eretria (revision 83958621)
Frank Sinatra (revision 87742871)
Fundación Wikimedia (revision 87703852)
Geoffrey Marcy (revision 87706505)
Gheorghe Gruia (revision 87737327)
Grupo de Acción Republicana (revision 87739104)
Guerra contra el Estado Islámico (revision 87648946)
Here We Go Again (canción) (revision 87680365)
Isaac Asimov (revision 87591711)
Isabel I de Castilla (revision 87743713)
John "Hot Rod" Williams (revision 87730438)
José Subirà-Puig (revision 87740413)
Julio Terrazas Sandoval (revision 87736542)
Libertad Lamarque (revision 87508996)
Mosaico de Issos (revision 87731652)
Museo Arqueológico Nacional de Nápoles (revision 87302262)
Philip K. Dick (revision 87725371)
Planet Comics (revision 86698920)
Wikipedia:Portada (revision 123425818)
15 de marzo (revision 134002604)
16 de marzo (revision 133912948)
17 de marzo (revision 134014922)
1971 (revision 133880223)
1996 (revision 133952177)
2021 (revision 134007556)
Accipitridae (revision 132476150)
Alan Tudyk (revision 133512405)
América del Norte (revision 133943336)
Aquila (animal) (revision 117806396)
Aquila chrysaetos (revision 131580419)
Aranjuez (revision 134014704)
Aurora Cornu (revision 134003763)
Beatlemanía en el Reino Unido (revision 127586904)
Bebe Daniels (revision 134008877)
Bob Walkup (revision 134014429)
Carlos IV de España (revision 133996847)
Carlos Velasco Carballo (revision 133836906)
Centre Bell (revision 121340657)
Chemancheri Kunhiraman Nair (revision 134003097)
Comunidad de Madrid (revision 133999674)
Copa Libertadores 2021 (revision 134005909)
Copa Libertadores Femenina 2020 (revision 134010574)
Derecho del consumo (revision 133897891)
Elecciones generales de los Países Bajos de 2021 (revision 133978338)
España (revision 133914408)
Especie (revision 133121989)
Estados Unidos (revision 134015635)
Eurasia (revision 133581203)
Familia (biología) (revision 132469743)
Fernando VII de España (revision 133996527)
Fernando VI de España (revision 133038817)
Frankie de la Cruz (revision 134001053)
Fundación Wikimedia (revision 133870365)
Género (biología) (revision 132578267)
Henry Darrow (revision 134014606)
Hueso oracular (revision 133943486)
Hungría (revision 133720525)
Incendios en la Patagonia argentina de 2021 (revision 134013731)
Iztapalapa (revision 133879018)
Jim Dornan (revision 134003749)
Josep Anton Codina Olivé (revision 134001289)
Laxman Pai (revision 134003882)
Lorenzo I Suárez de Figueroa (revision 130396052)
Manuel Godoy (revision 133790185)
Mark Lubotski (revision 134012323)
Marvin Hagler (revision 133992695)
Motín de Aranjuez (revision 133995861)
Orden de Santiago (revision 132833929)
Pandemia de COVID-19 (revision 133965486)
Partido judicial de Aranjuez (revision 119111968)
Pirámide de Mayo (revision 134001660)
Problema de los puentes de Königsberg (revision 133982384)
Protestas en Birmania de 2021 (revision 134001180)
Protestas en Paraguay de 2021 (revision 133946792)
Raoul Casadei (revision 134004118)
Reconquista (revision 134015214)
Reserva del Regajal-Mar de Ontígola (revision 130660000)
Reserva natural del Carrizal de Villamejor (revision 133997932)
Revolución de Octubre (revision 133949459)
Reyes Católicos (revision 133817736)
Río Jarama (revision 133989542)
Sitio de Osaka (revision 133841594)
Tajo (revision 133599349)
The Beatles (revision 133923045)
The Beatles en los Estados Unidos (revision 132489522)
Thomas E. Dewey (revision 133282206)
Tirreno-Adriático 2021 (revision 133997271)
Torneo de Acapulco 2021 (revision 134007806)
Torneo de Dubái 2021 (revision 133993636)
Wikipedia en español (revision 133678639)
Zona especial de conservación (revision 125067184)
África (revision 133842601)
Águila (revision 133882643)
0 de marzo (revision 124578320)
1086 (revision 131546903)
10 de marzo (revision 133880871)
1190 (revision 133725464)
11 de marzo (revision 133917865)
12 Horas de Sebring (revision 130945879)
12 de marzo (revision 133976376)
1311 (revision 130818429)
13 de febrero (revision 133955522)
13 de marzo (revision 133955664)
1416 (revision 130880976)
1455 (revision 130905583)
1493 (revision 130905628)
14 de marzo (revision 133988159)
1530 (revision 130937867)
1552 (revision 133149262)
1575 (revision 130983277)
1583 (revision 130984233)
1591 (revision 130984579)
1594 (revision 130984689)
15 de abril (revision 134002206)
15 de febrero (revision 133748957)
1638 (revision 131037338)
1657 (revision 131045532)
1660 (revision 131045617)
1666 (revision 132768900)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-12-12 18:39:02.288858
- Wikipedia parsing ended at: 2021-03-16 11:33:00.148262
52 characters appeared 991829 times.
54 characters appeared 1548023 times.
First 33 characters:
[ 0] Char e: 12.571925200815867 %
[ 1] Char a: 11.81988024145291 %
[ 2] Char o: 8.07941691561751 %
[ 3] Char n: 7.234513207417812 %
[ 4] Char s: 7.042242160695039 %
[ 5] Char i: 7.040528155559072 %
[ 6] Char r: 6.8208330266608455 %
[ 7] Char l: 5.722559029832763 %
[ 8] Char d: 5.275707808503281 %
[ 9] Char t: 4.668647518876742 %
[10] Char c: 4.466999855821921 %
[11] Char u: 3.673717949364255 %
[12] Char m: 2.710547886782903 %
[13] Char p: 2.4541528832086983 %
[14] Char b: 1.3867309788280036 %
[15] Char g: 1.2748165258325779 %
[16] Char f: 0.925058654263991 %
[17] Char y: 0.9045914164639268 %
[18] Char v: 0.8877538365988492 %
[19] Char ó: 0.8641610600214351 %
[20] Char h: 0.7369213846338432 %
[21] Char q: 0.5913317719082624 %
[22] Char í: 0.5612862701130941 %
[23] Char j: 0.43283670874717317 %
[24] Char z: 0.38071078784750195 %
[25] Char á: 0.37587124393418625 %
[26] Char é: 0.29632124085905936 %
[27] Char k: 0.2001353055819098 %
[28] Char x: 0.18743150280945606 %
[29] Char ñ: 0.17462687620547493 %
[30] Char ú: 0.12865120902897575 %
[31] Char w: 0.0972949974239511 %
[32] Char ü: 0.004436248587206061 %
[ 0] Char e: 12.61415366567551 %
[ 1] Char a: 11.863648020733542 %
[ 2] Char o: 8.178496055937154 %
[ 3] Char n: 7.268238262609793 %
[ 4] Char i: 6.931486160089352 %
[ 5] Char s: 6.9263182782167965 %
[ 6] Char r: 6.65558586661826 %
[ 7] Char l: 5.899266354569667 %
[ 8] Char d: 5.353731824397958 %
[ 9] Char t: 4.62951777848262 %
[10] Char c: 4.489080588595906 %
[11] Char u: 3.50666624462298 %
[12] Char m: 2.6851022239333653 %
[13] Char p: 2.477159577086387 %
[14] Char b: 1.394552923309279 %
[15] Char g: 1.3049547713438365 %
[16] Char v: 0.9327380794729794 %
[17] Char f: 0.9320274957155029 %
[18] Char y: 0.9299603429664806 %
[19] Char ó: 0.786745416573268 %
[20] Char h: 0.7480509010525037 %
[21] Char í: 0.5318396432094356 %
[22] Char q: 0.49553527305472855 %
[23] Char z: 0.47085863711327286 %
[24] Char j: 0.408844054642599 %
[25] Char á: 0.39095026365887325 %
[26] Char é: 0.305228023097848 %
[27] Char ñ: 0.23759336909076934 %
[28] Char x: 0.19883425504659816 %
[29] Char k: 0.19489374511877408 %
[30] Char ú: 0.13029522171182212 %
[31] Char w: 0.10923610308115578 %
[32] Char ü: 0.0067182464343230035 %
The first 33 characters have an accumulated ratio of 0.9999263986029848.
The first 33 characters have an accumulated ratio of 0.9998830766726332.
897 sequences found.
1002 sequences found.
First 512 (typical positive ratio): 0.9970385677528184
Next 512 (512-1024): 1.0082383152741046e-06
Rest: 4.597017211338539e-17
First 512 (typical positive ratio): 0.9966074680689881
Next 512 (512-1024): 6.718246434323004e-05
Rest: 3.209238430557093e-17
- Processing end: 2015-12-12 18:39:02.460105
- Processing end: 2021-03-16 11:33:00.247475

View File

@ -36,3 +36,4 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: Arabic *********/
/**
* Generated by BuildLangModel.py
* On: 2015-12-13 18:33:58.848027
* On: 2021-03-16 11:42:48.952857
**/
/* Character Mapping Table:
@ -67,17 +68,17 @@ static const unsigned char Iso_8859_6_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */
64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */
62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */
66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */
51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,ILL,ILL,ILL,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,SYM,ILL,ILL, /* AX */
ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,SYM,ILL,ILL,ILL,SYM, /* BX */
ILL, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */
30, 5, 26, 12, 21, 23, 28,SYM, 33, 10, 29,ILL,ILL,ILL,ILL,ILL, /* DX */
36, 13, 14, 17, 1, 3, 6, 16, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */
ILL, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */
28, 6, 27, 12, 22, 21, 29,SYM, 33, 10, 30,ILL,ILL,ILL,ILL,ILL, /* DX */
40, 13, 15, 17, 1, 3, 5, 14, 4, 24, 2,SYM,SYM,SYM,SYM,SYM, /* EX */
SYM,SYM,SYM,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -88,159 +89,172 @@ static const unsigned char Windows_1256_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 52, 72, 61, 68, 74, 69, 59, 78, 60, 90, 86, 67, 65, 71, 75, /* 4X */
64, 85, 76, 55, 57, 79, 81, 70, 82, 87, 91,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 37, 58, 49, 47, 38, 54, 66, 46, 39, 88, 63, 45, 51, 43, 40, /* 6X */
62, 89, 42, 44, 41, 50, 77, 73, 83, 56, 80,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 95,SYM, 96, 92, 97, 98, /* 8X */
53,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 84,SYM, 99,SYM,100,SYM,SYM,101, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,102,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM, 62, 63, 54, 67, 59, 70, 78, 76, 60, 83, 81, 77, 65, 71, 75, /* 4X */
66, 90, 68, 58, 61, 73, 69, 79, 84, 87, 88,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 36, 55, 46, 48, 37, 53, 56, 50, 38, 91, 64, 44, 47, 43, 45, /* 6X */
51, 80, 41, 42, 39, 52, 57, 72, 85, 49, 86,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 97,SYM,101, 93, 98,102, /* 8X */
82,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 94,SYM,103,SYM,104,SYM,SYM,105, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 99,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
103, 32, 34, 15, 35, 22, 31, 0, 9, 8, 7, 27, 19, 18, 25, 11, /* CX */
30, 5, 26, 12, 21, 23, 28,SYM, 20, 33, 10, 29, 36, 13, 14, 17, /* DX */
104, 1, 93, 3, 6, 16, 4,105,106, 94,107,108, 24, 2,109,110, /* EX */
SYM,SYM,SYM,SYM,111,SYM,SYM,SYM,SYM,112,SYM,113,114,SYM,SYM,115, /* FX */
106, 32, 34, 16, 35, 23, 31, 0, 8, 9, 7, 26, 19, 18, 25, 11, /* CX */
28, 6, 27, 12, 22, 21, 29,SYM, 20, 33, 10, 30, 40, 13, 15, 17, /* DX */
107, 1, 96, 3, 5, 14, 4,108,109, 89,110,111, 24, 2,100,112, /* EX */
SYM,SYM,SYM,SYM, 95,SYM,SYM,SYM,SYM,113,SYM,114, 92,SYM,SYM,115, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 64;
static const unsigned int Unicode_CharOrder[] =
{
65, 62, 66, 63, 67, 54, 69, 59, 73, 60, 83, 58, 84, 61, 97, 36,
98, 55, 99, 46, 100, 48, 101, 37, 102, 53, 103, 56, 104, 50, 105, 38,
108, 44, 109, 47, 110, 43, 111, 45, 112, 51, 114, 41, 115, 42, 116, 39,
117, 52, 118, 57, 121, 49, 1569, 32, 1570, 34, 1571, 16, 1572, 35,1573, 23,
1574, 31, 1575, 0, 1576, 8, 1577, 9, 1578, 7, 1579, 26, 1580, 19,1581, 18,
1582, 25, 1583, 11, 1584, 28, 1585, 6, 1586, 27, 1587, 12, 1588, 22,1589, 21,
1590, 29, 1591, 20, 1592, 33, 1593, 10, 1594, 30, 1600, 40, 1601, 13,1602, 15,
1603, 17, 1604, 1, 1605, 3, 1606, 5, 1607, 14, 1608, 4, 1609, 24,1610, 2,
};
/* Model Table:
* Total sequences: 1479
* First 512 sequences: 0.9696025116913417
* Next 512 sequences (512-1024): 0.029166911858880054
* Rest: 0.0012305764497782395
* Total sequences: 1820
* First 512 sequences: 0.9644868613755061
* Next 512 sequences (512-1024): 0.03359397057105059
* Rest: 0.0019191680534433112
* Negative sequences: TODO
*/
static const PRUint8 ArabicLangModel[] =
{
2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,3,1,3,3,3,3,2,2,3,
3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,1,3,3,3,3,3,3,3,
3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
1,2,3,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,1,3,3,3,3,2,2,3,
2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,3,2,3,3,3,2,2,2,2,
0,2,1,3,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,2,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,2,3,2,2,
2,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,2,3,3,2,3,2,3,2,
0,2,2,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,
2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,3,2,3,3,2,3,
1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,0,3,2,2,3,2,2,2,3,2,
0,3,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,3,3,2,2,
0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,1,3,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,1,1,1,1,1,1,
3,2,3,3,3,2,2,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,1,2,2,2,3,3,2,3,2,
0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,2,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,1,3,2,2,3,2,3,3,3,
2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,3,2,3,3,2,
0,2,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,2,2,
1,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,1,0,0,1,1,0,1,1,1,1,1,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,2,1,2,2,2,2,2,2,2,
1,2,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,2,3,0,3,2,0,3,3,3,0,2,0,
0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,0,2,0,0,3,3,2,3,0,2,0,2,
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,3,3,1,0,0,2,2,0,1,0,1,0,1,
0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,3,3,3,3,2,3,2,3,2,3,2,3,2,2,2,2,2,2,2,2,2,2,1,3,2,2,2,
1,3,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,0,2,1,3,2,0,3,2,0,2,0,3,0,2,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,2,3,2,3,2,3,2,2,
3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,0,2,0,2,2,3,3,0,2,0,3,3,2,3,0,0,
0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,2,2,0,0,2,1,3,3,3,2,0,0,2,2,
2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,2,2,3,0,0,0,2,2,1,1,0,0,1,1,
0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,2,2,3,2,3,2,2,1,2,2,3,2,2,
1,3,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,0,3,2,2,2,2,0,2,0,3,2,0,2,0,2,0,0,2,3,2,0,0,
0,1,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,0,1,2,1,3,3,2,0,2,0,1,2,2,2,0,0,
0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,2,3,3,2,2,2,3,2,
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,1,3,2,1,2,0,2,2,0,3,2,2,0,0,2,0,2,1,2,0,3,0,
0,1,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,2,2,2,1,0,2,3,3,0,1,3,0,
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,1,3,3,3,3,0,2,3,0,3,2,2,0,3,2,0,3,2,3,0,2,0,
0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,1,2,1,0,1,0,0,1,0,3,2,0,2,2,2,
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,3,3,2,3,3,2,2,3,2,3,2,2,0,2,1,2,1,1,0,2,1,0,0,0,1,0,2,
1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,3,2,3,2,3,3,2,1,2,2,2,3,3,2,2,2,0,0,0,2,3,1,0,0,2,1,2,
0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,1,2,3,2,0,2,3,3,3,2,3,0,2,2,2,3,2,2,0,3,0,2,2,2,3,2,3,1,
3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,2,2,2,1,1,2,1,1,2,3,3,3,1,2,1,
0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,3,3,0,3,3,3,2,3,0,3,1,3,2,3,2,0,2,0,2,2,2,3,0,0,
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,1,2,1,0,0,1,0,1,0,1,3,2,0,2,2,
0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,3,3,2,3,3,3,2,2,2,2,3,2,0,2,0,2,0,1,0,2,1,0,1,0,0,2,2,
1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,0,1,3,0,2,2,0,0,2,2,0,0,0,0,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,2,3,3,3,2,3,2,1,3,3,2,2,3,2,2,2,0,1,0,2,3,0,0,2,0,2,2,
0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,1,3,2,2,2,0,3,2,3,2,2,3,0,2,2,2,2,2,2,0,0,2,2,2,3,3,2,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,2,3,0,3,2,0,2,1,3,0,2,0,0,2,2,2,0,0,0,2,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,2,3,2,3,2,2,0,0,2,0,0,1,3,2,0,3,0,1,2,0,2,0,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,0,2,2,1,2,2,2,2,0,0,0,0,1,2,2,0,0,1,0,2,
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,2,2,1,1,2,3,1,2,2,0,0,0,0,0,0,1,0,0,2,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,2,3,2,0,2,0,1,2,0,2,1,2,0,0,0,2,2,0,0,0,2,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,2,1,2,2,2,0,0,2,0,0,2,2,1,0,2,1,0,2,0,2,0,2,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,2,2,2,2,2,0,0,0,2,2,0,3,3,0,2,0,0,0,0,2,2,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,2,2,3,2,2,2,2,2,2,0,2,2,2,2,2,2,0,1,0,1,2,0,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,1,1,0,0,2,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,2,3,2,2,1,2,3,2,0,0,0,2,0,0,3,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,3,2,2,2,3,2,2,0,2,0,2,2,2,2,0,1,2,1,1,0,2,0,1,0,3,1,2,0,1,2,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,3,2,1,2,1,1,0,2,2,0,2,0,2,2,0,0,0,2,0,0,2,2,1,2,0,0,0,0,
0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,3,2,1,3,2,2,2,0,0,0,0,1,3,3,2,0,0,1,0,2,3,2,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,3,2,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,
0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,2,3,2,2,1,2,2,2,1,2,2,2,0,0,0,0,2,2,0,1,0,0,1,2,
2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,2,2,2,2,2,2,0,0,2,3,1,0,3,1,2,0,0,0,0,2,1,0,0,0,0,0,1,
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,2,2,1,2,2,2,2,2,1,2,0,2,1,2,0,0,1,0,1,0,1,0,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,1,1,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,2,0,0,0,1,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,1,2,2,2,2,2,2,0,2,0,2,1,2,0,0,1,0,0,0,1,0,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,1,2,2,2,2,2,1,1,2,0,2,2,2,0,0,2,0,0,0,1,0,0,0,2,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,2,2,1,2,2,2,1,0,1,1,1,0,0,0,0,2,0,2,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,2,1,2,2,2,0,1,0,2,1,2,0,0,0,0,2,0,1,0,0,0,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,2,0,1,2,1,1,2,0,2,1,0,0,0,1,0,1,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,1,2,0,0,2,1,2,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,1,0,0,1,2,0,2,0,0,1,0,0,0,1,1,1,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,2,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,1,1,1,1,1,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,
2,2,1,0,2,2,1,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,1,3,2,2,2,3,2,0,2,2,0,0,0,2,2,2,0,0,0,2,2,0,0,0,0,2,1,
0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,3,0,2,2,2,2,0,0,1,0,1,2,1,2,0,1,1,0,2,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,2,2,2,3,2,2,3,2,3,2,2,2,2,0,2,2,2,2,2,2,0,1,0,0,2,2,1,1,0,
0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,0,1,1,0,0,2,0,2,0,0,1,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,2,3,2,2,3,2,1,2,1,0,0,2,3,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,2,2,2,3,2,2,2,1,0,2,2,2,1,0,0,2,2,1,0,0,2,0,0,3,2,2,2,0,1,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,0,1,0,2,2,0,2,2,0,2,0,1,0,0,2,0,1,2,2,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,2,1,0,0,1,2,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,1,
0,0,0,0,1,2,2,2,0,2,2,2,2,1,2,2,2,2,1,1,1,1,1,2,2,1,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,0,2,2,2,2,1,0,2,0,1,0,2,0,0,0,0,0,0,2,0,0,0,0,1,
0,0,0,0,2,1,1,2,0,2,2,2,2,2,2,2,2,1,1,2,2,2,0,2,2,2,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,2,2,2,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,2,0,0,0,0,0,1,0,
0,0,0,0,2,2,1,2,0,2,2,2,2,2,2,2,2,1,0,1,1,1,0,2,2,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,1,0,2,2,1,1,2,1,1,0,2,2,1,2,1,1,0,0,1,0,0,1,0,0,0,
1,1,0,1,0,1,0,0,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,1,0,1,1,
0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,0,1,2,2,1,2,1,2,2,1,2,1,2,0,1,1,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,0,0,2,1,1,2,1,1,0,1,1,2,1,0,0,0,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,0,1,2,2,1,2,2,1,2,1,0,0,2,1,1,0,2,1,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,1,0,0,1,0,2,2,1,1,1,1,0,0,1,1,0,1,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,2,0,2,2,2,2,1,1,2,2,1,1,2,2,2,0,1,2,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,0,1,0,0,1,2,1,0,2,1,2,0,2,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,0,0,0,1,1,0,2,1,2,0,1,1,1,2,0,0,2,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,0,0,1,1,0,0,2,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,2,2,2,1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,1,
2,2,2,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,1,0,0,0,1,2,1,2,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,2,2,0,2,1,1,0,2,1,0,0,1,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,1,0,2,2,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,1,0,0,1,2,2,0,
0,0,0,0,2,2,1,1,0,2,2,1,2,2,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,1,0,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,2,2,0,2,2,2,2,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,2,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,1,1,1,2,0,2,0,0,1,1,1,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,0,1,1,0,1,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,
0,0,0,0,1,1,1,0,0,1,0,0,1,2,0,0,0,1,1,0,1,1,1,0,0,0,1,1,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,0,0,2,0,2,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,
0,0,0,0,2,1,1,0,0,1,1,1,2,1,1,0,1,1,0,0,1,2,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,1,0,0,2,0,1,0,2,0,0,0,0,
0,0,0,0,2,2,1,0,0,1,1,1,1,1,0,0,0,1,2,0,1,0,0,1,1,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,0,0,0,1,2,2,1,0,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,1,0,1,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
0,0,0,0,1,1,1,1,0,0,0,1,0,1,1,0,0,1,1,0,2,0,0,0,0,1,1,1,1,0,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,1,0,1,1,2,0,0,1,1,0,0,0,0,0,0,1,1,0,1,2,1,1,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,0,0,1,0,0,0,1,0,0,0,1,2,0,1,0,1,0,0,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,1,0,1,1,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
};
@ -249,7 +263,7 @@ const SequenceModel Iso_8859_6ArabicModel =
Iso_8859_6_CharToOrderMap,
ArabicLangModel,
64,
(float)0.9696025116913417,
(float)0.9644868613755061,
PR_FALSE,
"ISO-8859-6",
"ar"
@ -260,8 +274,18 @@ const SequenceModel Windows_1256ArabicModel =
Windows_1256_CharToOrderMap,
ArabicLangModel,
64,
(float)0.9696025116913417,
(float)0.9644868613755061,
PR_FALSE,
"WINDOWS-1256",
"ar"
};
const LanguageModel ArabicModel =
{
"ar",
Unicode_CharOrder,
64,
ArabicLangModel,
64,
(float)0.9644868613755061,
};

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: Danish *********/
/**
* Generated by BuildLangModel.py
* On: 2016-02-19 17:56:42.163975
* On: 2021-03-16 01:36:49.098484
**/
/* Character Mapping Table:
@ -67,18 +68,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 39,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 53, 42,SYM,SYM, 54,SYM,SYM,SYM, 55, 56, 57,SYM, /* BX */
58, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 59, 34, 60, 50, /* CX */
43, 47, 51, 36, 52, 61, 30,SYM, 19, 62, 37, 44, 31, 46, 63, 48, /* DX */
64, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 65, 34, 66, 50, /* EX */
43, 47, 51, 36, 52, 67, 30,SYM, 19, 68, 37, 44, 31, 46, 69, 70, /* FX */
SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 40,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 56, 52,SYM,SYM, 56,SYM,SYM,SYM, 57, 58, 59,SYM, /* BX */
41, 32, 48, 60, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */
49, 54, 47, 35, 42, 61, 30,SYM, 19, 55, 38, 62, 31, 51, 50, 44, /* DX */
41, 32, 48, 63, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */
49, 54, 47, 35, 42, 64, 30,SYM, 19, 55, 38, 65, 31, 51, 50, 66, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -88,18 +89,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
71, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 72, 34, 73, 50, /* CX */
43, 47, 51, 36, 52, 74, 30,SYM, 19, 75, 37, 44, 31, 46, 76, 48, /* DX */
77, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 78, 34, 79, 50, /* EX */
43, 47, 51, 36, 52, 80, 30,SYM, 19, 81, 37, 44, 31, 46, 82, 83, /* FX */
SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
41, 32, 48, 67, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */
49, 54, 47, 35, 42, 68, 30,SYM, 19, 55, 38, 69, 31, 51, 50, 44, /* DX */
41, 32, 48, 70, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */
49, 54, 47, 35, 42, 71, 30,SYM, 19, 55, 38, 72, 31, 51, 50, 73, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -109,61 +110,74 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 4X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 15, 24, 7, 0, 13, 10, 18, 5, 23, 11, 8, 12, 2, 9, /* 6X */
17, 29, 1, 6, 3, 16, 14, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 85,ILL, 86,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 39,SYM, 87,ILL, 88, 89, /* 9X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 4X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 4, 16, 23, 7, 0, 13, 10, 18, 5, 24, 12, 9, 11, 2, 8, /* 6X */
17, 29, 1, 6, 3, 15, 14, 25, 27, 22, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 74,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 75,ILL, 56,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 40,SYM, 76,ILL, 56, 77, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 42,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
90, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 91, 34, 92, 50, /* CX */
43, 47, 51, 36, 52, 93, 30,SYM, 19, 94, 37, 44, 31, 46, 95, 48, /* DX */
96, 33, 40, 35, 32, 21, 22, 38, 41, 28, 49, 45, 97, 34, 98, 50, /* EX */
43, 47, 51, 36, 52, 99, 30,SYM, 19,100, 37, 44, 31, 46,101,102, /* FX */
SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
41, 32, 48, 78, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* CX */
49, 54, 47, 35, 42, 79, 30,SYM, 19, 55, 38, 80, 31, 51, 50, 44, /* DX */
41, 32, 48, 81, 33, 21, 20, 37, 34, 28, 39, 46, 43, 36, 53, 45, /* EX */
49, 54, 47, 35, 42, 82, 30,SYM, 19, 55, 38, 83, 31, 51, 50, 84, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 60;
static const unsigned int Unicode_CharOrder[] =
{
65, 4, 66, 16, 67, 23, 68, 7, 69, 0, 70, 13, 71, 10, 72, 18,
73, 5, 74, 24, 75, 12, 76, 9, 77, 11, 78, 2, 79, 8, 80, 17,
81, 29, 82, 1, 83, 6, 84, 3, 85, 15, 86, 14, 87, 25, 88, 27,
89, 22, 90, 26, 97, 4, 98, 16, 99, 23, 100, 7, 101, 0,102, 13,
103, 10, 104, 18, 105, 5, 106, 24, 107, 12, 108, 9, 109, 11,110, 2,
111, 8, 112, 17, 113, 29, 114, 1, 115, 6, 116, 3, 117, 15,118, 14,
119, 25, 120, 27, 121, 22, 122, 26, 197, 21, 198, 20, 201, 28,216, 19,
229, 21, 230, 20, 233, 28, 248, 19,
};
/* Model Table:
* Total sequences: 964
* First 512 sequences: 0.9968082796759031
* Next 512 sequences (512-1024): 0.0031917203240968304
* Rest: 3.903127820947816e-17
* Total sequences: 936
* First 512 sequences: 0.9962304038307248
* Next 512 sequences (512-1024): 0.003769596169275244
* Rest: -5.2909066017292616e-17
* Negative sequences: TODO
*/
static const PRUint8 DanishLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,3,3,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,3,2,3,3,2,2,
3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,3,3,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,
3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,2,3,2,3,3,3,3,3,2,2,2,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,0,
3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,3,2,2,2,2,2,0,
3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,2,2,3,3,3,2,2,0,0,2,0,
3,3,3,3,3,3,3,2,3,3,2,2,2,2,2,3,3,2,2,3,3,3,3,3,2,2,0,0,2,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,0,2,2,3,2,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,2,0,2,0,2,0,
3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,3,2,2,3,3,3,3,3,2,3,2,2,2,0,
3,3,3,3,2,2,3,3,3,2,3,3,3,2,3,3,0,2,2,2,2,0,0,3,0,0,2,0,0,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,0,0,0,2,2,2,0,0,0,
3,3,3,3,2,0,3,3,3,2,3,3,2,2,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,0,3,3,3,3,2,3,3,3,3,3,3,2,2,2,0,0,0,0,0,2,0,0,0,0,0,
3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,2,2,3,3,2,3,2,2,0,0,0,0,0,
3,3,2,3,3,3,2,2,3,3,2,3,2,2,0,2,3,2,3,0,3,0,0,2,3,2,2,0,2,2,
3,2,2,2,3,3,2,2,2,3,0,2,2,2,0,2,2,0,2,0,2,0,0,0,2,2,2,0,0,0,
3,2,2,2,3,3,2,2,0,3,0,2,2,0,0,2,2,2,2,2,2,0,0,2,2,0,2,0,0,0,
3,2,0,2,2,3,2,0,2,2,0,0,2,2,2,2,2,2,2,2,0,0,0,0,2,2,0,0,2,0,
2,3,2,2,2,0,2,2,2,2,2,2,2,0,2,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,
0,0,0,0,3,2,2,2,2,2,0,0,0,0,2,2,3,0,2,0,0,0,0,0,0,0,0,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,3,3,3,3,3,2,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,0,
3,3,3,3,3,3,3,2,3,3,2,3,3,2,3,3,2,2,3,3,3,3,3,2,2,2,0,0,2,0,
3,3,2,3,3,3,3,2,3,3,3,2,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,3,2,0,
3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,3,2,3,3,2,0,3,0,2,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,0,2,3,2,2,2,2,0,2,
3,3,3,3,3,3,3,3,3,3,0,2,2,2,0,3,3,2,2,3,3,3,3,2,3,2,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,2,3,3,2,2,2,0,2,0,2,0,
3,3,3,3,3,3,3,2,3,2,2,3,2,2,3,3,2,2,2,3,3,3,3,2,3,2,0,0,2,0,
3,3,3,3,2,2,3,3,0,3,3,3,3,3,3,2,3,2,2,0,0,0,2,2,3,0,0,0,0,0,
2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,0,0,0,2,0,0,0,0,0,0,
3,3,3,3,0,0,3,3,2,3,2,2,3,2,3,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,3,3,2,2,0,0,0,2,2,2,2,0,0,0,
3,3,2,3,3,3,2,2,3,3,2,2,3,2,2,3,2,2,3,0,3,0,3,3,0,0,2,0,2,2,
3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,3,3,2,3,0,0,2,2,0,2,0,
3,2,2,2,3,3,2,2,3,2,0,2,2,2,0,2,2,0,3,0,2,0,2,2,0,2,0,0,0,0,
3,2,2,2,3,3,2,2,3,0,2,2,2,0,2,2,2,2,2,0,0,0,2,2,2,2,2,2,0,0,
3,2,2,2,3,3,2,0,2,2,0,0,0,2,2,2,2,2,0,0,0,0,0,2,0,2,0,2,0,0,
2,2,3,2,2,0,2,2,2,2,2,2,0,0,2,0,2,2,0,0,0,0,0,2,0,0,2,0,0,0,
0,2,0,0,2,2,0,2,2,2,0,0,2,2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,2,
};
@ -172,7 +186,7 @@ const SequenceModel Iso_8859_15DanishModel =
Iso_8859_15_CharToOrderMap,
DanishLangModel,
30,
(float)0.9968082796759031,
(float)0.9962304038307248,
PR_TRUE,
"ISO-8859-15",
"da"
@ -183,7 +197,7 @@ const SequenceModel Iso_8859_1DanishModel =
Iso_8859_1_CharToOrderMap,
DanishLangModel,
30,
(float)0.9968082796759031,
(float)0.9962304038307248,
PR_TRUE,
"ISO-8859-1",
"da"
@ -194,8 +208,18 @@ const SequenceModel Windows_1252DanishModel =
Windows_1252_CharToOrderMap,
DanishLangModel,
30,
(float)0.9968082796759031,
(float)0.9962304038307248,
PR_TRUE,
"WINDOWS-1252",
"da"
};
const LanguageModel DanishModel =
{
"da",
Unicode_CharOrder,
60,
DanishLangModel,
30,
(float)0.9962304038307248,
};

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: French *********/
/**
* Generated by BuildLangModel.py
* On: 2015-12-03 21:10:27.685575
* On: 2021-03-16 01:24:27.092782
**/
/* Character Mapping Table:
@ -61,24 +62,24 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Windows_1252_CharToOrderMap[] =
static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 57,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 58, 59, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
24, 38, 32, 46, 49, 61, 47, 27, 23, 14, 28, 41, 62, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 63, /* DX */
24, 38, 32, 46, 49, 64, 47, 27, 23, 14, 28, 41, 65, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 66, /* FX */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 57, 58,SYM,SYM, 59,SYM,SYM,SYM, 37, 37, 53,SYM, /* BX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 60, 41, 32, 35, /* CX */
54, 45, 51, 38, 30, 61, 44,SYM, 50, 36, 46, 33, 42, 52, 62, 56, /* DX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 63, 41, 32, 35, /* EX */
54, 45, 51, 38, 30, 64, 44,SYM, 50, 36, 46, 33, 42, 52, 65, 53, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -88,101 +89,116 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
24, 38, 32, 46, 49, 68, 47, 27, 23, 14, 28, 41, 69, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 70, /* DX */
24, 38, 32, 46, 49, 71, 47, 27, 23, 14, 28, 41, 72, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 73, /* FX */
SYM,SYM,SYM,SYM,SYM, 66,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 67, 41, 32, 35, /* CX */
54, 45, 51, 38, 30, 68, 44,SYM, 50, 36, 46, 33, 42, 52, 69, 56, /* DX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 70, 41, 32, 35, /* EX */
54, 45, 51, 38, 30, 71, 44,SYM, 50, 36, 46, 33, 42, 52, 72, 53, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_15_CharToOrderMap[] =
static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 74, 75,SYM,SYM, 76,SYM,SYM,SYM, 35, 35, 77,SYM, /* BX */
24, 38, 32, 46, 49, 78, 47, 27, 23, 14, 28, 41, 79, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 80, /* DX */
24, 38, 32, 46, 49, 81, 47, 27, 23, 14, 28, 41, 82, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 83, /* FX */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 4X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 18, 11, 10, 0, 17, 16, 19, 3, 25, 26, 7, 12, 4, 8, /* 6X */
13, 20, 5, 2, 6, 9, 15, 31, 22, 21, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 73,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 74,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 49,SYM, 37,ILL, 75, 53, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 76,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 77, 41, 32, 35, /* CX */
54, 45, 51, 38, 30, 78, 44,SYM, 50, 36, 46, 33, 42, 52, 79, 56, /* DX */
23, 40, 34, 47, 48, 55, 43, 29, 24, 14, 28, 39, 80, 41, 32, 35, /* EX */
54, 45, 51, 38, 30, 81, 44,SYM, 50, 36, 46, 33, 42, 52, 82, 53, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 76;
static const unsigned int Unicode_CharOrder[] =
{
65, 1, 66, 18, 67, 11, 68, 10, 69, 0, 70, 17, 71, 16, 72, 19,
73, 3, 74, 25, 75, 26, 76, 7, 77, 12, 78, 4, 79, 8, 80, 13,
81, 20, 82, 5, 83, 2, 84, 6, 85, 9, 86, 15, 87, 31, 88, 22,
89, 21, 90, 27, 97, 1, 98, 18, 99, 11, 100, 10, 101, 0,102, 17,
103, 16, 104, 19, 105, 3, 106, 25, 107, 26, 108, 7, 109, 12,110, 4,
111, 8, 112, 13, 113, 20, 114, 5, 115, 2, 116, 6, 117, 9,118, 15,
119, 31, 120, 22, 121, 21, 122, 27, 192, 23, 194, 34, 199, 29,200, 24,
201, 14, 202, 28, 206, 32, 207, 35, 212, 30, 217, 36, 219, 33,224, 23,
226, 34, 231, 29, 232, 24, 233, 14, 234, 28, 238, 32, 239, 35,244, 30,
249, 36, 251, 33, 338, 37, 339, 37,
};
/* Model Table:
* Total sequences: 914
* First 512 sequences: 0.997057879992383
* Next 512 sequences (512-1024): 0.002942120007616917
* Rest: 3.8163916471489756e-17
* Total sequences: 1049
* First 512 sequences: 0.997006678170155
* Next 512 sequences (512-1024): 0.0029768569132891634
* Rest: 1.646491655585584e-05
* Negative sequences: TODO
*/
static const PRUint8 FrenchLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,0,0,0,2,0,2,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,0,0,3,0,0,2,3,0,0,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,2,2,3,0,0,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,0,2,3,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,2,3,3,3,0,2,0,0,0,
3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,3,2,0,2,0,3,3,2,3,2,0,0,0,0,0,
3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,3,0,0,2,2,2,2,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,3,0,0,2,3,0,3,3,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,3,3,2,0,0,0,0,0,2,0,
3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,0,0,3,3,0,3,0,0,2,2,3,2,2,2,3,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,3,3,0,3,3,0,0,3,0,2,2,2,3,2,0,0,2,0,0,
3,3,3,2,3,3,3,3,3,3,2,2,3,2,3,0,0,2,2,3,0,0,3,3,0,0,2,2,3,2,2,3,2,0,0,0,0,0,
3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,0,2,3,2,0,0,3,3,0,2,2,0,3,0,2,2,3,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,2,2,0,3,0,0,2,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,0,2,3,3,0,0,0,0,2,0,2,0,2,0,0,0,0,0,
3,2,3,2,3,3,0,2,3,3,0,0,0,2,3,0,2,2,0,0,0,0,2,3,0,0,2,0,3,0,0,0,0,0,0,2,0,0,
3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,0,3,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,0,3,2,0,0,3,2,0,3,0,0,0,0,0,0,3,2,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,0,0,2,2,0,0,0,3,3,0,2,2,0,2,2,2,3,3,0,0,2,0,0,
0,0,2,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,3,0,3,2,3,2,2,3,3,2,3,0,3,2,2,2,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,2,2,0,3,2,0,0,2,2,0,0,0,0,0,0,0,
0,3,0,3,0,3,3,3,0,0,3,3,2,3,0,3,3,2,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,0,0,0,0,0,0,0,0,0,3,2,0,0,0,0,0,0,2,0,0,0,0,0,
3,3,3,2,3,3,2,3,3,3,0,0,2,3,2,2,2,2,2,3,0,0,3,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
0,0,3,0,0,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,0,0,3,2,0,0,0,3,0,3,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,2,3,2,0,2,3,3,0,2,0,2,2,2,0,0,2,2,2,0,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0,
3,2,3,2,3,2,2,2,3,2,0,2,0,0,2,0,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
0,2,0,3,0,0,3,3,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,0,3,3,0,0,0,3,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,0,0,3,3,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,0,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,0,2,2,3,0,0,2,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,2,0,3,3,3,0,3,0,3,0,2,0,3,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,2,3,3,0,0,3,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,3,3,3,0,2,3,2,3,2,0,0,0,2,0,2,0,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,2,2,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,1,0,0,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,2,3,3,3,3,3,2,0,2,3,0,0,0,
3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,3,3,0,3,0,2,3,2,2,3,3,0,3,2,0,0,2,0,0,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,0,2,2,0,2,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,3,2,1,0,0,3,2,3,0,2,3,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,0,2,2,0,0,2,2,0,0,
3,3,3,3,2,3,2,3,3,3,3,2,3,2,3,2,3,2,2,3,0,3,0,0,3,3,2,2,0,0,2,2,2,2,0,0,0,2,
3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,3,3,0,0,3,0,3,2,2,0,3,2,0,0,2,0,0,2,
3,3,3,3,3,2,3,2,3,3,2,2,3,3,3,2,2,2,3,2,0,3,0,0,3,2,2,2,3,0,2,2,0,2,2,0,0,2,
3,3,3,3,2,3,3,3,3,3,2,2,2,3,3,2,3,2,2,3,0,3,0,0,3,2,2,0,3,2,3,2,0,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,0,0,3,2,3,0,2,0,2,0,0,3,2,0,0,
3,3,3,3,2,3,2,2,3,3,2,2,2,2,3,1,2,0,0,3,0,2,0,0,3,0,2,0,3,0,2,0,0,0,2,0,0,2,
3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,3,2,3,3,2,3,0,0,3,2,2,1,2,0,0,2,0,0,2,0,0,0,
3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,3,2,0,0,2,0,0,3,0,2,0,3,0,0,0,0,2,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,2,3,2,0,2,3,2,2,2,0,0,2,1,2,3,0,0,2,
3,3,2,3,3,3,3,3,3,3,2,2,3,2,3,2,2,2,2,2,2,3,0,0,3,1,2,2,2,0,3,2,0,0,2,0,0,2,
2,3,2,2,0,2,0,0,2,3,1,2,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,2,2,2,0,2,2,0,2,2,2,2,0,2,2,2,0,0,2,0,0,0,
3,3,2,3,0,1,3,2,2,2,1,3,0,3,3,3,0,2,2,2,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,1,3,3,3,3,0,0,3,3,3,2,0,3,3,0,2,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,2,2,2,0,3,3,2,2,2,2,3,0,0,1,2,1,0,0,0,3,0,2,2,2,0,0,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,2,3,3,3,2,2,3,2,3,0,2,0,2,3,0,3,0,0,0,2,2,1,0,0,0,2,0,0,0,0,0,2,
3,3,2,3,2,2,2,2,3,3,2,2,2,1,3,2,2,2,3,2,2,2,0,0,2,0,2,3,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,2,2,3,2,0,0,0,3,3,0,0,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,2,0,3,0,3,3,0,0,0,0,3,3,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,2,2,2,2,3,3,2,2,2,0,2,0,2,2,2,2,0,2,0,0,0,0,2,1,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,3,0,3,3,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,3,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,3,2,0,0,0,3,2,2,0,0,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,2,2,2,2,0,2,3,2,2,0,0,2,2,2,2,2,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,2,2,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
const SequenceModel Windows_1252FrenchModel =
const SequenceModel Iso_8859_15FrenchModel =
{
Windows_1252_CharToOrderMap,
Iso_8859_15_CharToOrderMap,
FrenchLangModel,
38,
(float)0.997057879992383,
(float)0.997006678170155,
PR_TRUE,
"WINDOWS-1252",
"ISO-8859-15",
"fr"
};
@ -191,19 +207,29 @@ const SequenceModel Iso_8859_1FrenchModel =
Iso_8859_1_CharToOrderMap,
FrenchLangModel,
38,
(float)0.997057879992383,
(float)0.997006678170155,
PR_TRUE,
"ISO-8859-1",
"fr"
};
const SequenceModel Iso_8859_15FrenchModel =
const SequenceModel Windows_1252FrenchModel =
{
Iso_8859_15_CharToOrderMap,
Windows_1252_CharToOrderMap,
FrenchLangModel,
38,
(float)0.997057879992383,
(float)0.997006678170155,
PR_TRUE,
"ISO-8859-15",
"WINDOWS-1252",
"fr"
};
const LanguageModel FrenchModel =
{
"fr",
Unicode_CharOrder,
76,
FrenchLangModel,
38,
(float)0.997006678170155,
};

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: German *********/
/**
* Generated by BuildLangModel.py
* On: 2015-12-03 22:50:46.518374
* On: 2021-03-16 01:10:34.750155
**/
/* Character Mapping Table:
@ -61,110 +62,133 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 36,SYM, 54,ILL, 42, 56, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 61, 24, 45, 62, 27, /* DX */
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 63, 24, 45, 64, 56, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 4X */
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 5, 15, 12, 8, 0, 17, 14, 7, 3, 23, 16, 9, 13, 2, 11, /* 6X */
18, 30, 1, 4, 6, 10, 21, 19, 28, 25, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 4X */
17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */
17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* CX */
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 66, 24, 45, 67, 27, /* DX */
41, 31, 37, 44, 22, 49, 50, 35, 32, 29, 48, 43, 57, 33, 47, 52, /* EX */
53, 39, 51, 34, 40, 55, 26,SYM, 38, 58, 46, 68, 24, 45, 69, 56, /* FX */
SYM,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */
50, 37, 53, 33, 49, 58, 26,SYM, 39, 60, 38, 61, 25, 44, 54, 27, /* DX */
48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */
50, 37, 53, 33, 49, 58, 26,SYM, 39, 62, 38, 63, 25, 44, 54, 57, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 4X */
17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 5, 15, 12, 10, 0, 18, 14, 7, 2, 22, 16, 9, 13, 3, 11, /* 6X */
17, 30, 1, 4, 6, 8, 21, 20, 28, 24, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 34,SYM, 56,ILL, 43, 57, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 65,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* CX */
50, 37, 53, 33, 49, 58, 26,SYM, 39, 66, 38, 67, 25, 44, 54, 27, /* DX */
48, 31, 46, 42, 23, 41, 45, 36, 35, 29, 51, 40, 55, 32, 52, 47, /* EX */
50, 37, 53, 33, 49, 58, 26,SYM, 39, 68, 38, 69, 25, 44, 54, 57, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 61;
static const unsigned int Unicode_CharOrder[] =
{
65, 5, 66, 15, 67, 12, 68, 10, 69, 0, 70, 18, 71, 14, 72, 7,
73, 2, 74, 22, 75, 16, 76, 9, 77, 13, 78, 3, 79, 11, 80, 17,
81, 30, 82, 1, 83, 4, 84, 6, 85, 8, 86, 21, 87, 20, 88, 28,
89, 24, 90, 19, 97, 5, 98, 15, 99, 12, 100, 10, 101, 0,102, 18,
103, 14, 104, 7, 105, 2, 106, 22, 107, 16, 108, 9, 109, 13,110, 3,
111, 11, 112, 17, 113, 30, 114, 1, 115, 4, 116, 6, 117, 8,118, 21,
119, 20, 120, 28, 121, 24, 122, 19, 196, 23, 201, 29, 214, 26,220, 25,
223, 27, 228, 23, 233, 29, 246, 26, 252, 25,
};
/* Model Table:
* Total sequences: 1188
* First 512 sequences: 0.9934041448127945
* Next 512 sequences (512-1024): 0.006482829516922903
* Rest: 0.0001130256702826099
* Total sequences: 1337
* First 512 sequences: 0.9936565191798025
* Next 512 sequences (512-1024): 0.00616485529057582
* Rest: 0.00017862552962171364
* Negative sequences: TODO
*/
static const PRUint8 GermanLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,2,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,3,3,2,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,0,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,0,2,2,
3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,3,3,0,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,2,2,3,2,3,3,2,0,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,2,2,
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,3,3,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,0,3,3,1,2,
3,3,2,3,2,3,3,3,2,3,3,3,3,2,2,2,3,2,2,2,2,2,2,2,1,3,2,0,1,2,3,
3,3,2,3,3,3,3,2,3,3,3,3,3,3,2,3,2,3,3,2,2,2,3,2,3,3,3,0,0,2,2,
3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,2,3,2,3,3,2,0,2,2,1,
3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,3,2,2,2,2,2,2,3,2,3,3,3,0,0,2,0,
3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,2,3,3,3,2,2,2,3,2,3,3,3,0,1,2,1,
3,3,3,3,3,3,3,2,2,3,3,3,2,3,3,2,3,3,2,2,2,2,3,2,3,2,3,0,0,2,0,
3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,3,2,2,2,2,0,0,2,0,
3,3,3,3,3,3,2,2,2,2,3,3,1,2,2,2,2,2,2,2,2,2,3,3,3,2,3,0,0,0,0,
3,2,2,3,3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,3,3,2,2,2,3,3,3,0,0,2,2,
3,2,2,3,2,3,2,0,2,2,2,3,1,2,2,2,2,2,2,2,2,2,2,1,0,2,3,0,0,2,1,
2,3,3,3,3,2,3,3,3,3,3,2,3,3,3,2,2,3,2,0,2,2,0,0,0,0,0,2,0,0,2,
3,2,2,3,2,3,2,2,2,2,3,3,2,2,2,1,2,1,2,0,2,0,3,2,3,2,2,0,0,2,0,
2,3,3,0,3,1,3,3,3,3,0,0,3,2,3,3,2,2,2,1,1,0,0,0,0,0,0,2,0,0,0,
3,3,3,2,3,3,2,2,2,3,2,3,3,3,2,2,3,2,3,2,2,2,0,2,2,2,1,0,0,1,0,
2,3,3,2,3,0,3,3,2,3,0,1,3,3,3,2,2,3,2,2,2,2,0,0,0,0,1,3,1,0,0,
3,2,2,3,2,2,3,2,1,2,2,2,0,2,2,3,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,
3,1,2,3,1,3,3,2,1,2,2,2,2,0,0,2,2,2,3,2,0,2,0,0,0,2,0,0,2,2,0,
2,3,2,0,2,2,2,2,2,2,2,2,2,2,2,3,2,2,2,1,2,2,0,2,0,0,0,0,0,0,2,
0,1,0,2,0,2,0,0,0,0,3,2,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,1,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,1,2,2,
3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,3,2,3,2,3,2,2,3,3,3,3,0,0,2,2,
3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,2,1,2,1,1,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,1,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,2,3,3,2,2,0,0,2,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,0,0,3,3,1,2,
3,3,3,2,2,3,3,3,3,3,2,3,3,2,2,2,3,2,2,3,2,2,2,2,3,1,1,0,0,2,2,
3,3,3,2,3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,2,3,3,3,3,0,1,2,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,3,3,2,2,2,3,3,2,0,1,2,1,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,2,2,2,2,2,3,3,3,3,0,0,2,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3,3,3,3,0,2,2,0,
3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,2,2,3,2,2,2,1,0,2,1,
3,3,3,3,3,3,3,2,3,3,2,3,2,3,3,3,2,2,3,2,2,2,2,3,2,3,2,0,0,2,1,
3,2,3,2,3,3,3,2,3,3,2,3,2,3,2,3,2,2,2,3,3,2,2,2,3,2,3,0,0,2,2,
3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,2,2,2,0,2,0,3,3,2,3,2,0,0,1,0,
3,2,3,2,2,3,2,2,2,2,2,3,2,1,2,1,2,2,2,1,2,2,0,2,2,0,3,0,0,2,0,
3,2,3,2,2,3,2,1,3,2,2,3,2,2,1,1,2,0,1,2,0,1,2,3,2,3,2,0,0,2,0,
2,3,3,3,3,1,3,3,3,3,3,3,3,3,3,2,2,2,3,2,0,2,0,1,1,0,0,2,0,0,2,
3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,2,3,3,2,2,2,2,2,0,2,2,2,1,1,1,0,
2,3,1,3,3,1,3,3,0,3,3,0,3,2,3,3,2,2,2,1,1,0,0,0,0,1,0,2,1,0,0,
2,3,2,3,3,0,3,3,0,3,2,1,2,3,2,2,2,2,3,2,2,2,0,0,1,0,1,3,1,0,0,
3,2,3,2,2,2,3,2,2,2,2,2,0,2,2,3,2,1,2,1,2,2,0,0,0,0,0,0,0,0,0,
3,1,3,1,2,3,3,2,2,2,1,3,2,1,1,2,2,3,2,1,2,2,0,0,2,0,0,0,2,0,1,
2,2,1,2,2,2,2,1,0,2,2,2,2,2,2,2,2,2,1,2,0,2,2,0,0,0,0,0,0,0,2,
1,0,2,0,0,2,1,0,3,1,0,1,0,1,1,0,0,1,0,0,1,2,0,0,0,0,0,0,0,0,0,
};
const SequenceModel Windows_1252GermanModel =
{
Windows_1252_CharToOrderMap,
GermanLangModel,
31,
(float)0.9934041448127945,
PR_TRUE,
"WINDOWS-1252",
"de"
};
const SequenceModel Iso_8859_1GermanModel =
{
Iso_8859_1_CharToOrderMap,
GermanLangModel,
31,
(float)0.9934041448127945,
(float)0.9936565191798025,
PR_TRUE,
"ISO-8859-1",
"de"
};
const SequenceModel Windows_1252GermanModel =
{
Windows_1252_CharToOrderMap,
GermanLangModel,
31,
(float)0.9936565191798025,
PR_TRUE,
"WINDOWS-1252",
"de"
};
const LanguageModel GermanModel =
{
"de",
Unicode_CharOrder,
61,
GermanLangModel,
31,
(float)0.9936565191798025,
};

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: Italian *********/
/**
* Generated by BuildLangModel.py
* On: 2016-09-21 18:46:08.841217
* On: 2021-03-16 01:31:12.602629
**/
/* Character Mapping Table:
@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
21, 37, 46, 38, 35, 55, 39, 41, 22, 30, 40, 45, 29, 36, 48, 56, /* CX */
57, 42, 23, 33, 49, 58, 32,SYM, 52, 24, 43, 59, 34, 60, 61, 44, /* DX */
21, 37, 46, 38, 35, 62, 39, 41, 22, 30, 40, 45, 29, 36, 48, 63, /* EX */
64, 42, 23, 33, 49, 65, 32,SYM, 52, 24, 43, 66, 34, 67, 68, 69, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_3_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM, 59,SYM,SYM,SYM,ILL, 60,SYM,SYM, 61, 48, 47, 62,SYM,ILL, 58, /* AX */
SYM, 63,SYM,SYM,SYM,SYM, 64,SYM,SYM, 46, 48, 47, 65,SYM,ILL, 58, /* BX */
22, 32, 50,ILL, 39, 66, 67, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */
ILL, 44, 29, 33, 51, 68, 34,SYM, 69, 28, 45, 70, 36, 71, 72, 73, /* DX */
22, 32, 50,ILL, 39, 74, 75, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */
ILL, 44, 29, 33, 51, 76, 34,SYM, 77, 28, 45, 78, 36, 79, 80,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 35,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 41, 81,SYM,SYM, 41,SYM,SYM,SYM, 52, 52, 82,SYM, /* BX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */
56, 44, 29, 33, 51, 83, 34,SYM, 57, 28, 45, 84, 36, 85, 86, 87, /* DX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */
56, 44, 29, 33, 51, 88, 34,SYM, 57, 28, 45, 89, 36, 90, 91, 92, /* FX */
SYM, 70,SYM,SYM,SYM,ILL, 71,SYM,SYM, 72, 73, 53, 74,SYM,ILL, 50, /* AX */
SYM, 75,SYM,SYM,SYM,SYM, 76,SYM,SYM, 77, 78, 53, 79,SYM,ILL, 50, /* BX */
21, 37, 46,ILL, 35, 80, 81, 41, 22, 30, 40, 45, 29, 36, 48, 82, /* CX */
ILL, 42, 23, 33, 49, 83, 32,SYM, 84, 24, 43, 85, 34, 86, 87, 44, /* DX */
21, 37, 46,ILL, 35, 88, 89, 41, 22, 30, 40, 45, 29, 36, 48, 90, /* EX */
ILL, 42, 23, 33, 49, 91, 32,SYM, 92, 24, 43, 93, 34, 94, 95,SYM, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -109,39 +110,39 @@ static const unsigned char Iso_8859_9_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 93,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */
47, 44, 29, 33, 51, 94, 34,SYM, 57, 28, 45, 95, 36, 96, 48, 97, /* DX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */
47, 44, 29, 33, 51, 98, 34,SYM, 57, 28, 45, 99, 36, 46, 48,100, /* FX */
SYM,SYM,SYM,SYM,SYM, 96,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
21, 37, 46, 38, 35, 97, 39, 41, 22, 30, 40, 45, 29, 36, 48, 98, /* CX */
53, 42, 23, 33, 49, 99, 32,SYM, 52, 24, 43,100, 34,101,102, 44, /* DX */
21, 37, 46, 38, 35,103, 39, 41, 22, 30, 40, 45, 29, 36, 48,104, /* EX */
53, 42, 23, 33, 49,105, 32,SYM, 52, 24, 43,106, 34,107,108,109, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_1_CharToOrderMap[] =
static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */
56, 44, 29, 33, 51,102, 34,SYM, 57, 28, 45,103, 36,104,105,106, /* DX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */
56, 44, 29, 33, 51,107, 34,SYM, 57, 28, 45,108, 36,109,110,111, /* FX */
SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,110,111,SYM,SYM,112,SYM,SYM,SYM, 47, 47,113,SYM, /* BX */
21, 37, 46, 38, 35,114, 39, 41, 22, 30, 40, 45, 29, 36, 48,115, /* CX */
116, 42, 23, 33, 49,117, 32,SYM, 52, 24, 43,118, 34,119,120, 44, /* DX */
21, 37, 46, 38, 35,121, 39, 41, 22, 30, 40, 45, 29, 36, 48,122, /* EX */
123, 42, 23, 33, 49,124, 32,SYM, 52, 24, 43,125, 34,126,127,128, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -151,109 +152,123 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 4X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 16, 9, 10, 2, 17, 14, 19, 0, 27, 21, 5, 12, 4, 3, /* 6X */
13, 20, 6, 8, 7, 11, 15, 25, 26, 23, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM,112,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 52,ILL, 41,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 35,SYM, 52,ILL, 41,113, /* 9X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 4X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 17, 9, 10, 1, 16, 14, 19, 0, 31, 26, 5, 13, 4, 3, /* 6X */
12, 20, 7, 8, 6, 11, 15, 28, 25, 27, 18,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM,129,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,130,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 47,ILL,131,132, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM,114,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* CX */
56, 44, 29, 33, 51,115, 34,SYM, 57, 28, 45,116, 36,117,118,119, /* DX */
22, 32, 50, 43, 39, 53, 54, 38, 24, 30, 55, 40, 31, 37, 42, 49, /* EX */
56, 44, 29, 33, 51,120, 34,SYM, 57, 28, 45,121, 36,122,123,124, /* FX */
SYM,SYM,SYM,SYM,SYM,133,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
21, 37, 46, 38, 35,134, 39, 41, 22, 30, 40, 45, 29, 36, 48,135, /* CX */
136, 42, 23, 33, 49,137, 32,SYM, 52, 24, 43,138, 34,139,140, 44, /* DX */
21, 37, 46, 38, 35,141, 39, 41, 22, 30, 40, 45, 29, 36, 48,142, /* EX */
143, 42, 23, 33, 49,144, 32,SYM, 52, 24, 43,145, 34,146,147,148, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 68;
static const unsigned int Unicode_CharOrder[] =
{
65, 2, 66, 17, 67, 9, 68, 10, 69, 1, 70, 16, 71, 14, 72, 19,
73, 0, 74, 31, 75, 26, 76, 5, 77, 13, 78, 4, 79, 3, 80, 12,
81, 20, 82, 7, 83, 8, 84, 6, 85, 11, 86, 15, 87, 28, 88, 25,
89, 27, 90, 18, 97, 2, 98, 17, 99, 9, 100, 10, 101, 1,102, 16,
103, 14, 104, 19, 105, 0, 106, 31, 107, 26, 108, 5, 109, 13,110, 4,
111, 3, 112, 12, 113, 20, 114, 7, 115, 8, 116, 6, 117, 11,118, 15,
119, 28, 120, 25, 121, 27, 122, 18, 192, 21, 200, 22, 201, 30,204, 29,
210, 23, 211, 33, 214, 32, 217, 24, 224, 21, 232, 22, 233, 30,236, 29,
242, 23, 243, 33, 246, 32, 249, 24,
};
/* Model Table:
* Total sequences: 872
* First 512 sequences: 0.9989484485502651
* Next 512 sequences (512-1024): 0.0010515514497349433
* Rest: -4.336808689942018e-17
* Total sequences: 921
* First 512 sequences: 0.9992462827093448
* Next 512 sequences (512-1024): 0.0007537172906552294
* Rest: -2.0166160408230382e-17
* Negative sequences: TODO
*/
static const PRUint8 ItalianLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,3,3,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,3,3,3,0,2,0,0,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,2,3,2,3,0,3,3,2,2,0,
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,0,3,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,0,2,3,3,2,3,2,2,3,3,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,0,0,3,2,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,0,3,0,0,3,2,0,3,2,2,0,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,3,3,2,3,2,3,2,2,3,3,2,2,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,0,3,2,3,3,3,0,3,2,3,0,0,
3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,0,2,0,0,0,3,0,2,3,0,0,3,2,2,2,2,
3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,0,3,2,3,0,2,0,2,0,3,2,0,2,2,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,0,3,2,2,0,3,0,2,2,2,0,2,2,0,0,2,
3,3,3,3,2,3,3,0,2,2,2,3,2,2,2,3,2,0,0,2,0,2,2,3,2,0,0,0,0,2,2,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,3,0,2,0,3,0,3,0,2,2,2,2,3,2,0,
3,3,3,3,0,3,3,3,2,3,0,3,2,2,3,2,2,3,0,2,0,2,0,0,2,2,2,2,2,0,2,0,0,0,
3,3,3,3,3,2,2,2,2,0,2,3,0,2,3,0,3,2,3,3,0,3,0,3,0,2,0,2,0,3,2,0,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,2,0,2,0,3,0,3,0,3,0,2,0,0,3,0,3,0,
2,3,0,2,0,0,2,0,2,0,0,3,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,2,2,2,0,3,0,3,0,3,0,2,2,2,0,0,0,0,2,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,0,0,0,2,0,2,0,2,2,2,0,0,0,0,0,0,
2,0,0,0,2,0,3,0,2,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,2,0,2,0,2,0,0,2,2,0,3,0,0,0,2,0,3,0,0,0,0,0,0,0,0,
3,3,3,3,0,3,0,3,2,3,0,2,0,3,0,3,0,0,0,0,0,2,0,2,0,2,3,0,0,0,0,0,0,0,
3,3,3,3,2,2,2,2,0,2,2,3,2,0,0,0,0,0,0,2,0,3,0,2,0,2,0,2,0,0,0,0,0,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,2,2,3,3,2,3,2,3,0,2,2,0,2,3,0,2,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,3,2,2,0,2,2,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,3,2,2,0,0,2,2,0,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,2,0,2,3,0,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,0,3,3,3,3,2,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3,0,0,3,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,0,3,3,3,3,0,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,0,3,3,3,3,3,2,0,2,
3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,3,2,3,3,2,2,3,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,2,3,3,0,2,3,3,3,3,0,2,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,3,3,3,3,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,3,3,2,0,3,2,0,3,3,2,3,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,0,3,2,3,2,2,2,3,3,3,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,3,0,3,3,3,0,3,2,3,0,0,
3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,2,2,0,3,2,2,2,0,3,2,
3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,3,3,0,2,3,2,2,3,0,2,2,3,3,0,3,2,2,0,
3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,2,2,3,0,3,0,0,0,3,2,2,2,2,2,2,2,0,3,2,
3,3,3,3,2,3,2,3,2,2,0,3,3,0,2,3,0,2,2,0,0,0,0,3,0,0,0,2,0,2,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,3,2,0,2,0,2,2,2,0,0,2,0,0,0,2,0,2,0,
3,3,3,3,3,3,2,3,3,2,2,3,0,2,0,0,0,3,3,2,0,2,2,2,2,0,2,3,2,3,2,0,2,0,
3,3,3,3,2,2,2,0,2,2,2,3,2,2,2,0,0,3,3,2,2,0,0,3,0,0,2,2,2,0,0,2,2,3,
3,3,3,3,3,3,3,3,3,0,0,3,2,3,0,0,2,2,2,0,0,2,0,0,0,0,2,3,3,2,3,2,3,0,
2,0,2,0,0,0,0,2,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,2,3,2,0,3,2,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,2,0,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,
3,3,3,2,0,2,3,0,0,2,0,2,3,0,0,3,2,0,0,2,0,0,0,0,0,3,2,2,0,0,0,0,2,0,
3,3,3,3,2,3,3,3,3,0,2,3,2,3,2,2,0,2,0,3,0,0,0,0,0,0,2,3,2,0,2,0,2,0,
2,3,3,3,3,3,2,3,3,3,3,2,2,3,0,0,3,2,0,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0,
3,3,3,3,2,2,2,3,3,0,0,2,2,0,2,0,2,2,0,2,0,0,0,0,0,0,2,2,3,0,0,0,0,0,
0,0,2,0,0,0,2,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,2,2,3,2,3,2,2,3,2,0,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,0,0,0,2,0,2,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
2,0,0,0,3,3,2,3,3,2,2,0,2,0,2,3,2,0,0,3,0,0,0,0,0,0,2,0,2,0,0,0,0,0,
2,0,2,0,3,0,0,2,2,2,0,0,2,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
};
const SequenceModel Iso_8859_1ItalianModel =
{
Iso_8859_1_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9992462827093448,
PR_TRUE,
"ISO-8859-1",
"it"
};
const SequenceModel Iso_8859_3ItalianModel =
{
Iso_8859_3_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9989484485502651,
(float)0.9992462827093448,
PR_TRUE,
"ISO-8859-3",
"it"
};
const SequenceModel Iso_8859_15ItalianModel =
{
Iso_8859_15_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9989484485502651,
PR_TRUE,
"ISO-8859-15",
"it"
};
const SequenceModel Iso_8859_9ItalianModel =
{
Iso_8859_9_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9989484485502651,
(float)0.9992462827093448,
PR_TRUE,
"ISO-8859-9",
"it"
};
const SequenceModel Iso_8859_1ItalianModel =
const SequenceModel Iso_8859_15ItalianModel =
{
Iso_8859_1_CharToOrderMap,
Iso_8859_15_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9989484485502651,
(float)0.9992462827093448,
PR_TRUE,
"ISO-8859-1",
"ISO-8859-15",
"it"
};
@ -262,8 +277,18 @@ const SequenceModel Windows_1252ItalianModel =
Windows_1252_CharToOrderMap,
ItalianLangModel,
34,
(float)0.9989484485502651,
(float)0.9992462827093448,
PR_TRUE,
"WINDOWS-1252",
"it"
};
const LanguageModel ItalianModel =
{
"it",
Unicode_CharOrder,
68,
ItalianLangModel,
34,
(float)0.9992462827093448,
};

View File

@ -36,12 +36,13 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
#include "../nsLanguageDetector.h"
/********* Language model for: Spanish *********/
/**
* Generated by BuildLangModel.py
* On: 2015-12-12 18:39:02.290370
* On: 2021-03-16 11:33:00.157304
**/
/* Character Mapping Table:
@ -61,45 +62,45 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 53, 22, 41, 43, /* CX */
49, 29, 38, 19, 50, 54, 34,SYM, 44, 51, 30, 55, 32, 42, 56, 57, /* DX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 58, 22, 41, 43, /* EX */
49, 29, 38, 19, 50, 59, 34,SYM, 44, 51, 30, 60, 32, 42, 61, 62, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 63,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 65, 66,SYM,SYM, 67,SYM,SYM,SYM, 68, 69, 70,SYM, /* BX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 71, 22, 41, 43, /* CX */
49, 29, 38, 19, 50, 72, 34,SYM, 44, 51, 30, 73, 32, 42, 74, 75, /* DX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 76, 22, 41, 43, /* EX */
49, 29, 38, 19, 50, 77, 34,SYM, 44, 51, 30, 78, 32, 42, 79, 80, /* FX */
SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 38,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 51, 54,SYM,SYM, 51,SYM,SYM,SYM, 46, 46, 55,SYM, /* BX */
36, 25, 43, 45, 37, 56, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */
49, 27, 44, 19, 42, 57, 34,SYM, 58, 59, 30, 60, 32, 40, 50, 61, /* DX */
36, 25, 43, 45, 37, 62, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */
49, 27, 44, 19, 42, 63, 34,SYM, 64, 65, 30, 66, 32, 40, 50, 67, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_1_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 68,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
36, 25, 43, 45, 37, 69, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */
49, 27, 44, 19, 42, 70, 34,SYM, 71, 72, 30, 73, 32, 40, 50, 74, /* DX */
36, 25, 43, 45, 37, 75, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */
49, 27, 44, 19, 42, 76, 34,SYM, 77, 78, 30, 79, 32, 40, 50, 80, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -109,96 +110,120 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM, 83,ILL, 84,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 85,SYM, 86,ILL, 87, 88, /* 9X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 4X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 1, 14, 10, 8, 0, 17, 15, 20, 4, 24, 29, 7, 12, 3, 2, /* 6X */
13, 22, 6, 5, 9, 11, 16, 31, 28, 18, 23,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 38,SYM, 46,ILL, 51, 82, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 90, 22, 41, 43, /* CX */
49, 29, 38, 19, 50, 91, 34,SYM, 44, 51, 30, 92, 32, 42, 93, 94, /* DX */
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 95, 22, 41, 43, /* EX */
49, 29, 38, 19, 50, 96, 34,SYM, 44, 51, 30, 97, 32, 42, 98, 99, /* FX */
SYM,SYM,SYM,SYM,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
36, 25, 43, 45, 37, 84, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* CX */
49, 27, 44, 19, 42, 85, 34,SYM, 86, 87, 30, 88, 32, 40, 50, 89, /* DX */
36, 25, 43, 45, 37, 90, 53, 33, 35, 26, 48, 41, 39, 21, 47, 52, /* EX */
49, 27, 44, 19, 42, 91, 34,SYM, 92, 93, 30, 94, 32, 40, 50, 95, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const int Unicode_Char_size = 66;
static const unsigned int Unicode_CharOrder[] =
{
65, 1, 66, 14, 67, 10, 68, 8, 69, 0, 70, 17, 71, 15, 72, 20,
73, 4, 74, 24, 75, 29, 76, 7, 77, 12, 78, 3, 79, 2, 80, 13,
81, 22, 82, 6, 83, 5, 84, 9, 85, 11, 86, 16, 87, 31, 88, 28,
89, 18, 90, 23, 97, 1, 98, 14, 99, 10, 100, 8, 101, 0,102, 17,
103, 15, 104, 20, 105, 4, 106, 24, 107, 29, 108, 7, 109, 12,110, 3,
111, 2, 112, 13, 113, 22, 114, 6, 115, 5, 116, 9, 117, 11,118, 16,
119, 31, 120, 28, 121, 18, 122, 23, 193, 25, 201, 26, 205, 21,209, 27,
211, 19, 218, 30, 220, 32, 225, 25, 233, 26, 237, 21, 241, 27,243, 19,
250, 30, 252, 32,
};
/* Model Table:
* Total sequences: 897
* First 512 sequences: 0.9970385677528184
* Next 512 sequences (512-1024): 0.0029614322471815486
* Rest: 4.597017211338539e-17
* Total sequences: 1002
* First 512 sequences: 0.9966074680689881
* Next 512 sequences (512-1024): 0.003392531931011823
* Rest: 3.209238430557093e-17
* Negative sequences: TODO
*/
static const PRUint8 SpanishLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,3,3,0,0,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,0,3,2,2,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,2,2,0,
3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,2,3,3,3,3,2,3,2,2,3,3,2,0,0,2,2,2,
3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,2,3,2,3,3,0,3,2,2,3,3,0,0,0,2,2,2,
3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,2,3,0,3,3,2,3,0,2,3,3,3,0,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0,
3,3,3,3,3,3,2,2,2,2,2,3,3,3,3,2,2,3,0,3,2,0,3,2,0,3,3,2,2,0,3,2,2,
3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,0,2,2,2,3,3,0,3,2,0,3,3,2,0,0,3,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,2,3,3,0,3,2,2,0,0,2,2,0,
3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,2,3,3,0,3,2,2,2,3,2,0,0,3,2,3,
3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,2,3,2,0,3,0,0,3,2,0,2,2,2,0,0,3,2,0,
3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,0,3,2,0,0,2,2,2,2,2,0,0,2,2,0,
3,3,3,2,2,3,2,2,2,0,2,3,0,2,0,2,2,2,2,3,0,0,3,0,0,2,3,2,0,0,0,0,0,
0,0,0,3,3,0,3,3,3,3,3,0,3,3,2,3,2,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
3,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,0,3,2,2,2,3,0,2,0,2,2,2,
2,3,2,0,2,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,0,2,2,3,3,3,2,3,2,3,3,3,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,
3,3,3,2,0,3,2,2,2,2,0,3,2,2,0,0,0,0,0,3,0,0,2,2,0,2,3,0,0,0,2,0,2,
3,3,3,2,0,3,2,0,2,2,2,3,2,2,2,3,0,2,0,3,2,3,2,0,3,3,2,2,0,0,2,0,0,
2,0,0,3,3,2,3,3,2,3,3,2,3,3,2,3,3,2,2,0,2,2,0,2,2,0,0,0,2,2,0,0,0,
2,3,2,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,2,0,0,0,0,2,0,0,0,0,3,0,0,0,0,
3,3,3,2,3,3,3,3,2,2,2,3,3,0,2,2,2,3,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0,
3,3,3,2,2,3,2,2,2,3,3,3,2,3,2,0,2,2,3,2,2,2,0,2,0,2,2,2,3,0,0,2,0,
3,3,3,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,3,0,0,2,0,0,0,0,0,0,0,
2,3,2,3,3,0,2,3,2,3,2,0,3,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,
3,3,3,3,2,3,2,2,2,2,2,2,0,0,2,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,
3,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,3,3,3,2,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3,3,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,2,2,3,3,0,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,3,3,3,3,3,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,2,0,3,3,0,0,3,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,0,2,3,3,2,2,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,2,2,2,2,
3,3,3,2,3,3,3,2,2,2,2,3,3,2,2,2,2,2,3,3,2,3,3,2,2,3,3,0,2,2,2,2,0,
3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,2,2,3,3,3,3,0,3,2,3,3,0,2,2,3,2,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,3,2,2,0,3,3,0,2,3,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,0,2,0,
3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,2,2,2,3,3,2,3,0,2,2,3,3,0,2,2,3,2,2,
3,3,3,2,3,3,3,3,2,3,3,3,2,3,2,2,2,2,2,3,3,3,2,2,0,3,3,0,0,2,3,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,2,3,0,2,3,3,3,0,0,2,2,2,2,
3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,0,2,3,3,3,3,0,2,0,2,3,0,2,2,3,2,3,
3,3,3,2,3,2,2,2,2,2,2,3,0,0,2,2,2,0,2,3,2,3,0,0,0,3,3,0,0,2,0,0,0,
3,3,3,2,3,2,3,3,2,3,2,3,2,2,2,2,0,3,2,3,0,3,0,0,0,2,3,0,0,2,3,0,0,
3,3,3,3,3,3,2,3,2,2,2,3,2,2,2,2,2,2,2,3,2,0,0,2,2,2,2,0,2,2,2,2,0,
2,2,0,3,2,3,3,3,3,2,3,0,3,3,2,3,3,2,2,0,2,0,2,0,2,0,0,2,2,2,0,0,0,
3,3,3,3,3,2,3,3,2,3,2,3,3,2,2,0,0,2,3,2,2,3,2,2,0,2,2,0,2,2,3,2,2,
2,3,3,3,0,3,3,2,3,3,3,0,3,2,2,3,2,3,0,0,0,0,2,2,2,0,2,2,2,2,0,0,0,
0,2,0,0,2,2,2,2,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,2,2,2,0,3,3,3,2,2,2,3,2,0,2,3,2,2,3,3,0,3,2,0,0,2,2,2,0,
3,3,3,2,3,2,2,2,2,2,2,3,0,2,0,0,2,0,2,3,2,2,0,0,0,2,3,0,0,2,2,2,0,
2,2,0,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0,3,0,2,2,2,0,0,2,2,2,0,0,0,
2,3,2,3,2,3,3,3,3,3,3,2,3,3,2,3,2,2,2,2,2,0,2,2,2,0,0,2,3,0,0,0,0,
3,3,3,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,2,0,0,0,2,2,0,0,0,0,0,0,
3,3,3,0,3,2,2,2,2,3,3,2,2,3,2,2,3,2,2,2,2,2,2,0,0,2,2,0,3,0,0,2,0,
3,3,3,2,3,3,3,3,0,2,2,3,3,2,2,2,2,0,3,2,2,2,0,2,0,2,3,0,2,2,2,2,2,
2,3,2,3,0,3,2,3,2,3,3,0,3,2,3,0,0,2,0,0,2,0,0,2,2,0,0,2,0,0,0,0,0,
3,3,3,2,3,2,2,2,2,2,2,2,2,2,2,0,0,0,2,2,3,0,0,2,2,2,2,0,0,2,0,2,0,
3,0,0,2,2,0,0,2,0,2,0,0,2,2,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,
};
const SequenceModel Iso_8859_1SpanishModel =
{
Iso_8859_1_CharToOrderMap,
SpanishLangModel,
33,
(float)0.9970385677528184,
PR_TRUE,
"ISO-8859-1",
"es"
};
const SequenceModel Iso_8859_15SpanishModel =
{
Iso_8859_15_CharToOrderMap,
SpanishLangModel,
33,
(float)0.9970385677528184,
(float)0.9966074680689881,
PR_TRUE,
"ISO-8859-15",
"es"
};
const SequenceModel Iso_8859_1SpanishModel =
{
Iso_8859_1_CharToOrderMap,
SpanishLangModel,
33,
(float)0.9966074680689881,
PR_TRUE,
"ISO-8859-1",
"es"
};
const SequenceModel Windows_1252SpanishModel =
{
Windows_1252_CharToOrderMap,
SpanishLangModel,
33,
(float)0.9970385677528184,
(float)0.9966074680689881,
PR_TRUE,
"WINDOWS-1252",
"es"
};
const LanguageModel SpanishModel =
{
"es",
Unicode_CharOrder,
66,
SpanishLangModel,
33,
(float)0.9966074680689881,
};