diff --git a/script/BuildLangModelLogs/LangSlovakModel.log b/script/BuildLangModelLogs/LangSlovakModel.log index 4dc3fe5..429f32e 100644 --- a/script/BuildLangModelLogs/LangSlovakModel.log +++ b/script/BuildLangModelLogs/LangSlovakModel.log @@ -1,156 +1,175 @@ = Logs of language model for Slovak (sk) = - Generated by BuildLangModel.py -- Started: 2021-03-16 20:04:01.478267 +- Started: 2021-03-21 12:48:41.368218 - Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -Dôkaz (matematika) (revision 7170221) -1825 (revision 6937105) -1839 (revision 6804159) -1847 (revision 7167629) -1852 (revision 6923466) -1878 (revision 7159904) -1955 (revision 7061181) -1976 (revision 7100059) -1983 (revision 7174204) +Európska_únia (revision 7169513) +1. decembra (revision 6792273) +1. júl (revision 7066144) +1. svetová vojna (revision 7159151) +10 centov (euro) (revision 6293215) +1952 (revision 7177031) +1957 (revision 7078231) +1958 (revision 7144704) +1960 (revision 7163978) +1967 (revision 7016805) +1968 (revision 7173483) +1973 (revision 7149623) +1979 (revision 7169115) +1981 (revision 7066520) +1985 (revision 7161691) +1986 (revision 7151177) +1987 (revision 7065067) +1990 (revision 7178863) +1992 (revision 7135542) 1993 (revision 7122277) 1995 (revision 7133683) -2012 (revision 7135523) -Adrien-Marie Legendre (revision 6556308) -Algebraická geometria (revision 5964212) -Algebraická rovnica (revision 6586551) -Algebrické číslo (revision 6382942) -Algoritmus (revision 7100698) -Andrew Wiles (revision 6813255) -Arabi (revision 7124298) -Arabčina (revision 7148041) -Aristoteles (revision 7150270) -Arthur Cayley (revision 6332355) -Axióma (revision 7073489) -Babylonia (revision 6432954) -Bernard Bolzano (revision 6903631) -Boh (revision 7166677) -Bolzanova veta (revision 6852875) -Bytie (revision 6569833) -Byzantská ríša (revision 7168566) -Caroline Blundenová (revision 7170221) -Cauchyho postupnosť (revision 6215169) -Celé číslo (revision 7047567) -Charles Hermite (revision 6412828) -Daniel Marcus (revision 5291472) -David Hilbert (revision 5968866) -Dedukcia (revision 6338099) -Definícia (revision 6965423) -Derivácia (funkcia) (revision 7014993) -Desiatková číselná sústava (revision 7047888) -Diofantická rovnica (revision 6060359) -Dynastia Chan (revision 7025657) -Dôkaz (logika) (revision 5495754) -Dôkaz sporom (revision 7051518) -Energia (revision 6975312) -Eric Weisstein (revision 6054413) -Ernst Kummer (revision 6001344) -Európa (revision 7164742) -Experiment (revision 6354302) -Fenomén (filozofia) (revision 6558128) -Filozofia (revision 6942330) -Formula (logika) (revision 3916562) -Formálny dôkaz (revision 7170221) -Formálny jazyk (revision 6505890) -Gabriel Cramer (revision 7068001) -Galoisova teória (revision 6749172) -Gentzenovský kalkul (revision 7170221) -Geometria (revision 7010499) -Geometrický dôkaz (revision 7170221) -Georg Ferdinand Cantor (revision 6697670) -Giordano Bruno (revision 7072808) -Gottlob Frege (revision 6580699) -Gödelova veta o neúplnosti (revision 6968373) -Hardvér (revision 6946820) -Henri Poincaré (revision 6830074) -Hilbertovský kalkul (revision 7170221) -Hmotnosť (revision 7021343) -Hypotéza (revision 6850461) -Idea (revision 6113421) -India (revision 6976622) -Intuícia (revision 5837951) -Jazyk (lingvistika) (revision 6462864) -John Taylor (revision 6741201) -Kardinálne číslo (revision 7154031) -Kenneth Appel (revision 5968422) -Klasická mechanika (revision 6295646) -Konečná množina (revision 6850487) -Konfucianizmus (revision 6948500) -Kresťanstvo (revision 7150939) -Latinčina (revision 7110742) -Leonhard Euler (revision 7016638) -Lineárna algebra (revision 6564030) -Logická axióma (revision 5495754) -Logický kalkul (revision 1608550) +1999 (revision 7133241) +1 cent (euro) (revision 6963154) +1 euro (revision 6264994) +2003 (revision 7135529) +2004 (revision 7149802) +2007 (revision 7135534) +2008 (revision 7156084) +2009 (revision 7135536) +2013 (revision 7135522) +2016 (revision 7159554) +2017 (revision 7174262) +20 centov (euro) (revision 6293208) +23. jún (revision 7052430) +2 centy (euro) (revision 6963155) +2 eurá (revision 6452782) +31. december (revision 7149783) +50 centov (euro) (revision 6293202) +5 centov (euro) (revision 6963157) +Acquis communautaire (revision 7033703) +Al Gore (revision 7146244) +Albánsko (revision 7172414) +Americký dolár (revision 7050515) +Amsterdamská zmluva (revision 7070102) +Angličtina (revision 7148052) +Angola (revision 7035956) +Antigua a Barbuda (revision 6560340) +Argentína (revision 7171908) +Arménsko (revision 7147325) +Atény (revision 7150984) +Austrália (štát) (revision 7154003) +Azory (revision 6595058) +Bahrajn (revision 7178284) +Bangladéš (revision 7147804) +Barack Obama (revision 7158748) +Barbados (revision 7178784) +Belgicko (revision 7163339) +Belgický frank (revision 6953531) +Belize (revision 7156055) +Benin (revision 7172640) +Bolívia (revision 7111159) +Botswana (revision 7158699) +Brazília (revision 7177507) +Brettonwoodská menová sústava (revision 6710540) +Brunej (revision 6975045) +Brusel (revision 7037073) +Bulharsko (revision 7177290) +Bulharský lev (revision 6230899) +Bulharčina (revision 7150125) +Burkina (revision 7158783) +Burundi (revision 7049945) +Ceuta (revision 6575679) +Charles Michel (revision 7098830) +Chorvátska kuna (revision 6935490) +Chorvátsko (revision 7131429) +Chorvátčina (revision 7178832) +Clo (revision 6894735) +Cyperská libra (revision 5964697) +Cyprus (revision 7035263) +David-Maria Sassoli (revision 7032560) +David Cameron (revision 7078464) +Demokracia (revision 7049807) +Denis Mukwege (revision 6800186) +Dominika (štát) (revision 7126694) +Dominikánska republika (revision 7080374) +Drachma (novoveké Grécko) (revision 6391564) +Druhá svetová vojna (revision 7151355) +Dunaj (revision 7150320) +Dánska koruna (revision 6125942) +Dánsko (revision 7161625) +Dánčina (revision 6557304) +Džibutsko (revision 7111764) +EHS (revision 6927031) +Eduard Kukan (revision 7079321) +Egypt (revision 7151318) +Ekvádor (revision 7073543) +Ellen Johnsonová- Sirleafová (revision 7151906) +Estónska koruna (revision 6751629) +Estónsko (revision 7148919) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 20:13:09.022092 +- Wikipedia parsing ended at: 2021-03-21 13:00:32.553701 -64 characters appeared 535286 times. +70 characters appeared 674892 times. -First 46 characters: -[ 0] Char o: 8.787265125559047 % -[ 1] Char a: 8.624174740232323 % -[ 2] Char e: 8.577470735270492 % -[ 3] Char n: 6.100103496074995 % -[ 4] Char i: 5.884891441210867 % -[ 5] Char t: 5.302772723366575 % -[ 6] Char r: 5.02273550961542 % -[ 7] Char s: 4.340670221152805 % -[ 8] Char k: 4.253240323864252 % -[ 9] Char v: 4.073896944810811 % -[10] Char l: 3.6208680966810265 % -[11] Char d: 3.3796886150581185 % -[12] Char m: 3.248356953105443 % -[13] Char p: 2.8470761424733695 % -[14] Char u: 2.6178528861206907 % -[15] Char c: 2.426740097816868 % -[16] Char z: 2.104856095619912 % -[17] Char h: 2.080570013039758 % -[18] Char j: 2.0389100406138025 % -[19] Char á: 1.675926514050433 % -[20] Char b: 1.6690143213160817 % -[21] Char y: 1.6607944164427988 % -[22] Char ý: 1.2490519086992748 % -[23] Char í: 1.1096871578931637 % -[24] Char č: 0.9322119390381964 % -[25] Char é: 0.8785957413420117 % -[26] Char ž: 0.7489454235679618 % -[27] Char ú: 0.702615050645823 % -[28] Char f: 0.6794498641847535 % -[29] Char š: 0.6790762321450589 % -[30] Char g: 0.6219105300717748 % -[31] Char ť: 0.4550838243481055 % -[32] Char ô: 0.38428055282596596 % -[33] Char ľ: 0.3648516867618432 % -[34] Char ó: 0.23090460053130477 % -[35] Char x: 0.22922325635267876 % -[36] Char ň: 0.09434209002290364 % -[37] Char w: 0.08855079340763629 % -[38] Char ä: 0.07005600744275023 % -[39] Char ď: 0.06706695112519288 % -[40] Char q: 0.018121153925191393 % -[41] Char ĺ: 0.010274881091603367 % -[42] Char ě: 0.010274881091603367 % -[43] Char ö: 0.010088065071756034 % -[44] Char ř: 0.007285824774046024 % -[45] Char ŕ: 0.006351744674809354 % +Most Frequent characters: +[ 0] Char a: 8.935503754674821 % +[ 1] Char o: 8.347409659619613 % +[ 2] Char e: 8.052103151319026 % +[ 3] Char n: 6.170320584626874 % +[ 4] Char r: 6.046300741451965 % +[ 5] Char i: 5.852195610556948 % +[ 6] Char s: 5.3632284869282785 % +[ 7] Char k: 4.751278723114217 % +[ 8] Char t: 4.600439774067555 % +[ 9] Char l: 4.167037096305779 % +[10] Char v: 4.090580418792933 % +[11] Char m: 3.1385762462734776 % +[12] Char d: 2.7853345424156752 % +[13] Char u: 2.7336225647955525 % +[14] Char p: 2.6873929458342967 % +[15] Char c: 2.5881178025521123 % +[16] Char á: 2.0701089952170126 % +[17] Char h: 2.0477350450146097 % +[18] Char j: 1.9521641981235516 % +[19] Char b: 1.921344452149381 % +[20] Char z: 1.6398179264237835 % +[21] Char y: 1.3830361005909093 % +[22] Char ý: 1.2827237543192096 % +[23] Char í: 0.8906610242824038 % +[24] Char č: 0.8473948424340486 % +[25] Char é: 0.7884224438873183 % +[26] Char ú: 0.7808656792494206 % +[27] Char g: 0.749897761419605 % +[28] Char f: 0.6475110091688744 % +[29] Char š: 0.6189138410293795 % +[30] Char ž: 0.4720755320851336 % +[31] Char ľ: 0.4089543215803418 % +[32] Char ó: 0.3095310064425123 % +[33] Char ť: 0.24344635882481935 % +[34] Char w: 0.11735210967088068 % +[35] Char ô: 0.10297943967331069 % +[36] Char ä: 0.09142203493299668 % +[37] Char x: 0.08312441101687382 % +[38] Char ň: 0.07201152184349496 % +[39] Char ď: 0.06993711586446424 % +[40] Char q: 0.017187935254825957 % +[41] Char ë: 0.011112889173378852 % +[42] Char ř: 0.010075686183863493 % +[43] Char ü: 0.009186655049993185 % +[44] Char ě: 0.008445795771767926 % +[45] Char ö: 0.007260420926607517 % +[46] Char ĺ: 0.006371389792737208 % +[47] Char ć: 0.006223217937092157 % +[48] Char ŕ: 0.0044451556693515405 % -The first 46 characters have an accumulated ratio of 0.9998617561453131. +The first 49 characters have an accumulated ratio of 0.9998118217433309. -1198 sequences found. +1410 sequences found. -First 512 (typical positive ratio): 0.9724967373205526 -Next 512 (512-1024): 0.007489454235679618 -Rest: 0.00042527339003644096 +First 773 (typical positive ratio): 0.9950030300775062 +Next 277 (1050-773): 0.003999347913144824 +Rest: 0.0009976220093489419 -- Processing end: 2021-03-16 20:13:09.628753 +- Processing end: 2021-03-21 13:00:33.050085 diff --git a/script/BuildLangModelLogs/LangSloveneModel.log b/script/BuildLangModelLogs/LangSloveneModel.log index 9ec9020..3dfbf39 100644 --- a/script/BuildLangModelLogs/LangSloveneModel.log +++ b/script/BuildLangModelLogs/LangSloveneModel.log @@ -1,146 +1,150 @@ = Logs of language model for Slovene (sl) = - Generated by BuildLangModel.py -- Started: 2021-03-16 20:13:09.868611 +- Started: 2021-03-21 12:30:22.611188 - Maximum depth: 4 - Max number of pages: 100 == Parsed pages == -XCOM: Enemy Unknown (revision 5360018) -1UP.com (revision 5138164) -2K Games (revision 4110089) -Android (operacijski sistem) (revision 5423518) -Animator videoigre (revision 5438736) -App Store (revision 4916505) -Artefakt (revision 4871634) -Athlon (revision 5138170) -Avstralazija (revision 5234981) -Avtopsija (revision 5394899) -Bralno-pisalni pomnilnik (revision 5307992) -Civilization (serija) (revision 5138157) -Deus Ex: Human Revolution (revision 5312201) -DirectX (revision 4477913) -Dishonored (revision 5359830) -Edge (magazine) (revision 5356455) -Enoigralska videoigra (revision 5116872) -Eurogamer (revision 5312201) -Evropa (revision 5448355) -Fantasy Flight Games (revision 4649361) -Firaxis Games (revision 4110089) -Francoska narodna knjižnica (revision 4596643) -GameRankings (revision 3934020) -GameSpot (revision 5116871) -GameSpy (revision 5168684) -GameTrailers (revision 5298120) -Game Informer (revision 5360018) -GamesTM (revision 5360018) -Grafična kartica (revision 5374734) -Granata (revision 4837685) -Holografija (revision 4760425) -IGN (revision 5370204) -IOS (revision 5404204) -Igra igranja vlog (revision 4768087) -Igra na deski (revision 5431955) -Igralna konzola (revision 4773547) -Igralni pogon (revision 4771045) -Intel (revision 5366957) -International Standard Book Number (revision 4765322) -Izdelovalec videoigre (revision 5438736) -Joker (revija) (revision 5351778) -Kotaku (revision 4613535) -Kristal (revision 5068718) -Linux (revision 5457231) -Lovec prestreznik (revision 4758667) -MTV (revision 5406174) -Mac OS X (revision 5212452) -Machinima (revision 5295004) -Major (revision 4758895) -Mednarodna različica (revision 5032649) -Metacritic (revision 3934020) -Michael McCann (skladatelj) (revision 5312201) -MicroProse (revision 5116826) -Microsoft Windows (revision 5460799) -Možje v črnem (revision 5262890) -Nezemeljsko življenje (revision 5386002) -Normativna kontrola (revision 5316351) -NowGamer (revision 5363253) -OS X (revision 5212452) -Ognjena ekipa (revision 4694450) -Operacijski sistem (revision 5309675) -Ostrostrelec (revision 4810396) -Pilot (revision 4758828) -PlayStation 3 (revision 5245525) -PlayStation Network (revision 4784984) -PlayStation Vita (revision 5245581) -Procesor (revision 5262718) -Producent videoiger (revision 5368686) -Razvijalec videoiger (revision 5171689) -Računalniška miška (revision 5169871) -Računalniško okolje (revision 5250619) -Severna Amerika (revision 5400891) -Sid Meier (revision 4061487) -Stealth (revision 4618630) -Steam (revision 5171704) -Strateška videoigra (revision 5245834) -Tablični računalnik (revision 5312221) -Telepatija (revision 4846742) -The Bureau: XCOM Declassified (revision 5360018) -The Guardian (revision 5361337) -Trdi disk (revision 5329681) -UFO: Enemy Unknown (revision 5360018) -Unreal Engine (revision 4771045) -Unreal Engine 3 (revision 4771045) -Uporabniški vmesnik (revision 5118420) -Valve Corporation (revision 5168680) -Večigralska videoigra (revision 4618639) -VideoGamer.com (revision 5363253) -Vohunski satelit (revision 5450401) -Vojaška taktika (revision 4759159) +Ljubljana (revision 5468628) +1689 (revision 4230028) +1918 (revision 5249637) +1926 (revision 5456617) +1929 (revision 5444196) +1930. (revision 5118014) +2011 (revision 5469547) +25. junij (revision 5447338) +A1 (radio) (revision 5360678) +ACH Volley (revision 5089458) +AKC Metelkova mesto (revision 5323280) +Abecedarium (revision 5092193) +Academia operosorum Labacensis (revision 5228146) +Adam Bohorič (revision 5414191) +Ajdovščina (revision 5423173) +Albert Kosmač (revision 5368699) +Albin Belar (revision 5197298) +Aleksander Bajt (revision 4917916) +Aleksandrija (revision 5405515) +Aleš Kunaver (revision 5029295) +Alojzij Šuštar (revision 5442498) +Alpe (revision 5464842) +Amsterdam (revision 5359727) +Anastasius Grün (revision 5070788) +Andorra la Vella (revision 5390252) +Andrej Fleischmann (revision 4930149) +Andrej Smole (revision 5467820) +Angela Vode (revision 5466809) +Anica Cevc (revision 5414746) +Anja Bukovec (revision 5041799) +Anton Aleksander Auersperg (revision 5070788) +Anton Alojzij Wolf (revision 5361749) +Anton Bitenc (revision 5463597) +Anton Bonaventura Jeglič (revision 5414522) +Anton Cerar (revision 5376771) +Anton Codelli (izumitelj) (revision 5161385) +Anton Foerster (revision 5270593) +Anton Gvajc (revision 5035801) +Anton Lajovic (revision 4867406) +Anton Melik (revision 5272303) +Anton Ocvirk (revision 5470942) +Anton Peterlin (revision 4979305) +Anton Stres (revision 5464457) +Anton Tomaž Linhart (revision 5413399) +Anton Verovšek (revision 5412417) +Anton Vodnik (revision 5180239) +Anton Šivic (revision 5410565) +Antwerpen (revision 5375367) +Arena Stožice (revision 5462141) +Argentinski park, Ljubljana (revision 5398130) +Argonavti (revision 5425545) +Arne Hodalič (revision 5417283) +Art nouveau (revision 5371096) +Ateizem (revision 5427207) +Atene (revision 5360039) +Ati Soss (revision 5463553) +Atila (revision 5425308) +Avgusta Danilova (revision 4788392) +Avstro-Ogrska (revision 5431606) +Avtobusna postaja Ljubljana (revision 4479008) +Avtocesta A1 (revision 5292269) +Avtocesta A2 (revision 5387166) +Aškerčeva cesta, Ljubljana (revision 4578067) +BTC (revision 5450525) +Bajer (potok) (revision 5147457) +Bakrorez (revision 5375208) +Bangkok (revision 5378204) +Barje (revision 5180470) +Barok (revision 5463042) +Bejrut (revision 5356724) +Benetke (revision 5424094) +Beograd (revision 5448139) +Berlin (revision 5435344) +Bern (revision 5466493) +Biblija (revision 5404188) +Bicike(lj) (revision 5468628) +Bine Rogelj (revision 5086972) +Biodiverziteta (revision 5352270) +Bizoviški potok (revision 5305268) +Bled (revision 5469179) +Bleiweisova cesta, Ljubljana (revision 5184903) +Bogo Grafenauer (revision 5311308) +Bogota (revision 5363243) +Bojan Adamič (revision 5409135) +Bojan Čop (revision 5247252) +Bojan Štih (revision 5305724) +Boris Kobe (revision 5296972) +Boris Sket (revision 5413264) +Borut Lesjak (revision 5273043) +Botanični vrt, Ljubljana (revision 5142111) +Botanični vrt Ljubljana (revision 5142111) +Botanični vrt Univerze v Ljubljani (revision 5142111) +Bovec (revision 5330651) +Boštjan Putrih (revision 5124433) +Boštjan Žekš (revision 5415317) +Božena Ravnihar (revision 5415042) +Božo Vodušek (revision 5122962) +Božo Škerlj (revision 5268384) == End of Parsed pages == -- Wikipedia parsing ended at: 2021-03-16 20:20:05.416719 +- Wikipedia parsing ended at: 2021-03-21 12:38:56.631283 -41 characters appeared 318060 times. +57 characters appeared 519434 times. -First 29 characters: -[ 0] Char a: 10.018235553040308 % -[ 1] Char e: 9.988995786958435 % -[ 2] Char i: 9.602590706156072 % -[ 3] Char o: 9.246054203609381 % -[ 4] Char n: 7.188580770923725 % -[ 5] Char r: 5.758976293781048 % -[ 6] Char s: 4.588442432245488 % -[ 7] Char t: 4.5786958435515315 % -[ 8] Char l: 4.357668364459536 % -[ 9] Char j: 4.260202477519965 % -[10] Char v: 3.809344148902723 % -[11] Char p: 3.4980821228698984 % -[12] Char k: 3.4751304785260646 % -[13] Char d: 3.143746462931522 % -[14] Char m: 2.928692699490662 % -[15] Char z: 2.332893164811671 % -[16] Char u: 1.9908193422624663 % -[17] Char g: 1.9298245614035088 % -[18] Char b: 1.5607118153807458 % -[19] Char c: 1.2903225806451613 % -[20] Char h: 1.2145507137018172 % -[21] Char č: 1.1001068980695468 % -[22] Char š: 0.6841476450984091 % -[23] Char ž: 0.44331258253159783 % -[24] Char f: 0.4203609381877633 % -[25] Char w: 0.2021631138778847 % -[26] Char y: 0.19618939822674966 % -[27] Char x: 0.16726403823178018 % -[28] Char q: 0.011004213041564485 % +Most Frequent characters: +[ 0] Char e: 10.223242991409881 % +[ 1] Char a: 10.130257164529084 % +[ 2] Char i: 8.972265966417293 % +[ 3] Char o: 8.507144314773388 % +[ 4] Char n: 7.334329289187846 % +[ 5] Char r: 5.438226993227244 % +[ 6] Char s: 5.162157271183634 % +[ 7] Char l: 5.052614961669818 % +[ 8] Char t: 4.829679997843807 % +[ 9] Char j: 4.445223069725894 % +[10] Char v: 4.3826549667522725 % +[11] Char k: 3.543664835185991 % +[12] Char d: 3.1351432520782234 % +[13] Char p: 2.8430945991213514 % +[14] Char m: 2.7564618411578756 % +[15] Char u: 2.3069340859473964 % +[16] Char z: 2.0064146744340956 % +[17] Char b: 1.937300985303234 % +[18] Char g: 1.6027060223242993 % +[19] Char h: 1.1235306121663196 % +[20] Char č: 1.0794441642249064 % +[21] Char c: 1.048256371358055 % +[22] Char š: 0.9687467512715764 % +[23] Char ž: 0.5263421339380941 % +[24] Char f: 0.41391206582549467 % -The first 29 characters have an accumulated ratio of 0.9998710935043701. +The first 25 characters have an accumulated ratio of 0.9976974938105708. -698 sequences found. +880 sequences found. -First 512 (typical positive ratio): 0.998296272473889 -Next 512 (512-1024): 0.004433125825315978 -Rest: -2.8189256484623115e-17 +First 449 (typical positive ratio): 0.9950499684040537 +Next 172 (621-449): 0.003957684836286113 +Rest: 0.000992346759660201 -- Processing end: 2021-03-16 20:20:05.900813 +- Processing end: 2021-03-21 12:38:56.993560 diff --git a/script/langs/sk.py b/script/langs/sk.py index c670839..26ba2c8 100644 --- a/script/langs/sk.py +++ b/script/langs/sk.py @@ -58,10 +58,8 @@ charsets = ['ISO-8859-2', 'Windows-1250', 'IBM852', 'MAC-CENTRALEUROPE'] # If use_ascii=True, there is no need to add any ASCII characters. # If case_mapping=True, there is no need to add several cases of a same # character (provided Python algorithms know the right cases). -alphabet = 'áäčďĺľňóŕšťúýž' -# The starred page which was rewarded on the main page when I created -# the data. -start_pages = ['Dôkaz (matematika)'] +alphabet = 'áäčďéíĺľňóôŕšťúýž' +start_pages = ['Európska_únia'] # give possibility to select another code for the Wikipedia URL. wikipedia_code = code # 'a' and 'A' will be considered the same character, and so on. diff --git a/script/langs/sl.py b/script/langs/sl.py index bf02bf8..7f8a1c1 100644 --- a/script/langs/sl.py +++ b/script/langs/sl.py @@ -44,16 +44,20 @@ import re name = 'Slovene' code = 'sl' -use_ascii = True +# ASCII is used except q and w, x and y according to Wikipedia. +use_ascii = False charsets = ['ISO-8859-2', 'ISO-8859-16', 'Windows-1250', 'IBM852', 'MAC-CENTRALEUROPE'] ## Optional Properties ## # Alphabet characters. -alphabet = 'čšž' -# The starred page which was rewarded on the main page when I created -# the data. -start_pages = ['XCOM: Enemy Unknown'] +# XXX According to Wikipedia there are 2 incompatible diacritics +# systems, but both seem barely used on Wikipedia (even though I can see +# some usage here or there, on some titles), so I assume these don't +# exist. Maybe this would have to be separate models for variants of +# the language. +alphabet = 'abcčdefghijklmnoprsštuvzž' +start_pages = ['Ljubljana'] wikipedia_code = code case_mapping = True diff --git a/src/LangModels/LangSlovakModel.cpp b/src/LangModels/LangSlovakModel.cpp index ffc3410..45859f0 100644 --- a/src/LangModels/LangSlovakModel.cpp +++ b/src/LangModels/LangSlovakModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 20:13:09.022988 + * On: 2021-03-21 13:00:32.554155 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 64,SYM, 49,SYM, 33, 65,SYM,SYM, 29, 59, 31, 66,SYM, 26, 61, /* AX */ - SYM, 67,SYM, 49,SYM, 33, 68,SYM,SYM, 29, 59, 31, 69,SYM, 26, 61, /* BX */ - 45, 19, 70, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 71, 39, /* CX */ - 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 72, 60, /* DX */ - 45, 19, 73, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 74, 39, /* EX */ - 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 75,SYM, /* FX */ + SYM, 70,SYM, 50,SYM, 31, 58,SYM,SYM, 29, 54, 33, 71,SYM, 30, 63, /* AX */ + SYM, 72,SYM, 50,SYM, 31, 58,SYM,SYM, 29, 54, 33, 73,SYM, 30, 63, /* BX */ + 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* CX */ + 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 74, 43, 22, 68, 59, /* DX */ + 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* EX */ + 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 75, 43, 22, 68,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 76, 31, 26, 77, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 78, 31, 26, 79, /* 9X */ - SYM,SYM,SYM, 49,SYM, 80,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */ - SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM, 81, 59,SYM, 33,SYM, 33, 61, /* BX */ - 45, 19, 82, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 83, 39, /* CX */ - 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 84, 60, /* DX */ - 45, 19, 85, 55, 38, 41, 47, 50, 24, 25, 57, 51, 42, 23, 86, 39, /* EX */ - 53, 54, 36, 34, 32, 62, 43,SYM, 44, 48, 27, 56, 46, 22, 87,SYM, /* FX */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 58, 33, 30, 76, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 58, 33, 30, 77, /* 9X */ + SYM,SYM,SYM, 50,SYM, 78,SYM,SYM,SYM,SYM, 54,SYM,SYM,SYM,SYM, 63, /* AX */ + SYM,SYM,SYM, 50,SYM,SYM,SYM,SYM,SYM, 79, 54,SYM, 31,SYM, 31, 63, /* BX */ + 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* CX */ + 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 80, 43, 22, 68, 59, /* DX */ + 48, 16, 66, 51, 36, 46, 47, 49, 24, 25, 56, 41, 44, 23, 61, 39, /* EX */ + 57, 53, 38, 32, 35, 55, 45,SYM, 42, 52, 26, 81, 43, 22, 68,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 50, 46, 25, 88, 38, 48, 47, 50, 49, 51, 62, 62, 89, 90, 38, 47, /* 8X */ - 25, 41, 41, 32, 43, 33, 33, 91, 92, 43, 46, 31, 31, 49,SYM, 24, /* 9X */ - 19, 23, 34, 27, 93, 94, 26, 26, 57, 57,SYM, 95, 24, 59,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 19, 96, 42, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM, 55, 55,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 53, 53, 39, 51, 39, 36, 23, 97, 42,SYM,SYM,SYM,SYM, 98, 48,SYM, /* DX */ - 34, 60, 32, 54, 54, 36, 29, 29, 45, 27, 45, 56, 22, 22, 99,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 56, 44, 44,SYM,SYM, /* FX */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 49, 43, 25, 66, 36, 52, 47, 49, 50, 41, 55, 55, 61, 82, 36, 47, /* 8X */ + 25, 46, 46, 35, 45, 31, 31, 58, 58, 45, 43, 33, 33, 50,SYM, 24, /* 9X */ + 16, 23, 32, 26, 83, 84, 30, 30, 56, 56,SYM, 85, 24, 54,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 16, 66, 44, 54,SYM,SYM,SYM,SYM, 63, 63,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 51, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 57, 57, 39, 41, 39, 38, 23, 61, 44,SYM,SYM,SYM,SYM, 68, 52,SYM, /* DX */ + 32, 59, 35, 53, 53, 38, 29, 29, 48, 26, 48, 86, 22, 22, 68,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 87, 42, 42,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,140 +131,148 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 4X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 1, 20, 15, 11, 2, 28, 30, 17, 4, 18, 8, 10, 12, 3, 0, /* 6X */ - 13, 40, 6, 7, 5, 14, 9, 37, 35, 21, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 38, 63, 63, 25,100, 43, 46, 19,101, 24, 38, 24, 47, 47, 25,102, /* 8X */ - 103, 39, 23, 39,104,105,106, 34,107, 32, 43,108, 27, 42, 42, 46, /* 9X */ - SYM,SYM, 57,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM, 57,SYM,SYM,109,110, /* AX */ - 111, 58,SYM,SYM, 58,112,SYM,SYM, 49,113,114, 33, 33, 41, 41,115, /* BX */ - 116, 54,SYM,SYM, 54, 36,SYM,SYM,SYM,SYM,SYM, 36, 62,117, 62, 52, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 52, 45, 45, 44,SYM,SYM, 44,118, /* DX */ - 119, 29,SYM,SYM, 29,120,121, 19, 31, 31, 23, 26, 26,122, 34, 32, /* EX */ - 123, 48, 27, 48, 56, 56,124,125, 22, 22,126, 61, 49, 61,127,SYM, /* FX */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 4X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 0, 19, 15, 12, 2, 28, 27, 17, 5, 18, 7, 9, 11, 3, 1, /* 6X */ + 14, 40, 4, 6, 8, 13, 10, 34, 37, 21, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 36, 69, 69, 25, 88, 45, 43, 16, 89, 24, 36, 24, 47, 47, 25, 90, /* 8X */ + 91, 39, 23, 39, 67, 67, 92, 32, 93, 35, 45, 60, 26, 44, 44, 43, /* 9X */ + SYM,SYM, 56,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM, 56,SYM,SYM, 94, 95, /* AX */ + 96, 64,SYM,SYM, 64, 97,SYM,SYM, 50, 98, 99, 31, 31, 46, 46,100, /* BX */ + 101, 53,SYM,SYM, 53, 38,SYM,SYM,SYM,SYM,SYM, 38, 55, 60, 55, 62, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 62, 48, 48, 42,SYM,SYM, 42,102, /* DX */ + 103, 29,SYM,SYM, 29, 58, 58, 16, 33, 33, 23, 30, 30, 65, 32, 35, /* EX */ + 65, 52, 26, 52,104,105,106,107, 22, 22,108, 63, 50, 63,109,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 92; +static const int Unicode_Char_size = 98; static const unsigned int Unicode_CharOrder[] = { - 65, 1, 66, 20, 67, 15, 68, 11, 69, 2, 70, 28, 71, 30, 72, 17, - 73, 4, 74, 18, 75, 8, 76, 10, 77, 12, 78, 3, 79, 0, 80, 13, - 81, 40, 82, 6, 83, 7, 84, 5, 85, 14, 86, 9, 87, 37, 88, 35, - 89, 21, 90, 16, 97, 1, 98, 20, 99, 15, 100, 11, 101, 2,102, 28, - 103, 30, 104, 17, 105, 4, 106, 18, 107, 8, 108, 10, 109, 12,110, 3, - 111, 0, 112, 13, 113, 40, 114, 6, 115, 7, 116, 5, 117, 14,118, 9, - 119, 37, 120, 35, 121, 21, 122, 16, 193, 19, 196, 38, 201, 25,205, 23, - 211, 34, 212, 32, 214, 43, 218, 27, 221, 22, 225, 19, 228, 38,233, 25, - 237, 23, 243, 34, 244, 32, 246, 43, 250, 27, 253, 22, 268, 24,269, 24, - 270, 39, 271, 39, 282, 42, 283, 42, 313, 41, 314, 41, 317, 33,318, 33, - 327, 36, 328, 36, 340, 45, 341, 45, 344, 44, 345, 44, 352, 29,353, 29, - 356, 31, 357, 31, 381, 26, 382, 26, + 65, 0, 66, 19, 67, 15, 68, 12, 69, 2, 70, 28, 71, 27, 72, 17, + 73, 5, 74, 18, 75, 7, 76, 9, 77, 11, 78, 3, 79, 1, 80, 14, + 81, 40, 82, 4, 83, 6, 84, 8, 85, 13, 86, 10, 87, 34, 88, 37, + 89, 21, 90, 20, 97, 0, 98, 19, 99, 15, 100, 12, 101, 2,102, 28, + 103, 27, 104, 17, 105, 5, 106, 18, 107, 7, 108, 9, 109, 11,110, 3, + 111, 1, 112, 14, 113, 40, 114, 4, 115, 6, 116, 8, 117, 13,118, 10, + 119, 34, 120, 37, 121, 21, 122, 20, 193, 16, 196, 36, 201, 25,203, 41, + 205, 23, 211, 32, 212, 35, 214, 45, 218, 26, 220, 43, 221, 22,225, 16, + 228, 36, 233, 25, 235, 41, 237, 23, 243, 32, 244, 35, 246, 45,250, 26, + 252, 43, 253, 22, 262, 47, 263, 47, 268, 24, 269, 24, 270, 39,271, 39, + 282, 44, 283, 44, 313, 46, 314, 46, 317, 31, 318, 31, 327, 38,328, 38, + 340, 48, 341, 48, 344, 42, 345, 42, 352, 29, 353, 29, 356, 33,357, 33, + 381, 30, 382, 30, }; /* Model Table: - * Total sequences: 1198 - * First 512 sequences: 0.9724967373205526 - * Next 512 sequences (512-1024): 0.02707798928941092 - * Rest: 0.00042527339003644096 + * Total considered sequences: 1410 / 2401 + * - Positive sequences: first 773 (0.9950030300775062) + * - Probable sequences: next 277 (1050-773) (0.003999347913144824) + * - Neutral sequences: last 1351 (0.0009976220093489419) + * - Negative sequences: 991 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SlovakLangModel[] = { - 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,0, - 0,3,2,3,1,3,2,3,1,0,3,0,2,3,2,0,1,2,0,0,0,1,0, - 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0, - 0,3,0,3,0,3,3,3,3,0,2,1,3,2,2,0,2,2,0,0,0,2,0, - 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0,0, - 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3,3, - 3,3,3,2,3,2,3,3,0,2,0,2,0,0,2,0,0,2,0,2,0,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0, - 3,3,2,3,0,3,3,3,3,0,2,3,2,2,2,0,0,2,0,0,1,2,0, - 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,3,3,3, - 3,2,3,0,3,2,2,2,1,0,2,3,0,2,1,2,2,1,1,2,2,0,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,3,3,3,3, - 3,3,3,2,2,2,2,3,2,2,0,3,2,2,2,2,0,2,0,0,2,0,0, - 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,2, - 3,2,2,1,3,2,0,2,3,2,2,1,0,2,2,0,0,2,0,0,1,0,0, - 3,3,3,2,3,3,3,2,2,3,3,3,2,2,3,3,2,2,2,3,2,3,3, - 2,2,3,2,3,2,2,2,0,3,2,2,0,2,2,0,0,0,0,0,2,2,1, - 3,3,3,3,3,2,3,3,2,0,3,3,1,3,3,2,3,2,0,3,2,3,3, - 3,2,3,2,2,1,3,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2,2, - 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,2,3,3, - 3,2,3,2,3,2,2,3,0,1,0,3,0,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3, - 3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,2,0, - 3,3,3,3,3,3,2,3,2,0,2,2,3,3,3,2,0,0,2,3,3,3,2, - 3,1,2,2,2,0,0,1,0,3,0,2,1,1,0,3,0,1,0,0,1,0,2, - 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,3,2,2,2, - 3,1,2,0,2,2,2,0,2,3,1,2,0,2,0,2,0,0,2,2,0,2,0, - 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,0, - 2,3,1,3,1,2,3,3,2,0,2,0,2,1,2,0,2,2,0,0,0,0,0, - 3,3,3,3,3,2,2,2,3,2,2,2,1,0,3,2,2,3,0,2,0,2,1, - 3,0,2,0,3,1,0,2,2,0,0,2,0,2,0,0,0,2,0,0,0,0,0, - 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,3, - 2,1,2,1,2,0,2,2,2,0,2,2,0,2,2,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3,3, - 2,1,2,0,2,1,2,0,0,0,2,1,0,2,2,1,0,0,2,0,2,1,2, - 3,3,3,3,3,3,2,3,2,3,2,2,3,2,3,2,2,2,2,2,2,0,0, - 3,2,0,0,3,0,3,2,0,0,2,2,0,1,0,1,0,0,0,0,0,0,0, - 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0,0, - 0,2,0,2,0,0,3,1,0,0,2,0,0,2,0,0,2,0,0,0,0,2,0, - 3,3,3,3,3,2,3,3,2,2,3,3,2,0,3,2,2,2,3,3,2,3,2, - 2,2,2,1,2,0,2,2,0,1,0,1,0,2,0,1,0,0,0,0,1,2,0, - 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0,0, - 0,2,0,2,2,1,3,2,3,0,1,1,0,2,1,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0,0, - 0,2,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, - 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0,0, - 0,2,0,2,0,0,3,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,2,2,3,0,3,0,2,0,2,2,0,0,0,2,0,0,0, - 3,0,1,1,1,0,3,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0, - 2,0,2,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0,0, - 0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 3,3,3,3,3,0,1,2,2,1,2,3,0,0,3,0,0,0,1,0,2,0,0, - 3,0,2,0,0,0,2,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0, - 1,0,0,3,0,3,3,3,2,3,3,3,2,3,0,3,3,2,0,0,2,0,0, - 0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,3,1,2,1,2,0,2,0,3,0,0,1,0,2,0,3,0, - 2,0,2,0,1,2,1,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,1,2,0,0, - 3,2,1,0,1,0,2,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0, - 3,3,3,2,3,2,3,2,0,2,3,2,2,1,3,1,0,2,1,2,0,2,0, - 0,0,2,0,2,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,2,0,0, - 3,3,0,0,1,0,2,0,1,0,0,2,0,1,2,0,0,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,2,0,0,0,2,0,0, - 0,0,0,3,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,0,0,0,2,3,2,0,0,2,0,3,0,1,1,0,0,2,0,0, - 0,2,0,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,3,0,2,3,2,1,1,2,3,3,3,0,1,2,0,0,0,3,0,0, - 0,0,0,1,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,3,3,0,0,1,0,2,0,2,2,2,0,0,1,0,0,0,1,0, - 1,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,1,3,2,0,2,1,0,2,0,1,0,0,0,0,2,0,0,1,1,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,1,0,1,0,0,2,0,0,0,2,2,0,0,0,2,0,0, - 0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0, - 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,0, - 0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,2,0,2,0,2,0,1,0,0,1,2,0,2,0,0,0,0, - 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,2,0,2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,1,0,0, - 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0, - 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0, - 0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,0,2, + 3,0,3,3,3,3,3,3,1,3,3,0,0,3,3,3,2,1,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,0,1, + 3,2,0,3,3,3,3,3,0,0,3,0,0,3,3,3,1,1,2,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,1, + 3,0,3,3,3,3,3,3,2,3,3,0,0,3,3,3,1,0,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,0,3,0,3,1,1,1,0,1,1,2,0,1,3,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3, + 3,3,3,3,3,3,3,0,3,2,2,3,0,2,2,0,2,2,0,2,0,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,3, + 3,3,2,3,3,3,3,2,3,3,2,0,1,3,1,0,3,1,3,0,0,0,0,3,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3, + 2,3,3,3,3,0,1,2,2,3,3,3,1,0,2,0,2,2,0,1,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,2,1,1,2,3,3,3, + 2,3,3,1,2,0,1,3,3,0,2,3,1,0,3,0,0,2,2,1,0,2,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,1,3,3,3,3,3, + 2,3,3,2,2,2,0,0,3,1,2,1,1,1,2,2,0,2,2,1,2,1,1,0,1, + 3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3, + 3,3,3,3,3,3,3,1,3,0,2,2,2,0,2,1,2,0,0,1,0,1,0,0,0, + 3,3,3,3,3,3,3,3,3,3,1,2,3,3,3,3,3,2,2,3,3,3,3,3, + 3,3,3,2,1,3,2,0,1,0,0,3,3,0,2,3,0,1,0,0,2,1,0,0,2, + 3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,1,2,3,2,3,2,3, + 2,3,3,1,2,1,2,0,2,0,1,3,3,1,1,0,0,1,0,2,1,0,0,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,3,3,3,2,3,3,3,3,0,3,3,1,0,3,0,0,2,2,1,2,1,3,0,1, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,1, + 3,1,0,3,3,3,3,2,1,2,2,0,0,3,1,3,2,1,1,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,0,1,2,3,3,3,3,3,1,2,0,3,1,3, + 2,3,3,0,2,3,0,1,1,2,0,3,3,0,2,0,0,2,0,1,0,1,2,0,0, + 3,3,3,3,3,3,3,3,3,3,2,1,2,3,1,3,2,3,1,2,3,3,0,3, + 0,3,3,2,2,0,0,0,1,2,0,0,0,0,1,0,3,1,0,0,0,0,0,0,0, + 0,1,1,3,3,1,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,0,0,0, + 3,0,0,2,1,3,3,3,0,1,0,0,0,0,2,1,0,0,2,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,1,2,2,0,3,3,2, + 2,3,3,1,1,3,0,3,2,0,2,2,2,0,2,0,2,1,0,2,0,2,2,0,2, + 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,2,3,3,1,0,3, + 3,1,3,0,1,3,2,1,2,1,0,0,1,0,1,0,0,2,0,2,0,1,0,0,0, + 3,3,3,3,3,3,3,3,2,3,2,3,3,3,1,3,3,2,3,3,2,3,3,3, + 3,3,2,2,0,2,1,2,1,0,2,1,0,0,1,0,0,1,2,1,1,1,1,0,0, + 3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,3,3,1,0,3,0,2,3,3,2,0,0,0,2,0,0,1,0,1,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1,0,0, + 3,0,2,2,2,3,3,2,0,3,1,0,0,0,3,0,1,0,0,0,0,2,0,0,0, + 0,0,0,3,3,0,3,3,3,2,3,3,2,0,2,3,0,2,1,3,3,0,0,0, + 2,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,2,0,3,3,1,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,0,0,0, + 3,0,0,2,1,3,3,2,0,3,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0, + 3,3,3,3,1,3,3,3,3,3,1,2,0,3,0,2,2,1,0,0,0,0,0,3, + 0,0,1,0,0,3,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 0,2,1,3,3,0,2,3,3,3,2,3,3,1,2,3,0,3,1,1,3,0,0,0, + 2,0,0,2,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 1,2,0,3,3,0,3,3,3,3,3,3,3,0,3,3,0,3,1,3,3,0,0,0, + 3,0,0,0,2,3,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,3,2,2,2,3,0,0,3,3,2,2,0,3,0,1, + 0,2,2,3,1,0,0,1,3,0,2,0,0,0,0,0,0,1,0,2,0,2,0,0,0, + 3,3,3,2,3,3,2,3,3,3,1,2,1,3,2,1,3,0,1,1,0,3,1,3, + 1,3,1,2,3,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,3,3,3,3,2,0,3,3,0,2,2,0,1,0,0,0,3, + 2,2,1,0,0,3,0,2,1,3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,1,3,3,3,0,1,0,2,3,3,0,0,2,0,1,3,0,0,0,3, + 0,2,1,0,0,3,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0, + 3,3,2,3,1,0,3,3,2,0,2,3,1,3,0,1,0,1,1,3,0,0,0,0, + 1,0,3,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,3,3,1,2,1,3,2,1,3,3,0,3,2,0,1,2,3,3,0,0,0, + 0,0,0,3,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,1,2,2,0,1,1,0,0,1,2,2,0,0,1,1,0,1,0,0,0,0, + 1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,1,2,2,0,1,1,1,0,1,0,2,0,1,0,2,0,1, + 0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0, + 0,0,0,0,3,0,3,2,2,3,3,2,3,0,1,2,0,1,2,3,3,0,0,0, + 1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,2,2,1,2,2,3,0,1,0,2,0,0,1,0,1,0,1,3,0,0,0, + 3,0,0,1,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,2,0,3,0,2,3,0,2,1,0,2,3,0,0,0,0,0,0,2,0,1, + 0,0,0,0,2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,1,0,0,0,0,2,1,3,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,2,0,0,0,0,2,0,3,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,2,0,0,2,0,0,0,0,1,0,0,3,0,0,0,0,0,1,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,2,0,2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,2,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,2,2,2,1,1,1,2,2,0,0,1,0,0,2,0,1,0,2,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,1,2,0,1,3,2,1,0,1,1,0,2,2,0,1,1,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0, + 0,0,0,2,2,0,1,1,2,1,0,2,0,0,1,1,0,1,0,1,0,0,0,0, + 0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,0, + 0,0,0,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 2,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 0,0,0,0,0,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, }; @@ -272,8 +280,8 @@ const SequenceModel Iso_8859_2SlovakModel = { Iso_8859_2_CharToOrderMap, SlovakLangModel, - 46, - (float)0.9724967373205526, + 49, + (float)0.9990023779906511, PR_TRUE, "ISO-8859-2", "sk" @@ -283,8 +291,8 @@ const SequenceModel Windows_1250SlovakModel = { Windows_1250_CharToOrderMap, SlovakLangModel, - 46, - (float)0.9724967373205526, + 49, + (float)0.9990023779906511, PR_TRUE, "WINDOWS-1250", "sk" @@ -294,8 +302,8 @@ const SequenceModel Ibm852SlovakModel = { Ibm852_CharToOrderMap, SlovakLangModel, - 46, - (float)0.9724967373205526, + 49, + (float)0.9990023779906511, PR_TRUE, "IBM852", "sk" @@ -305,8 +313,8 @@ const SequenceModel Mac_CentraleuropeSlovakModel = { Mac_Centraleurope_CharToOrderMap, SlovakLangModel, - 46, - (float)0.9724967373205526, + 49, + (float)0.9990023779906511, PR_TRUE, "MAC-CENTRALEUROPE", "sk" @@ -316,8 +324,8 @@ const LanguageModel SlovakModel = { "sk", Unicode_CharOrder, - 92, + 98, SlovakLangModel, - 46, - (float)0.9724967373205526, + 49, + (float)0.9998118217433309, }; diff --git a/src/LangModels/LangSloveneModel.cpp b/src/LangModels/LangSloveneModel.cpp index ccb4f7f..639bdee 100644 --- a/src/LangModels/LangSloveneModel.cpp +++ b/src/LangModels/LangSloveneModel.cpp @@ -42,7 +42,7 @@ /** * Generated by BuildLangModel.py - * On: 2021-03-16 20:20:05.416974 + * On: 2021-03-21 12:38:56.631590 **/ /* Character Mapping Table: @@ -68,18 +68,18 @@ static const unsigned char Iso_8859_2_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 4X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 6X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 41,SYM, 42,SYM, 43, 44,SYM,SYM, 22, 45, 46, 47,SYM, 23, 48, /* AX */ - SYM, 49,SYM, 50,SYM, 51, 52,SYM,SYM, 22, 53, 54, 55,SYM, 23, 56, /* BX */ - 57, 33, 58, 59, 60, 61, 62, 31, 21, 29, 63, 37, 64, 30, 65, 66, /* CX */ - 67, 68, 69, 32, 36, 70, 71,SYM, 72, 73, 39, 74, 75, 40, 76, 77, /* DX */ - 78, 33, 79, 80, 81, 82, 83, 31, 21, 29, 84, 37, 85, 30, 86, 87, /* EX */ - 88, 89, 90, 32, 36, 91, 92,SYM, 93, 94, 39, 95, 96, 40, 97,SYM, /* FX */ + SYM, 57,SYM, 48,SYM, 58, 59,SYM,SYM, 22, 60, 61, 62,SYM, 23, 63, /* AX */ + SYM, 64,SYM, 48,SYM, 65, 66,SYM,SYM, 22, 67, 68, 69,SYM, 23, 70, /* BX */ + 71, 34, 72, 73, 33, 74, 30, 45, 20, 29, 75, 46, 76, 37, 51, 77, /* CX */ + 35, 53, 78, 36, 49, 79, 31,SYM, 41, 80, 81, 82, 28, 43, 83, 44, /* DX */ + 84, 34, 85, 86, 33, 87, 30, 45, 20, 29, 88, 46, 89, 37, 51, 90, /* EX */ + 35, 53, 91, 36, 49, 92, 31,SYM, 41, 93, 94, 95, 28, 43, 96,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -89,18 +89,18 @@ static const unsigned char Iso_8859_16_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 4X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 6X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ - SYM, 98, 99,100,SYM,SYM, 22,SYM, 22,SYM,101,SYM,102,SYM,103,104, /* AX */ - SYM,SYM, 21,105, 23,SYM,SYM,SYM, 23, 21,106,SYM,107,108,109,110, /* BX */ - 111, 33,112,113,114,115,116, 31, 35, 29, 34, 37,117, 30,118,119, /* CX */ - 120,121,122, 32, 36,123,124,125,126,127, 39,128,129,130,131,132, /* DX */ - 133, 33,134,135,136,137,138, 31, 35, 29, 34, 37,139, 30,140,141, /* EX */ - 142,143,144, 32, 36,145,146,147,148,149, 39,150,151,152,153,154, /* FX */ + SYM, 97, 98, 48,SYM,SYM, 22,SYM, 22,SYM, 99,SYM,100,SYM,101,102, /* AX */ + SYM,SYM, 20, 48, 23,SYM,SYM,SYM, 23, 20,103,SYM, 52, 52,104,105, /* BX */ + 39, 34,106,107, 33, 30, 47, 45, 38, 29,108, 46, 50, 37, 51,109, /* CX */ + 35, 53, 56, 36, 49,110, 31,111,112, 54,113, 55, 28,114,115, 44, /* DX */ + 39, 34,116,117, 33, 30, 47, 45, 38, 29,118, 46, 50, 37, 51,119, /* EX */ + 35, 53, 56, 36, 49,120, 31,121,122, 54,123, 55, 28,124,125,126, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -110,18 +110,18 @@ static const unsigned char Windows_1250_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,155,156, 23,157, /* 8X */ - ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,158,159, 23,160, /* 9X */ - SYM,SYM,SYM,161,SYM,162,SYM,SYM,SYM,SYM,163,SYM,SYM,SYM,SYM,164, /* AX */ - SYM,SYM,SYM,165,SYM,SYM,SYM,SYM,SYM,166,167,SYM,168,SYM,169,170, /* BX */ - 171, 33,172,173,174,175,176, 31, 21, 29,177, 37,178, 30,179,180, /* CX */ - 181,182,183, 32, 36,184,185,SYM,186,187, 39,188,189, 40,190,191, /* DX */ - 192, 33,193,194,195,196,197, 31, 21, 29,198, 37,199, 30,200,201, /* EX */ - 202,203,204, 32, 36,205,206,SYM,207,208, 39,209,210, 40,211,SYM, /* FX */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 4X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 6X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,127,128, 23,129, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 22,SYM,130,131, 23,132, /* 9X */ + SYM,SYM,SYM, 48,SYM,133,SYM,SYM,SYM,SYM,134,SYM,SYM,SYM,SYM,135, /* AX */ + SYM,SYM,SYM, 48,SYM,SYM,SYM,SYM,SYM,136,137,SYM,138,SYM,139,140, /* BX */ + 141, 34,142,143, 33,144, 30, 45, 20, 29,145, 46,146, 37, 51,147, /* CX */ + 35, 53,148, 36, 49,149, 31,SYM, 41,150,151,152, 28, 43,153, 44, /* DX */ + 154, 34,155,156, 33,157, 30, 45, 20, 29,158, 46,159, 37, 51,160, /* EX */ + 35, 53,161, 36, 49,162, 31,SYM, 41,163,164,165, 28, 43,166,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -131,18 +131,18 @@ static const unsigned char Ibm852_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 31,212, 29,213,214,215,216, 31,217, 37,218,219,220,221,222,223, /* 8X */ - 29,224,225, 36,226,227,228,229,230,231,232,233,234,235,SYM, 21, /* 9X */ - 33, 30, 32, 39,236,237, 23, 23,238,239,SYM,240, 21,241,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 33,242,243,244,SYM,SYM,SYM,SYM,245,246,SYM, /* BX */ - SYM,SYM,SYM,SYM,SYM,SYM,247,248,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ - 249,249,249, 37,249,249, 30,249,249,SYM,SYM,SYM,SYM,249,249,SYM, /* DX */ - 32,249, 36,249,249,249, 22, 22,249, 39,249,249, 40, 40,249,SYM, /* EX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,SYM,SYM, /* FX */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 4X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 6X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 45, 28, 29,167, 33,168, 30, 45, 48, 46,169,170, 51,171, 33, 30, /* 8X */ + 29,172,173, 49, 31,174,175,176,177, 31, 28,178,179, 48,SYM, 20, /* 9X */ + 34, 37, 36,180,181,182, 23, 23,183,184,SYM,185, 20,186,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 34,187,188,189,SYM,SYM,SYM,SYM,190,191,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM,192,193,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 35, 35,194, 46,195,196, 37, 51,197,SYM,SYM,SYM,SYM,198,199,SYM, /* DX */ + 36, 44, 49, 53, 53,200, 22, 22,201,202,203,204, 43, 43,205,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,206, 41, 41,SYM,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -152,73 +152,69 @@ static const unsigned char Mac_Centraleurope_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 4X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 0, 18, 19, 13, 1, 24, 17, 20, 2, 9, 12, 8, 14, 4, 3, /* 6X */ - 11, 28, 5, 6, 7, 16, 10, 25, 27, 26, 15,SYM,SYM,SYM,SYM,CTR, /* 7X */ - 249,249,249, 29,249,249,249, 33,249, 21,249, 21,249,249, 29,249, /* 8X */ - 249,249, 30,249, 38, 38,249, 32,249, 36,249,249, 39,249,249,249, /* 9X */ - SYM,SYM,249,SYM,SYM,SYM,SYM,249,SYM,SYM,SYM,249,SYM,SYM,249,249, /* AX */ - 249,249,SYM,SYM,249,249,SYM,SYM,249,249,249,249,249,249,249,249, /* BX */ - 249,249,SYM,SYM,249,249,SYM,SYM,SYM,SYM,SYM,249,249,249,249,249, /* CX */ - SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,249,249,249,249,SYM,SYM,249,249, /* DX */ - 249, 22,SYM,SYM, 22,249,249, 33,249,249, 30, 23, 23,249, 32, 36, /* EX */ - 249,249, 39,249,249,249,249,249, 40, 40,249,249,249,249,249,SYM, /* FX */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 4X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 17, 21, 12, 0, 24, 18, 19, 2, 9, 11, 7, 14, 4, 3, /* 6X */ + 13, 32, 5, 6, 8, 15, 10, 25, 27, 26, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 33, 42, 42, 29,207, 31, 28, 34,208, 20, 33, 20, 30, 30, 29,209, /* 8X */ + 210,211, 37,212, 40, 40,213, 36,214, 49, 31,215,216,217,218, 28, /* 9X */ + SYM,SYM,219,SYM,SYM,SYM,SYM, 44,SYM,SYM,SYM,220,SYM,SYM,221,222, /* AX */ + 223,224,SYM,SYM,225,226,SYM,SYM, 48,227,228,229,230,231,232,233, /* BX */ + 234, 53,SYM,SYM, 53,235,SYM,SYM,SYM,SYM,SYM,236,237,238,239,240, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,241,242,243, 41,SYM,SYM, 41,244, /* DX */ + 245, 22,SYM,SYM, 22,246,247, 34,248,249, 37, 23, 23,249, 36, 49, /* EX */ + 249,249,249,249,249,249,249,249, 43, 43,249,249, 48,249,249,SYM, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ -static const int Unicode_Char_size = 58; +static const int Unicode_Char_size = 50; static const unsigned int Unicode_CharOrder[] = { - 65, 0, 66, 18, 67, 19, 68, 13, 69, 1, 70, 24, 71, 17, 72, 20, - 73, 2, 74, 9, 75, 12, 76, 8, 77, 14, 78, 4, 79, 3, 80, 11, - 81, 28, 82, 5, 83, 6, 84, 7, 85, 16, 86, 10, 87, 25, 88, 27, - 89, 26, 90, 15, 97, 0, 98, 18, 99, 19, 100, 13, 101, 1,102, 24, - 103, 17, 104, 20, 105, 2, 106, 9, 107, 12, 108, 8, 109, 14,110, 4, - 111, 3, 112, 11, 113, 28, 114, 5, 115, 6, 116, 7, 117, 16,118, 10, - 119, 25, 120, 27, 121, 26, 122, 15, 268, 21, 269, 21, 352, 22,353, 22, + 65, 1, 66, 17, 67, 21, 68, 12, 69, 0, 70, 24, 71, 18, 72, 19, + 73, 2, 74, 9, 75, 11, 76, 7, 77, 14, 78, 4, 79, 3, 80, 13, + 82, 5, 83, 6, 84, 8, 85, 15, 86, 10, 90, 16, 97, 1, 98, 17, + 99, 21, 100, 12, 101, 0, 102, 24, 103, 18, 104, 19, 105, 2,106, 9, + 107, 11, 108, 7, 109, 14, 110, 4, 111, 3, 112, 13, 114, 5,115, 6, + 116, 8, 117, 15, 118, 10, 122, 16, 268, 20, 269, 20, 352, 22,353, 22, 381, 23, 382, 23, }; /* Model Table: - * Total sequences: 698 - * First 512 sequences: 0.998296272473889 - * Next 512 sequences (512-1024): 0.00170372752611106 - * Rest: -2.8189256484623115e-17 + * Total considered sequences: 880 / 625 + * - Positive sequences: first 449 (0.9950499684040537) + * - Probable sequences: next 172 (621-449) (0.003957684836286113) + * - Neutral sequences: last 4 (0.000992346759660201) + * - Negative sequences: -255 (off-ratio) * Negative sequences: TODO */ static const PRUint8 SloveneLangModel[] = { - 2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2, - 3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,3,3,2,2,0,3,3,3,2,2, - 3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,0,2,0,3,3,3,3,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,0,3,3,2,3,0,0, - 3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,2,3,3,3,2,3,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,2,3,3,3,3,2,2,0,2,2,0, - 3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,2,3,3,2,2,0,0,0,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,0,2,3,0,3,2,2,3,3,0,3,0,0,2,3,0,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,2,0, - 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,3,2,3,3,3,2,2,0,2,3,3,2,0, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,0,0,2,2,0, - 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,2,3,0, - 3,3,3,3,3,3,3,2,3,0,3,3,0,0,2,2,3,3,3,2,3,0,0,0,0,0,2,2,0, - 3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,0,3,0,3,3,0,3,2,2,2,0,3,0,0, - 3,3,3,3,2,3,3,3,3,0,2,3,3,3,2,2,3,2,3,3,3,0,0,0,2,2,2,2,2, - 3,3,3,3,3,3,2,3,3,0,3,2,3,3,2,3,3,0,2,0,0,2,0,0,2,2,2,0,0, - 3,3,3,3,3,3,0,2,3,3,2,2,3,2,0,0,3,0,2,2,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,0,3,3,3,2,3,3,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0, - 3,3,3,2,3,2,0,0,2,3,2,0,3,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,2,3,3,3,3,2,0,0,0,0,2,0,3,2,0,0,0,0,0,0,3,2,0,0,0, - 3,3,3,3,3,0,3,2,2,0,0,0,0,2,0,0,2,2,2,0,2,0,0,0,0,3,2,0,0, - 2,3,2,3,3,2,3,2,3,3,0,2,2,2,3,0,2,2,2,3,2,0,0,0,0,2,0,0,0, - 3,2,3,3,0,2,2,3,0,0,2,3,0,2,2,0,3,2,3,3,0,0,0,0,0,2,2,3,0, - 0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,2,3,3,1,3,3,3,2,3,2,3,2,3,1,1,2, + 3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,0,3,0,0,1, + 3,3,3,3,3,3,3,2,3,1,3,3,3,3,3,3,2,3,1,3,2,3,3,0,1, + 3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,2,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,1,1,2,0,3,2,0,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2, + 3,3,3,3,3,3,3,3,3,2,1,2,1,2,1,3,2,1,1,3,2,2,2,0,2, + 3,3,3,3,3,3,3,3,2,1,3,3,2,3,3,3,1,3,1,1,3,3,3,1,2, + 3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,3,1,1, + 3,3,3,3,3,3,3,3,3,3,3,2,3,1,3,3,3,2,2,1,3,2,2,0,0, + 3,3,3,3,3,3,2,3,2,1,2,2,2,1,2,3,0,2,2,3,0,1,0,0,0, + 3,3,3,3,3,3,2,3,3,0,1,3,2,1,3,3,0,2,2,0,2,0,0,0,2, + 3,3,3,3,3,3,1,3,1,3,3,3,0,0,1,3,0,2,0,0,0,1,0,0,0, + 3,3,3,3,2,3,2,3,3,1,3,3,1,0,2,3,1,1,1,3,0,3,0,0,2, + 3,3,3,3,3,2,0,3,3,3,3,3,1,3,2,3,0,0,0,0,3,2,1,0,0, + 3,3,3,3,3,2,0,0,0,3,1,2,0,0,1,3,0,3,2,0,0,1,0,0,0, + 3,3,3,3,2,3,3,3,2,2,0,1,0,0,2,3,0,2,2,0,0,0,0,0,3, }; @@ -226,9 +222,9 @@ const SequenceModel Iso_8859_2SloveneModel = { Iso_8859_2_CharToOrderMap, SloveneLangModel, - 29, - (float)0.998296272473889, - PR_TRUE, + 25, + (float)0.9990076532403398, + PR_FALSE, "ISO-8859-2", "sl" }; @@ -237,9 +233,9 @@ const SequenceModel Iso_8859_16SloveneModel = { Iso_8859_16_CharToOrderMap, SloveneLangModel, - 29, - (float)0.998296272473889, - PR_TRUE, + 25, + (float)0.9990076532403398, + PR_FALSE, "ISO-8859-16", "sl" }; @@ -248,9 +244,9 @@ const SequenceModel Windows_1250SloveneModel = { Windows_1250_CharToOrderMap, SloveneLangModel, - 29, - (float)0.998296272473889, - PR_TRUE, + 25, + (float)0.9990076532403398, + PR_FALSE, "WINDOWS-1250", "sl" }; @@ -259,9 +255,9 @@ const SequenceModel Ibm852SloveneModel = { Ibm852_CharToOrderMap, SloveneLangModel, - 29, - (float)0.998296272473889, - PR_TRUE, + 25, + (float)0.9990076532403398, + PR_FALSE, "IBM852", "sl" }; @@ -270,9 +266,9 @@ const SequenceModel Mac_CentraleuropeSloveneModel = { Mac_Centraleurope_CharToOrderMap, SloveneLangModel, - 29, - (float)0.998296272473889, - PR_TRUE, + 25, + (float)0.9990076532403398, + PR_FALSE, "MAC-CENTRALEUROPE", "sl" }; @@ -281,8 +277,8 @@ const LanguageModel SloveneModel = { "sl", Unicode_CharOrder, - 58, + 50, SloveneLangModel, - 29, - (float)0.998296272473889, + 25, + (float)0.9976974938105708, };