From 210e52d99a2a2b7301362326ce3230b0036a3d33 Mon Sep 17 00:00:00 2001 From: Jehan Date: Wed, 25 May 2016 17:39:10 +0200 Subject: [PATCH] LangModels: update the Greek language models. I did this to improve the model after a user reported a Greek subtitle badly detected (see commit e0eec3b). It didn't help, but well... since I updated it with much more data from Wikipedia. Let's just commit it! --- script/BuildLangModelLogs/LangGreekModel.log | 355 +++++++++++++------ script/langs/el.py | 2 +- src/LangModels/LangGreekModel.cpp | 230 ++++++------ 3 files changed, 376 insertions(+), 211 deletions(-) diff --git a/script/BuildLangModelLogs/LangGreekModel.log b/script/BuildLangModelLogs/LangGreekModel.log index c7b5e6f..a61b2ec 100644 --- a/script/BuildLangModelLogs/LangGreekModel.log +++ b/script/BuildLangModelLogs/LangGreekModel.log @@ -1,117 +1,272 @@ = Logs of language model for Greek (el) = - Generated by BuildLangModel.py -- Started: 2015-12-13 17:52:58.225697 -- Maximum depth: 2 -- Max number of pages: 50 +- Started: 2016-05-25 15:16:42.898905 +- Maximum depth: 5 +- Max number of pages: 200 == Parsed pages == Πύλη:Κύρια (revision 5511929) -13 Δεκεμβρίου (revision 5540654) -1545 (revision 5340059) -1937 (revision 5573231) -1943 (revision 5503673) -2007 (revision 5561663) -2009 (revision 5561693) -2012 (revision 5559036) -Sukhoi Su-24 (revision 5582048) -Wiki (revision 5481490) -Wikimedia (revision 5563126) -Αναμνηστικά κέρματα ευρώ €2 (revision 5578001) -Αφρική (revision 5485484) -Γερμανία (revision 5579724) +14 Σεπτεμβρίου (revision 5808678) +16 Σεπτεμβρίου (revision 5810117) +1771 (revision 4940722) +1829 (revision 5863423) +1921 (revision 5819621) +1948 (revision 5785943) +1965 (revision 5846907) +1970 (revision 5816968) +1973 (revision 5423504) +25 Μαΐου (revision 5865973) +Eurovision (revision 5865484) +Scorpions (revision 5586116) +Wiki (revision 5859059) +Wikimedia (revision 5771416) +Αγία Πετρούπολη (revision 5782933) +Αγγλική γλώσσα (revision 5851128) 
+Αλεξάντρ Πούσκιν (revision 5790131) +Βέλος ΙΙ (Αντιτορπιλικό) (revision 5178914) +Βραζιλία (revision 5857981) +Γαλλική γλώσσα (revision 5851119) +Γαλλική εισβολή στην Ρωσία (revision 5858523) +Γενικές εκλογές στη Δομινικανή Δημοκρατία 2016 (revision 5848770) +Γηραιότερο πρόσωπο στον κόσμο (revision 5852034) +Διαγωνισμός Τραγουδιού Eurovision 2016 (revision 5863783) +Δικτατορία των Συνταγματαρχών (revision 5864405) +Δομινικανή Δημοκρατία (revision 5848627) Εγκυκλοπαίδεια (revision 5566281) -Ελεύθερο περιεχόμενο (revision 5285700) -Ελλάδα (revision 5580388) -Ελληνική γλώσσα (revision 5545135) -Ευρωζώνη (revision 5453082) -Ευρωπαϊκή Ένωση (revision 5562182) -Ευρωπαϊκή Επιτροπή (revision 5535040) -Ευρωπαϊκή Κεντρική Τράπεζα (revision 5352451) -Ευρώ (revision 5535228) -Ιαπωνία (revision 5540508) -Κέρματα ευρώ (revision 5421943) -Κίνα (revision 5538381) -Καθολική Εκκλησία (revision 5345868) -Καλάβρυτα (revision 5562415) -Κεντροαφρικανική Δημοκρατία (revision 5583804) -Κλίμα (revision 5331688) -Ναντσίνγκ (revision 5460512) -Οικουμενικές σύνοδοι (revision 5377374) -ΠΓΔΜ (revision 5577102) -Πάπας Φραγκίσκος (revision 5565143) -Παρίσι (revision 5524991) -Προτεσταντισμός (revision 5564242) -Πρωθυπουργός της Πορτογαλίας (revision 4986657) -Σφαγή της Ναντσίνγκ (revision 5026948) -Σφαγή των Καλαβρύτων (revision 5491100) -Σύνοδος των Ηνωμένων Εθνών για το κλίμα (2015) (revision 5521523) -Τουρκική κατάρριψη ρωσικού Sukhoi Su-24 (revision 5582048) -Χρυσά και ασημένια συλλεκτικά νομίσματα Ευρώ (revision 4458078) -10 Δεκεμβρίου (revision 5556215) -1124 (revision 5556117) -11 Δεκεμβρίου (revision 5537830) -1204 (revision 5234676) -1250 (revision 5445111) -1294 (revision 5563589) -12 Δεκεμβρίου (revision 5539079) +Ελεύθερο περιεχόμενο (revision 5824058) +Ελλάδα (revision 5863759) +Ελληνική γλώσσα (revision 5790854) +Ιππικό (revision 5376587) +Ιταλία (revision 5781867) +Κίεβο (revision 5794613) +Κατάληψη του Παρισιού (1814) (revision 5729368) +Κλάους Μάιν (revision 5668218) +Μάχη της 
Λειψίας (revision 5729316) +Μάχη της Σαλτάνοφκα (revision 5865460) +Μάχη του Μποροντίνο (revision 5670322) +Μαξ Βερστάπεν (revision 5864745) +Μπλουζ (revision 5846428) +Νίκος Καχτίτσης (revision 5723615) +Νικολάι Νικολάεβιτς Ραέφσκι (revision 5865460) +Ντίλμα Ρούσεφ (revision 5843412) +Ομοσπονδιακό Σοβιέτ της Ρωσικής Αυτοκρατορίας (revision 5865460) +Ουκρανία (revision 5847651) +Πάτρα (revision 5800331) +Ποδόσφαιρο (revision 5864952) +Πριμέρα Ντιβιζιόν (revision 5846965) +Ρωσική Αυτοκρατορία (revision 5858419) +Ρωσική γλώσσα (revision 5818960) +Ρώσοι (revision 5376764) +Σουζάνα Μούσατ Τζόουνς (revision 5848866) +Στοκχόλμη (revision 5670508) +Στρατηγός (revision 5464718) +Τζακ Στάινμπεργκερ (revision 5820361) +Τζαμάλα (revision 5863755) +ΦΚ Μπαρτσελόνα (revision 5862032) +Φόρμουλα Ένα (revision 5809160) +10 Σεπτεμβρίου (revision 5841838) +11 Σεπτεμβρίου (revision 5796866) +12 Σεπτεμβρίου (revision 5795991) +1321 (revision 5811404) +13 Σεπτεμβρίου (revision 5830505) +1435 (revision 5600729) +1498 (revision 5831868) +1523 (revision 5863396) +1527 (revision 5579042) +1580 (revision 5742938) +15 Σεπτεμβρίου (revision 5817369) +1712 (revision 5699806) +1741 (revision 5817896) +1752 (revision 5666171) +1760 (revision 5490201) +1769 (revision 5336004) +17 Σεπτεμβρίου (revision 5843911) +1812 (revision 5703237) +1814 (revision 5751122) +1851 (revision 5854460) +1878 (revision 5863501) +1889 (revision 5795061) +1890 (revision 5705460) +1898 (revision 5863504) +18 Σεπτεμβρίου (revision 5661544) +1901 (revision 5865687) +1902 (revision 5779111) +1905 (revision 5862599) +1910 (revision 5794858) +1916 (revision 5800363) +1917 (revision 5865701) +1925 (revision 5854774) +1927 (revision 5839595) +1928 (revision 5814308) +1933 (revision 5854834) +1936 (revision 5854290) +1937 (revision 5794891) +1943 (revision 5807315) +1944 (revision 5865804) +1950 (revision 5807377) +1956 (revision 5795994) +1960 (revision 5795065) +1963 (revision 5863751) +1966 (revision 5707508) +1969 (revision 
5668647) +1980 (revision 5832053) +1981 (revision 5817635) +1982 (revision 5788879) +1983 (revision 5812702) +1984 (revision 5749754) +1989 (revision 5846909) +1994 (revision 5863999) +1999 (revision 5795003) +19 Σεπτεμβρίου (revision 5850863) +1 Σεπτεμβρίου (revision 5630491) +2000 (revision 5779037) +2001 (revision 5779042) +2005 (revision 5779066) +2006 (revision 5808681) +2009 (revision 5827105) +2011 (revision 5808660) +2016 (revision 5801621) +20 Σεπτεμβρίου (revision 5808561) +21 Σεπτεμβρίου (revision 5751207) +22 Σεπτεμβρίου (revision 5807133) +23 Σεπτεμβρίου (revision 5800012) +24 Σεπτεμβρίου (revision 5662618) +258 (revision 4952368) +25 Σεπτεμβρίου (revision 5817621) +26 Σεπτεμβρίου (revision 5817637) +27 Σεπτεμβρίου (revision 5817648) +28 Σεπτεμβρίου (revision 5817677) +29 Σεπτεμβρίου (revision 5703562) +2 Σεπτεμβρίου (revision 5701639) +30 Σεπτεμβρίου (revision 5838312) +326 (revision 5818811) +3 Σεπτεμβρίου (revision 5816313) +407 (revision 4952524) +4 Σεπτεμβρίου (revision 5816970) +5 Σεπτεμβρίου (revision 5817185) +628 (revision 5398024) +680 (revision 5365010) +685 (revision 5819296) +6 Σεπτεμβρίου (revision 5765157) +775 (revision 5373211) +786 (revision 5398031) +7 Σεπτεμβρίου (revision 5749649) +81 (revision 5397958) +891 (revision 4952139) +8 Σεπτεμβρίου (revision 5788878) +9 Σεπτεμβρίου (revision 5817240) +CIA (revision 5857678) +Miyavi (revision 4944860) +Άρμεν Κούπτσιος (revision 5766774) +Έιμι Γουάινχαουζ (revision 5809279) +Έρβιν Θάλμπεργκ (revision 5716376) +Ίων Δραγούμης (revision 5818568) +Αγία Ελένη (revision 5821916) +Αλεξάντερ φον Χούμπολτ (revision 5773636) +Αλμπέρτο Κόρντα (revision 5800055) +Απρίλιος (revision 5766829) +Αυτοκρατορία των Σασσανιδών (revision 5859880) +Αύγουστος (revision 5461793) +Β΄ Παγκόσμιος Πόλεμος (revision 5848530) +Βέρμαχτ (revision 5212228) +Βασίλης Λάσκος (revision 5695445) +Βενεζουέλα (revision 5847962) +Βρετανική Αυτοκρατορία (revision 5606306) +Βόρεια Ελλάδα (revision 5670938) +Γαλλία (revision 5776756) 
+Γεώργιος Καρατζαφέρης (revision 5803114) +Γιάννης Λάτσης (revision 5692530) +Γιάννος Κρανιδιώτης (revision 5574536) +Γιώργος Παπασιδέρης (μουσικός) (revision 5722203) +Γκέοργκ Φρήντριχ Χαίντελ (revision 5807098) +Γκρέις Κέλι (revision 5807168) +Γρηγοριανό Hμερολόγιο (revision 5793842) +Γρηγοριανό ημερολόγιο (revision 5793842) +Γρηγόρης Λαμπράκης (revision 5752808) +Δάντης Αλιγκέρι (revision 5648882) +Δήμος Βιάννου (revision 4816422) +Δεκέμβριος (revision 5461807) +Δομιτιανός (revision 5735554) +Δράμα (πόλη) (revision 5857326) +Ενιαία Δημοκρατική Αριστερά (revision 5742309) +Ετόρε Σότσας (revision 5785872) +Ζιλ Αντριαμαχαζό (revision 5819706) +Η.Π.Α. (revision 5845171) +Ηράκλειος (revision 5778827) +Θεσσαλονίκη (revision 5844955) +Θεόδωρος Ρούζβελτ (revision 5815087) +Ιανουάριος (revision 5615044) +Ιερουσαλήμ (revision 5824734) +Ιησούς Χριστός (revision 5859687) +Ιούλιος (revision 5712711) +Ιούνιος (revision 5461799) +Ιράκ (revision 5820378) +Ιράν (revision 5861249) +Ισιδώρα Ντάνκαν (revision 5044778) +Ιωάννης ο Χρυσόστομος (revision 5824898) == End of Parsed pages == -- Wikipedia parsing ended at: 2015-12-13 17:54:25.103854 +- Wikipedia parsing ended at: 2016-05-25 15:21:50.071087 -62 characters appeared 551775 times. +63 characters appeared 1875535 times. 
-First 41 characters: -[ 0] Char α: 9.098636219473518 % -[ 1] Char ο: 8.030447193149381 % -[ 2] Char τ: 7.717819763490554 % -[ 3] Char ι: 6.6942141271351545 % -[ 4] Char ε: 6.213764668569617 % -[ 5] Char ν: 5.920166734629151 % -[ 6] Char ρ: 4.645552988083911 % -[ 7] Char κ: 4.4978478546508995 % -[ 8] Char σ: 4.235422047030039 % -[ 9] Char η: 3.9173576185945356 % -[10] Char ς: 3.821666440125051 % -[11] Char π: 3.59023152553124 % -[12] Char μ: 3.2670925649041727 % -[13] Char υ: 3.258755833446604 % -[14] Char λ: 2.7634452448914866 % -[15] Char ί: 2.437406551583526 % -[16] Char ό: 2.070409134157945 % -[17] Char ά: 1.8300937882288977 % -[18] Char έ: 1.6805763218703276 % -[19] Char γ: 1.6767704227266547 % -[20] Char δ: 1.5888722758370715 % -[21] Char ω: 1.4756014679896698 % -[22] Char ή: 1.2963617416519415 % -[23] Char χ: 1.1928775316025553 % -[24] Char ύ: 0.9763037469983236 % -[25] Char θ: 0.8885868334012957 % -[26] Char ώ: 0.8104752843097277 % -[27] Char β: 0.7689728603144398 % -[28] Char φ: 0.6885052784196457 % -[29] Char ξ: 0.32549499343029314 % -[30] Char ζ: 0.3108150967332699 % -[31] Char i: 0.22273571655113045 % -[32] Char e: 0.2096869194871098 % -[33] Char a: 0.17742739341216981 % -[34] Char o: 0.14534910062978568 % -[35] Char n: 0.1428118345340039 % -[36] Char s: 0.12432603869330797 % -[37] Char r: 0.12305740564541706 % -[38] Char ϊ: 0.10819627565583799 % -[39] Char t: 0.10819627565583799 % -[40] Char ψ: 0.1040279099270536 % +First 46 characters: +[ 0] Char α: 9.004097497514042 % +[ 1] Char ο: 8.311015256980008 % +[ 2] Char τ: 7.94493304577094 % +[ 3] Char ι: 6.338831320129989 % +[ 4] Char ν: 5.836627948825269 % +[ 5] Char ε: 5.635565318695733 % +[ 6] Char ρ: 4.717907157157825 % +[ 7] Char σ: 4.307197679595422 % +[ 8] Char κ: 4.061294510632965 % +[ 9] Char ς: 3.766551943845356 % +[10] Char η: 3.7565281373048225 % +[11] Char π: 3.4156653968067783 % +[12] Char υ: 3.30956233821283 % +[13] Char μ: 3.1442761665338157 % +[14] Char λ: 3.0899983204792236 % +[15] Char ί: 
2.429973314280992 % +[16] Char ό: 2.076100952528212 % +[17] Char ά: 1.922651403466211 % +[18] Char γ: 1.8994047031913561 % +[19] Char έ: 1.6641651582081913 % +[20] Char δ: 1.508582884350332 % +[21] Char ω: 1.2410325587099147 % +[22] Char ή: 1.2077087337746297 % +[23] Char χ: 1.0482342371643292 % +[24] Char ύ: 0.9225101104484854 % +[25] Char β: 0.8928652357860558 % +[26] Char θ: 0.8681256281541001 % +[27] Char φ: 0.806756472153279 % +[28] Char ώ: 0.6969211451665791 % +[29] Char ζ: 0.35515199663029484 % +[30] Char e: 0.35488540603081253 % +[31] Char ξ: 0.314736861748781 % +[32] Char a: 0.2909036621550651 % +[33] Char i: 0.2884510286398281 % +[34] Char o: 0.24137112877125727 % +[35] Char r: 0.23262695710823847 % +[36] Char n: 0.2206303801315358 % +[37] Char t: 0.21668483925919804 % +[38] Char s: 0.2013825388489151 % +[39] Char l: 0.14865091827131993 % +[40] Char d: 0.1359078876160669 % +[41] Char c: 0.12124540464454144 % +[42] Char h: 0.1166600463334462 % +[43] Char u: 0.10381037943840024 % +[44] Char m: 0.09074744006376848 % +[45] Char ψ: 0.08669526295163779 % -The first 41 characters have an accumulated ratio of 0.9915635902315255. +The first 46 characters have an accumulated ratio of 0.993456267145108. -1299 sequences found. +1579 sequences found. 
-First 512 (typical positive ratio): 0.9690985257709991 -Next 512 (512-1024): 0.008104752843097278 -Rest: 0.0010500394313971116 +First 512 (typical positive ratio): 0.958419074626211 +Next 512 (512-1024): 0.006969211451665791 +Rest: 0.0018920066107342773 -- Processing end: 2015-12-13 17:54:25.303820 +- Processing end: 2016-05-25 15:21:50.812982 diff --git a/script/langs/el.py b/script/langs/el.py index 2726229..4c8352b 100644 --- a/script/langs/el.py +++ b/script/langs/el.py @@ -49,7 +49,7 @@ charsets = ['ISO-8859-7', 'WINDOWS-1253'] ## Optional Properties ## -alphabet = 'αβγδεζηθικλμνξοπρσςτυφχψω' +alphabet = 'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ' start_pages = ['Πύλη:Κύρια'] wikipedia_code = code case_mapping = True diff --git a/src/LangModels/LangGreekModel.cpp b/src/LangModels/LangGreekModel.cpp index 5692e01..499affe 100644 --- a/src/LangModels/LangGreekModel.cpp +++ b/src/LangModels/LangGreekModel.cpp @@ -41,7 +41,7 @@ /** * Generated by BuildLangModel.py - * On: 2015-12-13 17:54:25.105295 + * On: 2016-05-25 15:21:50.073117 **/ /* Character Mapping Table: @@ -67,18 +67,18 @@ static const unsigned char Windows_1253_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */ - 47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */ - 47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ + 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ + 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ 
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */ ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */ SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM, 61,SYM,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */ - 55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */ - 11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */ - 62, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */ - 11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */ + SYM,SYM,SYM,SYM,SYM, 62,SYM,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ + 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ + 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ + 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ + 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ @@ -88,113 +88,123 @@ static const unsigned char Iso_8859_7_CharToOrderMap[] = CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ - SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */ - 47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */ - SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */ - 47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 4X */ + 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 32, 46, 41, 40, 30, 52, 48, 42, 33, 56, 49, 39, 44, 36, 34, /* 6X */ + 47, 59, 35, 38, 37, 43, 54, 50, 58, 53, 57,SYM,SYM,SYM,SYM,CTR, /* 7X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ 
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */ - SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */ - 55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */ - 11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */ - 63, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */ - 11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */ + SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 19, 22, 15,SYM, 16,SYM, 24, 28, /* BX */ + 55, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* CX */ + 11, 6,ILL, 7, 2, 12, 27, 23, 45, 21, 51, 60, 17, 19, 22, 15, /* DX */ + 61, 0, 25, 18, 20, 5, 29, 10, 26, 3, 8, 14, 13, 4, 31, 1, /* EX */ + 11, 6, 9, 7, 2, 12, 27, 23, 45, 21, 51, 60, 16, 24, 28,ILL, /* FX */ }; /*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ /* Model Table: - * Total sequences: 1299 - * First 512 sequences: 0.9690985257709991 - * Next 512 sequences (512-1024): 0.029851434797603802 - * Rest: 0.0010500394313971116 + * Total sequences: 1579 + * First 512 sequences: 0.958419074626211 + * Next 512 sequences (512-1024): 0.03968891876305471 + * Rest: 0.0018920066107342773 * Negative sequences: TODO */ static const PRUint8 GreekLangModel[] = { - 1,2,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,2,2,3, - 3,0,2,3,3,3,1,3,3,3,3,0,0,0,0,0,0,0,3,0,2, - 2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3, - 3,2,2,3,3,3,2,3,3,2,2,0,0,0,0,0,0,0,2,0,2, - 3,3,2,3,3,2,3,1,3,3,2,0,3,3,2,3,3,3,3,2, - 0,3,3,1,3,1,3,2,1,0,3,0,0,0,1,0,0,0,0,0,0, - 3,3,3,1,3,3,3,3,3,2,3,3,3,1,3,1,3,3,3,3, - 3,3,2,3,0,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,2, - 2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,1,3, - 3,3,1,3,3,3,2,3,3,3,3,0,1,0,0,0,1,0,2,0,2, - 3,3,3,3,3,3,2,2,3,3,2,1,2,2,2,3,3,3,3,3, - 3,3,3,2,2,3,3,1,1,0,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,3,3,3,3,3,1,2,3,3,2,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,3,3,2, - 
3,3,3,2,3,2,3,2,2,0,0,0,0,0,1,0,0,0,0,0,0, - 3,3,3,3,3,2,2,3,3,3,0,3,3,3,3,3,3,3,3,1, - 2,3,3,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,3,0,2,3,3,3,3,1,3,3,3,0,3,0,0,0,1,3, - 2,0,0,2,1,3,0,1,3,2,0,0,0,0,0,0,0,0,0,0,2, - 0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,2,3,1,1,3,1,2,1,2,3,3,3,3,3,3, - 1,3,3,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,3,2,1,2,3,2,3,3,2,2,3,3,3,3,0, - 0,3,3,0,2,0,3,3,3,0,1,0,0,0,0,0,0,0,0,0,2, - 3,2,3,2,2,3,3,3,3,2,3,3,3,0,3,2,2,3,2,3, - 3,0,2,3,0,3,1,3,2,3,3,0,0,0,0,0,0,0,1,0,3, - 3,3,3,3,3,2,0,2,2,3,2,2,3,3,3,3,3,3,3,3, - 2,3,3,2,3,2,3,3,2,1,1,0,0,0,0,0,0,0,0,0,1, - 3,3,3,0,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3, - 3,3,0,3,0,3,0,2,2,2,3,0,0,0,0,0,0,0,0,0,2, - 2,2,3,2,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3, - 3,1,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2, - 2,2,3,3,2,3,3,3,3,2,3,3,3,1,3,0,0,0,0,3, - 3,1,0,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,1,0,2, - 3,3,3,2,2,3,3,3,3,1,3,3,3,0,3,0,0,0,0,3, - 3,3,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,3,3,3,3,0,3,0,0,3,2,3,3,3,3,3,3, - 3,3,3,3,2,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,1,3,1,0,3,0,0,3,3,0,3,3,3,3,0, - 1,3,3,0,3,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 2,1,3,2,1,3,3,2,3,0,3,3,3,0,2,1,0,2,1,3, - 2,0,2,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0, - 0,0,3,0,1,3,3,3,3,0,3,2,3,1,3,0,0,0,0,3, - 2,0,0,2,0,3,0,1,2,2,1,0,0,0,0,0,0,0,0,0,2, - 3,3,2,3,3,3,3,0,1,3,1,0,2,3,2,3,2,3,3,0, - 0,3,3,0,2,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0, - 2,3,3,2,3,3,3,2,3,2,3,3,3,0,3,0,0,0,0,3, - 2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,2, - 3,3,0,3,3,3,3,0,0,3,0,0,3,3,2,2,3,3,3,0, - 0,2,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,3,2,0,3,3,2,3,2,3,3,3,0,2,0,0,0,0,2, - 2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0, - 3,3,0,3,3,1,3,1,2,3,0,0,1,2,3,3,3,3,3,2, - 2,2,2,0,2,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,3,3,3,1,3,1,2,3,0,1,1,3,2,3,2,3,3,2, - 0,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 3,3,2,3,3,0,0,0,0,3,0,1,0,2,0,2,2,3,3,0, - 0,3,2,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 
3,3,0,3,3,2,1,0,0,2,0,0,2,2,1,2,2,2,2,0, - 0,3,2,1,1,0,3,2,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,1,2,3,2,3,3,2,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,0,2,0, - 0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, - 1,0,0,0,0,0,0,0,0,0,0,2,2,1,2,3,2,3,0,3,0, - 0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,1,1,2,3,2,3,0,2,0, - 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0, - 0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,2,3,2,3,2,2,2,0,2,0, - 2,0,2,0,1,2,2,3,2,0,1,1,2,0,2,0,2,1,0,2, - 1,0,0,1,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0, - 0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0,2,2,0,2,0, - 2,2,0,2,3,0,0,0,0,3,0,0,0,2,0,2,2,1,1,0, - 0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,1,2, + 3,3,3,3,3,1,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,2, + 2,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,1,2,3,2,3,1,2, + 3,3,3,3,3,2,2,0,2,0,0,0,0,0,0,0,0,1,0,0,1,0,2, + 3,3,2,3,2,3,3,3,2,3,3,1,3,2,2,3,3,3,2,3,0,3,3, + 2,2,2,2,2,3,3,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,3,3,3,3,3,3,3,1,3,3,1,3,3,3,3,3,3,2, + 3,1,3,3,2,3,3,0,2,0,0,1,0,0,0,1,0,0,0,0,0,0,2, + 3,3,3,3,3,3,2,3,2,2,3,1,2,2,2,3,3,3,3,3,3,3,3, + 2,2,1,3,2,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,2,3,1,3,3,1, + 3,3,3,3,3,2,2,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3, + 3,3,2,3,2,3,2,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,1, + 3,3,3,3,2,3,2,3,3,0,3,3,3,3,2,3,3,3,2,3,2,3,3, + 3,3,2,2,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,2,3,3,2,3,2,3,2,3,2,3,3,3,3,1,3,3,3,3, + 2,3,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0, + 1,1,0,1,1,1,0,1,1,0,2,1,0,1,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, + 1,1,3,0,3,2,3,3,3,3,0,3,0,3,3,1,0,0,3,1,2,0,0, + 
2,1,1,3,2,0,0,0,2,0,0,1,0,0,0,0,0,0,1,0,0,0,2, + 3,3,3,3,2,3,3,2,1,1,3,2,3,1,3,3,3,3,1,3,0,3,3, + 1,2,1,1,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,2,3,2,3,2,3,3,3,3,2,3,0,3,3,2,2,3,3,2,3,1,2, + 3,0,3,3,2,1,3,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,2, + 3,3,1,3,2,3,1,2,1,2,3,3,2,3,1,3,3,3,1,3,1,3,3, + 1,2,3,0,3,2,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,2,3,1,2,2,2,3,2,3,3,3,3,3,3,2,3,2,3,3, + 2,3,2,2,2,3,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1, + 3,3,3,1,3,3,3,3,3,3,2,3,0,3,3,0,0,0,3,0,3,3,0, + 3,0,2,3,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 2,2,3,2,3,3,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, + 3,1,2,2,3,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 2,2,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,1,0, + 3,0,3,3,3,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,0,3,3,3,3,0,3,0,3,0,2,3,3,3,3,3,3,3,2,3,3, + 2,2,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,2,3,1,3,3,0,0,0,3,0,3,3,0, + 3,0,3,3,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,1,3,2,3,3,1,0,0,3,0,3,1,0,3,3,3,0,3,0,3,3, + 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,1,3,2,3,1,3,3,2,3,1,3,1,3,2,2,1,2,3,1,2,0,2, + 2,0,3,3,2,1,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,3,1,3,1,3,3,3,3,1,2,0,3,3,0,0,0,2,0,2,1,0, + 2,0,1,3,2,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2, + 3,3,3,3,3,3,3,1,0,1,3,1,2,2,2,3,2,3,0,3,0,3,3, + 0,2,1,3,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,3,3,2,3,3,3,3,2,3,2,3,0,3,3,0,0,0,3,0,2,1,0, + 2,0,2,3,2,0,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,2, + 3,3,1,3,2,3,3,1,1,1,2,1,2,0,3,3,3,3,2,3,2,2,2, + 0,2,2,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,3,3,3,3,1,1,0,3,0,3,3,3,2,2,3,1,3,0,2,3, + 0,2,0,0,1,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,2,3,1,3,3,3,2,0,3,1,3,1,2,3,3,3,2,3,0,3,3, + 0,2,0,2,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,3,2,3,0,3,3,2,3,2,3,0,3,2,0,0,0,1,0,2,1,0, + 1,0,2,2,1,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,1,3,1,3,1,1,1,0,2,0,2,2,1,2,2,2,1,2,0,3,2, + 0,2,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,1,0,1,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0, + 
0,0,0,0,0,0,0,2,1,2,2,2,3,3,2,3,2,2,2,2,2,2,0, + 3,3,1,3,1,3,0,0,1,0,3,1,2,1,1,2,2,3,1,2,0,2,2, + 0,3,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,1,1,0,0,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0, + 0,0,0,1,1,0,0,2,0,2,2,1,3,3,3,2,3,2,2,2,2,2,0, + 0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,1,0,0,0,0,2,0,3,2,3,2,3,3,3,2,2,3,1,2,2,0, + 0,0,1,0,1,0,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0, + 0,1,0,1,0,0,0,2,0,2,2,2,3,3,2,2,2,2,2,2,2,2,0, + 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,3,0,3,3,3,2,2,2,2,2,2,2,1,2,2,0, + 0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,3,2,2,1,2,2,2,2,3,2,1,2,1,0, + 1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,1,3,0,3,3,3,2,1,2,2,2,1,1,3,2,2,0, + 0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,2,1,1,3,2,2,1,2,2,2,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,3,3,2,1,1,2,2,2,2,1,1,2,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,2,2,2,2,1,1,2,2,1,2,1,2,1,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,2,2,1,2,1,2,2,2,3,2,1,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,3,0,2,2,2,2,2,2,1,2,1,1,1,2,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,2,2,1,2,2,2,2,2,2,2,1,1,2,0, + 1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0, + 0,0,0,0,0,0,0,3,0,2,2,2,1,1,1,2,2,1,1,1,2,2,0, + 2,2,0,2,0,3,0,0,0,0,3,0,2,0,0,2,1,1,0,1,0,1,2, + 0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; @@ -202,8 +212,8 @@ const SequenceModel Windows_1253GreekModel = { Windows_1253_CharToOrderMap, GreekLangModel, - 41, - (float)0.9690985257709991, + 46, + (float)0.958419074626211, PR_FALSE, "WINDOWS-1253" }; @@ -212,8 +222,8 @@ const SequenceModel Iso_8859_7GreekModel = { Iso_8859_7_CharToOrderMap, GreekLangModel, - 41, - (float)0.9690985257709991, + 46, + (float)0.958419074626211, PR_FALSE, "ISO-8859-7" -}; \ No newline at end of file +};