diff --git a/script/BuildLangModelLogs/LangSlovakModel.log b/script/BuildLangModelLogs/LangSlovakModel.log new file mode 100644 index 0000000..2c4902e --- /dev/null +++ b/script/BuildLangModelLogs/LangSlovakModel.log @@ -0,0 +1,158 @@ += Logs of language model for Slovak (sk) = + +- Generated by BuildLangModel.py +- Started: 2016-09-21 13:26:28.712674 +- Maximum depth: 5 +- Max number of pages: 100 + +== Parsed pages == + +Dôkaz (matematika) (revision 6358810) +1825 (revision 6122752) +1839 (revision 6165808) +1847 (revision 5941780) +1852 (revision 5941777) +1878 (revision 6221358) +1955 (revision 6226609) +1976 (revision 6310709) +1983 (revision 6356952) +1993 (revision 6348358) +1995 (revision 6277350) +2012 (revision 6291145) +Adrien-Marie Legendre (revision 6060342) +Algebra (revision 6319238) +Algebraická geometria (revision 5964212) +Algebraická rovnica (revision 5288111) +Algebrické číslo (revision 6106622) +Algoritmus (revision 6286937) +Andrew Wiles (revision 5791970) +Arabi (revision 6044956) +Arabčina (revision 6322514) +Aristoteles (revision 6359959) +Arthur Cayley (revision 6332355) +Axióma (revision 6338092) +Babylonia (revision 6168813) +Bernard Bolzano (revision 6261374) +Boh (revision 6282272) +Bolzanova veta (revision 6345299) +Bytie (revision 5274918) +Byzantská ríša (revision 6359782) +Caroline Blundenová (revision 6358810) +Cauchyho postupnosť (revision 6215169) +Celé číslo (revision 6302805) +Charles Hermite (revision 5751036) +Daniel Marcus (revision 5657431) +David Hilbert (revision 5968866) +Dedukcia (revision 6338099) +Definícia (revision 6106684) +Derivácia (funkcia) (revision 5970574) +Desiatková číselná sústava (revision 5924486) +Diofantická rovnica (revision 6327292) +Dynastia Chan (revision 6342042) +Dôkaz (logika) (revision 5495754) +Dôkaz sporom (revision 5940134) +Dôkaz výpočtom (revision 6358810) +Energia (revision 6277761) +Eric Weisstein (revision 6054413) +Ernst Kummer (revision 6001344) +Európa (revision 6295124) +Experiment (revision 6354302) +Fenomén (filozofia) (revision 5420897) +Filozofia (revision 6296369) +Formula (logika) (revision 3916562) +Formálny dôkaz (revision 6358810) +Formálny jazyk (revision 5623029) +Gabriel Cramer (revision 5923903) +Galoisova teória (revision 6353573) +Gentzenovský kalkul (revision 6358810) +Geometria (revision 5970028) +Geometrický dôkaz (revision 6358810) +Georg Ferdinand Cantor (revision 6186696) +Giordano Bruno (revision 6312876) +Gottlob Frege (revision 5968855) +Gödelova veta o neúplnosti (revision 5323549) +Hardvér (revision 6214401) +Henri Poincaré (revision 6315506) +Hilbertovský kalkul (revision 6358810) +Hmotnosť (revision 5979540) +Hypotéza (revision 5983410) +Idea (revision 5960449) +India (revision 6362189) +Intuícia (revision 5837951) +Jazyk (lingvistika) (revision 6073293) +John Taylor (revision 6355518) +Kardinálne číslo (revision 6090126) +Kenneth Appel (revision 5968422) +Klasická mechanika (revision 6295646) +Konečná množina (revision 5276494) +Konfucianizmus (revision 5968816) +Kresťanstvo (revision 6289571) +Langlandsov program (revision 6088475) +Latinčina (revision 6121105) +Leonhard Euler (revision 6339382) +Lineárna algebra (revision 5473535) +Logická axióma (revision 5495754) +Logický kalkul (revision 1608550) + +== End of Parsed pages == + +- Wikipedia parsing ended at: 2016-09-21 13:33:10.330458 + +62 characters appeared 550293 times. + +First 45 characters: +[ 0] Char o: 8.867094438780795 % +[ 1] Char a: 8.59705647718579 % +[ 2] Char e: 8.562347694773512 % +[ 3] Char n: 6.0867574183207855 % +[ 4] Char i: 5.828531346028389 % +[ 5] Char t: 5.366595613609477 % +[ 6] Char r: 4.977711873492848 % +[ 7] Char k: 4.264273759615332 % +[ 8] Char s: 4.257731790155426 % +[ 9] Char v: 4.117079446767449 % +[10] Char l: 3.5979014815743615 % +[11] Char d: 3.416361829061972 % +[12] Char m: 3.2513588215732345 % +[13] Char p: 2.878466562358598 % +[14] Char u: 2.5987973679476206 % +[15] Char c: 2.419438371921867 % +[16] Char z: 2.127412124086623 % +[17] Char h: 2.0687161203213558 % +[18] Char j: 2.0312815173007834 % +[19] Char y: 1.6700194260148686 % +[20] Char b: 1.6574806512167153 % +[21] Char á: 1.6422160558102683 % +[22] Char ý: 1.2564215790497062 % +[23] Char í: 1.1326693234331529 % +[24] Char č: 0.9473135220691523 % +[25] Char é: 0.8913433389121795 % +[26] Char ž: 0.7668641978000811 % +[27] Char ú: 0.6949025337411161 % +[28] Char š: 0.6785476100913513 % +[29] Char f: 0.6514711253822963 % +[30] Char g: 0.6096752093884531 % +[31] Char ť: 0.46375294615777407 % +[32] Char ô: 0.4172322744428877 % +[33] Char ľ: 0.36053520579036985 % +[34] Char x: 0.23114958758334195 % +[35] Char ó: 0.2251527822450949 % +[36] Char ň: 0.09304134342977287 % +[37] Char w: 0.09013380144759246 % +[38] Char ä: 0.0694175648245571 % +[39] Char ď: 0.06560141597294532 % +[40] Char q: 0.01726353051919614 % +[41] Char ě: 0.009994675563745132 % +[42] Char ĺ: 0.009267790068200032 % +[43] Char ö: 0.008904347320427481 % +[44] Char ŕ: 0.00599680533824708 % + +The first 45 characters have an accumulated ratio of 0.9998128269848972. + +1181 sequences found. + +First 512 (typical positive ratio): 0.9733303573968434 +Next 512 (512-1024): 1.8172137388627513e-06 +Rest: 0.0003522983638913346 + +- Processing end: 2016-09-21 13:33:10.831531 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9493d17..6244812 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,7 @@ set( LangModels/LangMalteseModel.cpp LangModels/LangPortugueseModel.cpp LangModels/LangRussianModel.cpp + LangModels/LangSlovakModel.cpp LangModels/LangSpanishModel.cpp LangModels/LangThaiModel.cpp LangModels/LangTurkishModel.cpp diff --git a/src/LangModels/LangSlovakModel.cpp b/src/LangModels/LangSlovakModel.cpp new file mode 100644 index 0000000..cfa94aa --- /dev/null +++ b/src/LangModels/LangSlovakModel.cpp @@ -0,0 +1,289 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +#include "../nsSBCharSetProber.h" + +/********* Language model for: Slovak *********/ + +/** + * Generated by BuildLangModel.py + * On: 2016-09-21 13:33:10.331339 + **/ + +/* Character Mapping Table: + * ILL: illegal character. + * CTR: control character specific to the charset. + * RET: carriage/return. + * SYM: symbol (punctuation) that does not belong to word. + * NUM: 0 - 9. + * + * Other characters are ordered by probabilities + * (0 is the most common character in the language). + * + * Orders are generic to a language. So the codepoint with order X in + * CHARSET1 maps to the same character as the codepoint with the same + * order X in CHARSET2 for the same language. + * As such, it is possible to get missing order. For instance the + * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 + * even though they are both used for French. Same for the euro sign. + */ +static const unsigned char Ibm852_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 51, 46, 25, 62, 38, 48, 47, 51, 49, 54, 50, 50, 63, 64, 38, 47, /* 8X */ + 25, 42, 42, 32, 43, 33, 33, 65, 66, 43, 46, 31, 31, 49,SYM, 24, /* 9X */ + 21, 23, 35, 27, 67, 68, 26, 26, 69, 70,SYM, 71, 24, 59,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 21, 72, 41, 59,SYM,SYM,SYM,SYM, 61, 61,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM, 56, 56,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + 55, 55, 39, 54, 39, 36, 23, 73, 41,SYM,SYM,SYM,SYM, 74, 48,SYM, /* DX */ + 35, 58, 32, 52, 52, 36, 28, 28, 44, 27, 44, 60, 22, 22, 75,SYM, /* EX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 60, 45, 45,SYM,SYM, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Iso_8859_2_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM, 76,SYM, 49,SYM, 33, 77,SYM,SYM, 28, 59, 31, 78,SYM, 26, 61, /* AX */ + SYM, 79,SYM, 49,SYM, 33, 80,SYM,SYM, 28, 59, 31, 81,SYM, 26, 61, /* BX */ + 44, 21, 82, 56, 38, 42, 47, 51, 24, 25, 83, 54, 41, 23, 84, 39, /* CX */ + 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 85, 58, /* DX */ + 44, 21, 86, 56, 38, 42, 47, 51, 24, 25, 87, 54, 41, 23, 88, 39, /* EX */ + 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22, 89,SYM, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Mac_Centraleurope_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + 38, 90, 91, 25, 92, 43, 46, 21, 93, 24, 38, 24, 47, 47, 25, 94, /* 8X */ + 95, 39, 23, 39, 96, 97, 98, 35, 99, 32, 43,100, 27, 41, 41, 46, /* 9X */ + SYM,SYM,101,SYM,SYM,SYM,SYM, 58,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */ + 105, 57,SYM,SYM, 57,106,SYM,SYM, 49,107,108, 33, 33, 42, 42,109, /* BX */ + 110, 52,SYM,SYM, 52, 36,SYM,SYM,SYM,SYM,SYM, 36, 50,111, 50, 53, /* CX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 53, 44, 44, 45,SYM,SYM, 45,112, /* DX */ + 113, 28,SYM,SYM, 28,114,115, 21, 31, 31, 23, 26, 26,116, 35, 32, /* EX */ + 117, 48, 27, 48, 60, 60,118,119, 22, 22,120, 61, 49, 61,121,SYM, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Windows_1250_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 4X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 1, 20, 15, 11, 2, 29, 30, 17, 4, 18, 7, 10, 12, 3, 0, /* 6X */ + 13, 40, 6, 8, 5, 14, 9, 37, 34, 19, 16,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,122, 31, 26,123, /* 8X */ + ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 28,SYM,124, 31, 26,125, /* 9X */ + SYM,SYM,SYM, 49,SYM,126,SYM,SYM,SYM,SYM, 59,SYM,SYM,SYM,SYM, 61, /* AX */ + SYM,SYM,SYM, 49,SYM,SYM,SYM,SYM,SYM,127, 59,SYM, 33,SYM, 33, 61, /* BX */ + 44, 21,128, 56, 38, 42, 47, 51, 24, 25,129, 54, 41, 23,130, 39, /* CX */ + 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,131, 58, /* DX */ + 44, 21,132, 56, 38, 42, 47, 51, 24, 25,133, 54, 41, 23,134, 39, /* EX */ + 55, 52, 36, 35, 32, 50, 43,SYM, 45, 48, 27, 60, 46, 22,135,SYM, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + + +/* Model Table: + * Total sequences: 1181 + * First 512 sequences: 0.9733303573968434 + * Next 512 sequences (512-1024): 0.026317344239265295 + * Rest: 0.0003522983638913346 + * Negative sequences: TODO + */ +static const PRUint8 SlovakLangModel[] = +{ + 2,2,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2, + 0,0,3,2,3,1,2,3,3,1,0,3,2,0,3,2,0,1,2,0,0,0,0, + 2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0, + 0,0,3,0,3,0,3,3,3,3,0,2,3,1,2,2,0,2,2,0,0,0,0, + 3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3, + 0,2,3,0,3,2,3,3,3,2,0,3,3,3,3,2,0,3,2,0,0,1,0, + 3,3,3,3,3,3,2,3,3,2,2,3,2,2,3,3,2,2,2,3,2,3, + 3,3,3,3,2,3,3,2,3,0,2,0,0,2,0,2,0,0,2,2,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3, + 0,3,3,2,3,0,3,3,3,3,0,2,2,3,2,2,0,0,2,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,1,3,2,3,2,3,3,2,3,2,3, + 3,3,2,3,0,3,2,2,2,1,0,2,0,3,2,2,2,2,1,2,1,1,2, + 3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,2,2,2,3,3,3, + 3,3,3,3,2,2,2,2,3,2,3,0,2,3,2,2,2,0,2,0,0,1,0, + 3,3,3,2,3,3,3,2,2,3,3,3,3,1,3,3,2,2,2,3,2,3, + 3,2,2,3,2,3,2,2,2,0,3,2,0,2,2,2,0,0,0,0,0,2,1, + 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3, + 2,3,2,2,1,3,0,2,2,3,2,2,0,2,2,2,0,0,2,0,0,0,0, + 3,3,3,3,3,2,3,2,3,0,3,3,2,3,3,2,3,2,0,3,2,3, + 3,3,2,3,2,2,3,1,2,0,2,0,0,0,2,0,3,2,0,2,2,1,2, + 3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,2,2,2,2,3,3,3, + 3,3,2,3,2,3,2,2,3,0,1,0,0,3,2,0,0,0,0,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3, + 3,3,2,2,2,2,2,2,2,0,3,3,2,2,2,2,0,0,0,2,2,0,0, + 3,3,3,3,3,3,2,2,3,1,2,2,3,3,3,2,0,0,2,3,3,3, + 2,3,0,2,2,2,0,0,2,0,3,0,1,2,1,0,3,0,2,0,0,2,2, + 3,3,3,3,3,3,3,2,2,0,3,2,2,2,3,2,1,2,0,2,2,3, + 2,3,1,2,0,2,2,0,1,2,3,1,0,2,2,0,2,0,0,2,2,0,0, + 2,2,2,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,3, + 0,3,3,1,3,1,2,2,3,2,0,2,2,0,1,2,0,2,2,0,0,0,0, + 3,3,3,3,3,2,2,3,2,2,2,2,2,0,3,2,2,3,0,2,0,2, + 1,3,0,2,0,3,0,1,2,2,0,0,0,2,2,0,0,0,2,0,0,0,0, + 3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3, + 3,2,1,2,1,2,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,2,2,3,2,3,1,3,2,0,2,1,3,2,3, + 3,2,2,2,0,2,2,1,0,0,0,2,0,1,2,2,1,0,0,0,2,1,2, + 3,3,3,3,3,3,2,2,3,3,2,2,3,2,3,2,2,2,2,0,2,2, + 0,3,2,0,0,3,3,0,2,1,0,2,0,2,0,0,1,0,0,0,0,0,0, + 2,2,2,3,2,3,3,3,3,3,3,2,3,3,2,3,3,3,2,0,3,0, + 0,0,2,0,2,2,3,1,2,3,0,1,0,1,2,1,0,0,0,0,0,0,0, + 3,3,3,3,3,1,3,2,3,2,3,3,2,0,3,2,2,2,3,3,2,3, + 2,2,2,2,1,2,2,0,2,0,1,0,0,1,2,0,1,0,0,0,0,1,0, + 0,0,0,3,0,3,3,3,3,3,3,3,3,3,2,3,3,2,3,0,3,0, + 0,0,2,0,2,0,3,0,1,0,0,2,0,0,2,0,0,2,0,0,0,0,0, + 0,0,0,2,0,2,3,2,2,3,2,2,3,2,0,3,3,2,2,0,2,0, + 0,0,1,0,2,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,2,0,3,0,3,3,3,3,3,3,2,3,3,0,3,2,2,2,0,2,0, + 0,0,2,0,2,0,3,0,1,2,0,2,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,2,0,3,0,2,0,2,2,0,0,0,0,0,2, + 0,3,0,1,1,1,3,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0, + 1,0,0,2,0,3,3,2,2,2,2,2,3,2,0,3,3,3,0,0,1,0, + 0,0,2,0,0,0,2,2,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 3,3,3,3,3,0,1,2,3,1,1,3,1,0,3,0,0,0,2,0,2,0, + 0,3,0,1,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0, + 1,0,0,3,0,3,3,2,3,3,3,3,2,3,0,3,3,2,0,0,2,0, + 0,0,3,0,2,0,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,0,2,2,2,3,1,2,3,2,0,0,2,0,0,2,1, + 0,3,2,1,0,1,2,0,0,2,0,2,0,0,1,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,3,0,2,1,2,0,2,0,3,0,0,2,0,3,0,2, + 0,2,0,2,0,1,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,2,3,2,3,0,2,2,3,2,2,0,3,2,0,2,0,2,0,2, + 0,0,0,2,0,2,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,2,0, + 3,3,0,0,1,0,2,1,0,0,0,2,0,1,2,0,0,0,0,0,0,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,3,3,3,3,3,2,1,0,0,2,3,0,0,0,2,0, + 0,0,0,0,3,0,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,3,0,0,0,3,2,2,0,0,2,0,3,0,1,1,0,0,2,0, + 0,0,1,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,3,2,0,1,0,0,2,0,2,3,2,0,0,0,0,2,0,0, + 0,2,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 0,0,0,3,0,2,3,1,2,1,2,3,3,2,0,2,2,0,0,0,3,0, + 0,0,0,0,1,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,0,0,0,0,0,0,0,0,0,1,2,0,2,0,0,2,0,0,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,1,3,2,0,1,2,0,2,0,2,1,0,0,0,2,0,1,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,2,0,2,1,1,0,0,0,2,0,0,0,2,2,0,0,0,2,0, + 0,0,3,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0, + 2,3,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0, + 0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,2,0,0,1,0,0,0,0,2,0,0,0,0,0,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,2,2,0,0,1,0,1,0,0,1,2,0,2,0,0,0, + 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, + 0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, + 0,0,0,2,0,2,2,0,2,0,1,2,1,0,0,0,0,2,0,0,2,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0, + 0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0, +}; + + +const SequenceModel Ibm852SlovakModel = +{ + Ibm852_CharToOrderMap, + SlovakLangModel, + 45, + (float)0.9733303573968434, + PR_TRUE, + "IBM852" +}; + +const SequenceModel Iso_8859_2SlovakModel = +{ + Iso_8859_2_CharToOrderMap, + SlovakLangModel, + 45, + (float)0.9733303573968434, + PR_TRUE, + "ISO-8859-2" +}; + +const SequenceModel Mac_CentraleuropeSlovakModel = +{ + Mac_Centraleurope_CharToOrderMap, + SlovakLangModel, + 45, + (float)0.9733303573968434, + PR_TRUE, + "MAC-CENTRALEUROPE" +}; + +const SequenceModel Windows_1250SlovakModel = +{ + Windows_1250_CharToOrderMap, + SlovakLangModel, + 45, + (float)0.9733303573968434, + PR_TRUE, + "WINDOWS-1250" +}; diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp index ec3fe3c..b68134f 100644 --- a/src/nsSBCSGroupProber.cpp +++ b/src/nsSBCSGroupProber.cpp @@ -131,6 +131,11 @@ nsSBCSGroupProber::nsSBCSGroupProber() mProbers[48] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCzechModel); mProbers[49] = new nsSingleByteCharSetProber(&Ibm852CzechModel); + mProbers[50] = new nsSingleByteCharSetProber(&Windows_1250SlovakModel); + mProbers[51] = new nsSingleByteCharSetProber(&Iso_8859_2SlovakModel); + mProbers[52] = new nsSingleByteCharSetProber(&Mac_CentraleuropeSlovakModel); + mProbers[53] = new nsSingleByteCharSetProber(&Ibm852SlovakModel); + Reset(); } diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h index d61225f..1a1266f 100644 --- a/src/nsSBCSGroupProber.h +++ b/src/nsSBCSGroupProber.h @@ -40,7 +40,7 @@ #define nsSBCSGroupProber_h__ -#define NUM_OF_SBCS_PROBERS 50 +#define NUM_OF_SBCS_PROBERS 54 class nsCharSetProber; class nsSBCSGroupProber: public nsCharSetProber { diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h index c3e8432..979b9a7 100644 --- a/src/nsSBCharSetProber.h +++ b/src/nsSBCharSetProber.h @@ -192,5 +192,10 @@ extern const SequenceModel Iso_8859_2CzechModel; extern const SequenceModel Ibm852CzechModel; extern const SequenceModel Mac_CentraleuropeCzechModel; +extern const SequenceModel Windows_1250SlovakModel; +extern const SequenceModel Iso_8859_2SlovakModel; +extern const SequenceModel Ibm852SlovakModel; +extern const SequenceModel Mac_CentraleuropeSlovakModel; + #endif /* nsSingleByteCharSetProber_h__ */ diff --git a/test/sk/ibm852.txt b/test/sk/ibm852.txt new file mode 100644 index 0000000..725a54f --- /dev/null +++ b/test/sk/ibm852.txt @@ -0,0 +1,3 @@ +Jupiter je piata planta v porad od Slnka, najvia a najhmotnejia planta +naej slnenej sstavy. Je pomenovan po rmskom bohovi Jupiterovi. Symbolom +planty je tylizovan znzornenie Jupiterovho boskho blesku. diff --git a/test/sk/iso-8859-2.txt b/test/sk/iso-8859-2.txt new file mode 100644 index 0000000..ee3ab14 --- /dev/null +++ b/test/sk/iso-8859-2.txt @@ -0,0 +1,3 @@ +Jupiter je piata planta v porad od Slnka, najvia a najhmotnejia planta +naej slnenej sstavy. Je pomenovan po rmskom bohovi Jupiterovi. Symbolom +planty je tylizovan znzornenie Jupiterovho boskho blesku. diff --git a/test/sk/mac-centraleurope.txt b/test/sk/mac-centraleurope.txt new file mode 100644 index 0000000..cddbba4 --- /dev/null +++ b/test/sk/mac-centraleurope.txt @@ -0,0 +1,3 @@ +Jupiter je piata planta v porad od Slnka, najvia a najhmotnejia planta +naej slnenej sstavy. Je pomenovan po rmskom bohovi Jupiterovi. Symbolom +planty je tylizovan znzornenie Jupiterovho boskho blesku. diff --git a/test/sk/utf-8.txt b/test/sk/utf-8.txt new file mode 100644 index 0000000..eba4382 --- /dev/null +++ b/test/sk/utf-8.txt @@ -0,0 +1,3 @@ +Jupiter je piata planéta v poradí od Slnka, najväčšia a najhmotnejšia planéta +našej slnečnej sústavy. Je pomenovaný po rímskom bohovi Jupiterovi. Symbolom +planéty je štylizované znázornenie Jupiterovho božského blesku. diff --git a/test/sk/windows-1250.txt b/test/sk/windows-1250.txt new file mode 100644 index 0000000..a60d048 --- /dev/null +++ b/test/sk/windows-1250.txt @@ -0,0 +1,3 @@ +Jupiter je piata planta v porad od Slnka, najvia a najhmotnejia planta +naej slnenej sstavy. Je pomenovan po rmskom bohovi Jupiterovi. Symbolom +planty je tylizovan znzornenie Jupiterovho boskho blesku.