diff --git a/script/BuildLangModelLogs/LangHebrewModel.log b/script/BuildLangModelLogs/LangHebrewModel.log new file mode 100644 index 0000000..296d071 --- /dev/null +++ b/script/BuildLangModelLogs/LangHebrewModel.log @@ -0,0 +1,191 @@ += Logs of language model for Hebrew (he) = + +- Generated by BuildLangModel.py +- Started: 2021-03-17 23:11:33.477881 +- Maximum depth: 4 +- Max number of pages: 100 + +== Parsed pages == + +יהדות_בוקרשט (revision 30791735) +10 בנובמבר (revision 30632714) +13 בנובמבר (revision 30810192) +1522 (revision 24674491) +1533 (revision 24674506) +1550 (revision 24674532) +1594 (revision 25165542) +15 בנובמבר (revision 29890141) +1648 (revision 26810233) +1649 (revision 28942371) +1694 (revision 25165654) +1715 (revision 25165678) +1730 (revision 28089168) +1764 (revision 25165736) +1801 (revision 27881514) +1804 (revision 30643161) +1808 (revision 25165782) +1812 (revision 25165786) +1815 (revision 28059812) +1818 (revision 25165792) +1819 (revision 25165793) +1832 (revision 25165806) +1846 (revision 30789696) +1847 (revision 27881515) +1848 (revision 25165827) +1852 (revision 25165831) +1857 (revision 26643435) +1864 (revision 25165844) +1865 (revision 28700557) +1866 (revision 29383815) +1867 (revision 30295888) +1873 (revision 30716465) +1876 (revision 25165858) +1877 (revision 27881506) +1878 (revision 25165861) +1880 (revision 25165863) +1881 (revision 25165864) +1893 (revision 25165878) +1894 (revision 25165879) +1899 (revision 26643326) +18 במרץ (revision 30657076) +1912 (revision 27740363) +1913 (revision 25165902) +1918 (revision 25287021) +1919 (revision 25165908) +1920 (revision 25165910) +1921 (revision 30598446) +1931 (revision 25007812) +1938 (revision 25039793) +1940 (revision 24662839) +1941 (revision 27376428) +1942 (revision 30379738) +1945 (revision 29947601) +1948 (revision 30229979) +1949 (revision 30109414) +1976 (revision 24662876) +1977 (revision 26724861) +1978 (revision 25556703) +1997 (revision 30561757) +2007 (revision 30900003) +2008 (revision 30260606) +21 בינואר (revision 30483313) +22 במרץ (revision 29335566) +23 בדצמבר (revision 30888068) +23 במאי (revision 28677021) +28 באוקטובר (revision 30665513) +28 במאי (revision 30647380) +3 ביולי (revision 30712685) +3 בספטמבר (revision 30732448) +6 בפברואר (revision 30820717) +9 בדצמבר (revision 30650579) +Wayback Machine (revision 30422443) +אבן בניין (revision 28384131) +אברהם גולדפדן (revision 30588411) +אברהם לייבה זיסו (revision 29770127) +אדוארד ג'י רובינסון (revision 30271581) +אדולף שטרן (revision 28829344) +אהרון טאובס (revision 28125670) +אוניברסיטת בוקרשט (revision 30812551) +אוסטרו-הונגריה (revision 30392668) +אופניים (revision 30755077) +אזרחות (revision 30231926) +איסאק פלץ (revision 30750428) +אירופה (revision 30734576) +אלי ברקוביץ (revision 26435411) +אליאס שוורצפלד (revision 27528306) +אליעזר רוקח (revision 30860048) +אלכסנדר איפסילנטי (הנכד) (revision 30132231) +אלכסנדר שפרן (revision 30626532) +אלכסנדרו יואן קוזה (revision 30812553) +אלכסנדרו רובוט (revision 30725110) +אמנות (revision 30463855) +אמנציפציה ליהודים (revision 30769017) +אנג'ליקה רוזיאנו (revision 29943550) +אנטישמי (revision 30734529) +אפיית מצות (revision 30898230) +ארץ ישראל (revision 30777728) +אשר אהרנפלד (revision 30497378) +בוגדן פטריצ'ייקו חאשדאו (revision 29548438) +בוקובינה (revision 29870803) + +== End of Parsed pages == + +- Wikipedia parsing ended at: 2021-03-17 23:15:54.330136 + +79 characters appeared 538173 times. + +First 64 characters: +[ 0] Char י: 12.444697151287782 % +[ 1] Char ו: 10.995534893054835 % +[ 2] Char ר: 7.513197429079496 % +[ 3] Char ה: 7.070960453237156 % +[ 4] Char ב: 6.162702328061794 % +[ 5] Char ל: 5.905164324483019 % +[ 6] Char א: 5.503063141406202 % +[ 7] Char מ: 5.107465443268243 % +[ 8] Char ת: 4.564517357801302 % +[ 9] Char נ: 4.524381565035778 % +[10] Char ש: 3.590295313960381 % +[11] Char פ: 2.6872399767361053 % +[12] Char ד: 2.6859392797483337 % +[13] Char ק: 2.389751994247203 % +[14] Char ט: 2.2137862731872464 % +[15] Char ס: 2.155998164159109 % +[16] Char ם: 2.073310998507915 % +[17] Char ע: 1.9315350268408114 % +[18] Char ח: 1.9018048099774607 % +[19] Char ג: 1.6840309714534172 % +[20] Char ן: 1.4670003883509577 % +[21] Char כ: 1.3939755431803529 % +[22] Char צ: 1.2600037534398791 % +[23] Char ז: 0.8928355751774987 % +[24] Char ץ: 0.2668286963485719 % +[25] Char ך: 0.24397359213487116 % +[26] Char ף: 0.1663034005793676 % +[27] Char i: 0.10963017468360546 % +[28] Char e: 0.10925854697281358 % +[29] Char a: 0.10814366384043793 % +[30] Char r: 0.08268716565119395 % +[31] Char n: 0.08250135179579801 % +[32] Char o: 0.0707950789058537 % +[33] Char t: 0.062247641557640385 % +[34] Char l: 0.05685903975115808 % +[35] Char u: 0.0510988102338839 % +[36] Char s: 0.04199393131948277 % +[37] Char c: 0.038091840356168 % +[38] Char d: 0.03103091385112222 % +[39] Char h: 0.03084509999572628 % +[40] Char m: 0.022855104213700798 % +[41] Char g: 0.022297662647512977 % +[42] Char b: 0.015050922287071259 % +[43] Char N: 0.015050922287071259 % +[44] Char B: 0.014679294576279374 % +[45] Char S: 0.01430766686548749 % +[46] Char C: 0.01393603915469561 % +[47] Char v: 0.01393603915469561 % +[48] Char A: 0.013192783733111842 % +[49] Char E: 0.012449528311528077 % +[50] Char p: 0.010963017468360547 % +[51] Char I: 0.010963017468360547 % +[52] Char M: 0.01021976204677678 % +[53] Char f: 0.010033948191380837 % +[54] Char z: 0.009662320480588956 % +[55] Char R: 0.008733251203609248 % +[56] Char P: 0.008547437348213307 % +[57] Char T: 0.008361623492817365 % +[58] Char L: 0.007804181926629541 % +[59] Char F: 0.0076183680712336 % +[60] Char H: 0.007432554215837659 % +[61] Char k: 0.007246740360441716 % +[62] Char y: 0.0070609265050457755 % +[63] Char w: 0.0070609265050457755 % + +The first 64 characters have an accumulated ratio of 0.9995094514217545. + +1195 sequences found. + +First 512 (typical positive ratio): 0.9890483702848128 +Next 512 (512-1024): 0.04564517357801302 +Rest: 0.0004014423754119586 + +- Processing end: 2021-03-17 23:15:54.469267 diff --git a/script/charsets/iso-8859-8.py b/script/charsets/iso-8859-8.py new file mode 100644 index 0000000..2090b4e --- /dev/null +++ b/script/charsets/iso-8859-8.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +from codepoints import * + +name = 'ISO-8859-8' +aliases = ['ISO_8859-8:1988', 'ISO_8859-8', 'iso-ir-138', + 'csISOLatinHebrew', 'hebrew'] + +language = \ +{ + 'complete': [ 'he' ], + 'incomplete': [] +} + +# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # +charmap = \ +[ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 8X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 9X + SYM,CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX + SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, # BX + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # CX + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,SYM, # DX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,CTR,CTR,SYM,SYM,CTR, # FX +] diff --git a/script/charsets/windows-1255.py b/script/charsets/windows-1255.py new file mode 100644 index 0000000..7795c27 --- /dev/null +++ b/script/charsets/windows-1255.py @@ -0,0 +1,71 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +from codepoints import * + +name = 'WINDOWS-1255' +aliases = ['CP1255'] + +language = \ +{ + 'complete': [ 'he' ], + 'incomplete': [] +} + +# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF # +charmap = \ +[ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X + SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X + SYM,CTR,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,CTR,SYM,CTR,CTR,CTR,CTR, # 8X + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR,SYM,CTR,CTR,CTR,CTR, # 9X + SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX + SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # BX + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # CX + SYM,SYM,SYM,SYM,LET,LET,LET,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # DX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX + LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,CTR,CTR,SYM,SYM,CTR, # FX +] diff --git a/script/langs/he.py b/script/langs/he.py new file mode 100644 index 0000000..23713cc --- /dev/null +++ b/script/langs/he.py @@ -0,0 +1,63 @@ +#!/bin/python3 +# -*- coding: utf-8 -*- + +# ##### BEGIN LICENSE BLOCK ##### +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Jehan +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. +# +# ##### END LICENSE BLOCK ##### + +import re + +## Mandatory Properties ## + +# The human name for the language, in English. +name = 'Hebrew' +# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise, +# or use another catalog as a last resort. +code = 'he' +use_ascii = False +# The charsets we want to support and create data for. +charsets = ['ISO-8859-8', 'WINDOWS-1255'] + +## Optional Properties ## + +# The start page. Though optional, it is advised to choose one yourself. +start_pages = ['יהדות_בוקרשט'] +# give possibility to select another code for the Wikipedia URL. +wikipedia_code = code +# 'a' and 'A' will be considered the same character, and so on. +# This uses Python algorithm to determine upper/lower-case of a given +# character. +case_mapping = False diff --git a/src/LangModels/LangHebrewModel.cpp b/src/LangModels/LangHebrewModel.cpp index 811c048..d040701 100644 --- a/src/LangModels/LangHebrewModel.cpp +++ b/src/LangModels/LangHebrewModel.cpp @@ -12,16 +12,14 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Mozilla Universal charset detector code. + * The Original Code is Mozilla Communicator client code. * * The Initial Developer of the Original Code is - * Simon Montagu - * Portions created by the Initial Developer are Copyright (C) 2005 + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): - * Shoshannah Forbes - * Shy Shalom * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or @@ -38,183 +36,256 @@ * ***** END LICENSE BLOCK ***** */ #include "../nsSBCharSetProber.h" +#include "../nsLanguageDetector.h" +/********* Language model for: Hebrew *********/ -/**************************************************************** -CTR: Control characters that usually does not exist in any text -RET: Carriage/Return -SYM: symbol (punctuation) that does not belong to word -NUM: 0 - 9 +/** + * Generated by BuildLangModel.py + * On: 2021-03-17 23:15:54.331334 + **/ -*****************************************************************/ - -//Windows-1255 language model -//Character Mapping Table: -static const unsigned char win1255_CharToOrderMap[] = +/* Character Mapping Table: + * ILL: illegal character. + * CTR: control character specific to the charset. + * RET: carriage/return. + * SYM: symbol (punctuation) that does not belong to word. + * NUM: 0 - 9. + * + * Other characters are ordered by probabilities + * (0 is the most common character in the language). + * + * Orders are generic to a language. So the codepoint with order X in + * CHARSET1 maps to the same character as the codepoint with the same + * order X in CHARSET2 for the same language. + * As such, it is possible to get missing order. For instance the + * ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1 + * even though they are both used for French. Same for the euro sign. + */ +static const unsigned char Iso_8859_8_CharToOrderMap[] = { -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00 -CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10 -SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20 -NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30 -SYM, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, //40 - 78,121, 86, 71, 67,102,107, 84,114,103,115,SYM,SYM,SYM,SYM,SYM, //50 -SYM, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, //60 - 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,SYM,SYM,SYM,SYM,SYM, //70 -124,ILL,203,204,205, 40, 58,206,207,208,ILL,210,ILL,ILL,ILL,ILL, -ILL, 83, 52, 47, 46, 72, 32, 94,216,113,ILL,109,ILL,ILL,ILL,ILL, - 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, -106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, - 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, -238, 38, 45,239,240,241,242,243,127,ILL,ILL,ILL,ILL,ILL,ILL,ILL, - 9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, - 12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,ILL,ILL,128, 96,ILL, + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 48, 44, 46, 64, 49, 59, 65, 60, 51, 69, 70, 58, 52, 43, 72, /* 4X */ + 56, 77, 55, 45, 57, 68, 67, 66, 75, 74, 73,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 29, 42, 37, 38, 28, 53, 41, 39, 27, 76, 61, 34, 40, 31, 32, /* 6X */ + 50, 79, 30, 36, 33, 35, 47, 63, 71, 62, 54,SYM,SYM,SYM,SYM,CTR, /* 7X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */ + SYM,CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR, /* BX */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* CX */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,SYM, /* DX */ + 6, 4, 19, 12, 3, 1, 23, 18, 14, 0, 25, 21, 5, 16, 7, 20, /* EX */ + 9, 15, 17, 26, 11, 24, 22, 13, 2, 10, 8,CTR,CTR,SYM,SYM,CTR, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const unsigned char Windows_1255_CharToOrderMap[] = +{ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ + CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */ + NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */ + SYM, 48, 44, 46, 64, 49, 59, 65, 60, 51, 69, 70, 58, 52, 43, 72, /* 4X */ + 56, 77, 55, 45, 57, 68, 67, 66, 75, 74, 73,SYM,SYM,SYM,SYM,SYM, /* 5X */ + SYM, 29, 42, 37, 38, 28, 53, 41, 39, 27, 76, 61, 34, 40, 31, 32, /* 6X */ + 50, 81, 30, 36, 33, 35, 47, 63, 71, 62, 54,SYM,SYM,SYM,SYM,CTR, /* 7X */ + SYM,CTR,SYM, 82,SYM,SYM,SYM,SYM, 83,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 8X */ + CTR,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,CTR,SYM,CTR,CTR,CTR,CTR, /* 9X */ + SYM,SYM,SYM,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */ + SYM,SYM,SYM,SYM,SYM, 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */ + SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */ + SYM,SYM,SYM,SYM, 78, 86, 87,SYM,SYM,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* DX */ + 6, 4, 19, 12, 3, 1, 23, 18, 14, 0, 25, 21, 5, 16, 7, 20, /* EX */ + 9, 15, 17, 26, 11, 24, 22, 13, 2, 10, 8,CTR,CTR,SYM,SYM,CTR, /* FX */ +}; +/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */ + +static const int Unicode_Char_size = 64; +static const unsigned int Unicode_CharOrder[] = +{ + 65, 48, 66, 44, 67, 46, 69, 49, 70, 59, 72, 60, 73, 51, 76, 58, + 77, 52, 78, 43, 80, 56, 82, 55, 83, 45, 84, 57, 97, 29, 98, 42, + 99, 37, 100, 38, 101, 28, 102, 53, 103, 41, 104, 39, 105, 27, 107, 61, + 108, 34, 109, 40, 110, 31, 111, 32, 112, 50, 114, 30, 115, 36, 116, 33, + 117, 35, 118, 47, 119, 63, 121, 62, 122, 54, 1488, 6, 1489, 4,1490, 19, + 1491, 12, 1492, 3, 1493, 1, 1494, 23, 1495, 18, 1496, 14, 1497, 0,1498, 25, + 1499, 21, 1500, 5, 1501, 16, 1502, 7, 1503, 20, 1504, 9, 1505, 15,1506, 17, + 1507, 26, 1508, 11, 1509, 24, 1510, 22, 1511, 13, 1512, 2, 1513, 10,1514, 8, }; -//Model Table: -//total sequences: 100% -//first 512 sequences: 98.4004% -//first 1024 sequences: 1.5981% -//rest sequences: 0.087% -//negative sequences: 0.0015% -static const PRUint8 HebrewLangModel[] = + +/* Model Table: + * Total sequences: 1195 + * First 512 sequences: 0.9890483702848128 + * Next 512 sequences (512-1024): 0.010550187339775191 + * Rest: 0.0004014423754119586 + * Negative sequences: TODO + */ +static const PRUint8 HebrewLangModel[] = { -0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0, -3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2, -1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2, -1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3, -1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2, -1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2, -0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2, -1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1, -0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0, -0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2, -0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2, -0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2, -0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2, -0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2, -0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0, -3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2, -0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0, -3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2, -0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2, -0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0, -1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2, -0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0, -3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, -3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3, -0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0, -0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, -0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0, -0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0, -2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0, -0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1, -0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, -0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, -3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0, -0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1, -1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1, -0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1, -1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1, -2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1, -2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1, -2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0, -0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1, -1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1, -0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,2,0,2,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,1,3,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,2,3,3,3,3,3,3,3,2,3,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,0,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,3,2,2,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3,2,3,3,2,3,3,1,2,0,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,3,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,0,0,0,2,2,0,2,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,0,0,2,3,2,3,3,2,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,0,0,3,3,2,3,3,2,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,3,2,2,3,1,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,0,0,0,0,0,2,2,0,1,0,1,1,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,3,2,3,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,1,2,3,3,3,2,1,2,0,2,0,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 3,3,3,3,3,3,3,3,2,3,2,2,2,3,1,2,3,3,2,2,3,3,2,2,0,1,2,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,3,2,3, + 2,3,2,2,3,3,2,1,2,2,2,0,0,0,1,2,0,0,2,0,0,1,2,0,1,0,0,0,0,1,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,3,3,3, + 2,2,3,2,2,2,2,1,2,2,2,0,0,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,2,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,3,3, + 0,3,3,2,2,2,2,2,2,2,2,0,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,2,2,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,3,2,2, + 2,2,2,2,2,2,2,1,2,2,2,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,2,2,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,2,2, + 2,2,2,2,2,2,2,1,0,3,2,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,3,3, + 2,3,2,2,2,2,2,2,3,2,2,0,0,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,2,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,2,2,0, + 2,2,2,2,2,0,0,3,0,0,0,0,0,0,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,0,2,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,0,1, + 2,2,2,2,1,2,2,1,2,0,0,0,0,0,0,2,0,0,1,0,1,0,1,0,0,0,0,0,0,0,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2, + 0,2,3,0,2,2,2,1,2,2,2,0,0,0,0,2,0,0,2,0,0,0,1,0,0,0,0,0,0,2,0,0, + 0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,3,1,2,2,2,2,2,2,0,0,0,0,0,0,1,0,0,2,0,0,1,2,0,0,0,0,0,0,1,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,2,2,2,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,2,2,0, + 2,0,0,2,1,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0, + 2,2,1,1,0,0,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,2, + 2,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2, + 2,2,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,1,0, + 3,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,0,1,2,0,0,0,0,0,0,0,0,2,0,1,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 2,2,1,2,0,2,0,0,0,0,1,0,0,2,0,0,2,2,2,0,0,2,1,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0, + 2,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,2,0,0,0,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1, + 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2, + 0,0,2,1,1,2,2,2,2,0,2,2,0,2,0,1,0,0,1,1,0,0,0,2,0,2,2,1,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2, + 0,0,2,2,0,0,2,1,0,2,0,0,0,2,0,2,0,2,0,0,1,0,0,2,0,0,0,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2, + 1,0,0,2,2,0,0,0,0,0,0,0,1,0,1,1,2,1,1,2,2,0,0,0,0,0,1,2,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1, + 2,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,1, + 2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 2,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,2,0, + 2,0,2,1,0,0,0,1,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,1,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 2,0,0,2,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,2,0,0,0,0,0,0,2,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,0, + 2,0,1,0,0,0,0,0,0,0,0,0,1,0,2,0,1,2,0,1,0,0,0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,1,0, + 2,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0, + 2,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2, + 0,0,0,0,0,2,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0, + 0,0,0,0,2,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2, }; -const SequenceModel Win1255Model = + +const SequenceModel Iso_8859_8HebrewModel = { - win1255_CharToOrderMap, + Iso_8859_8_CharToOrderMap, HebrewLangModel, 64, - (float)0.984004, + (float)0.9890483702848128, + PR_FALSE, + "ISO-8859-8", + "he" +}; + +const SequenceModel Windows_1255HebrewModel = +{ + Windows_1255_CharToOrderMap, + HebrewLangModel, + 64, + (float)0.9890483702848128, PR_FALSE, "WINDOWS-1255", "he" }; + +const LanguageModel HebrewModel = +{ + "he", + Unicode_CharOrder, + 64, + HebrewLangModel, + 64, + (float)0.9890483702848128, +}; diff --git a/src/nsLanguageDetector.h b/src/nsLanguageDetector.h index 5300a4d..4f65c25 100644 --- a/src/nsLanguageDetector.h +++ b/src/nsLanguageDetector.h @@ -119,6 +119,7 @@ extern const LanguageModel FinnishModel; extern const LanguageModel FrenchModel; extern const LanguageModel GermanModel; extern const LanguageModel GreekModel; +extern const LanguageModel HebrewModel; extern const LanguageModel HungarianModel; extern const LanguageModel IrishModel; extern const LanguageModel ItalianModel; diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index 790d099..0e51306 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -97,6 +97,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter) langDetectors[i][j++] = new nsLanguageDetector(&FrenchModel); langDetectors[i][j++] = new nsLanguageDetector(&GermanModel); langDetectors[i][j++] = new nsLanguageDetector(&GreekModel); + langDetectors[i][j++] = new nsLanguageDetector(&HebrewModel); langDetectors[i][j++] = new nsLanguageDetector(&HungarianModel); langDetectors[i][j++] = new nsLanguageDetector(&IrishModel); langDetectors[i][j++] = new nsLanguageDetector(&ItalianModel); diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h index 190ef65..bb06a6c 100644 --- a/src/nsMBCSGroupProber.h +++ b/src/nsMBCSGroupProber.h @@ -48,7 +48,7 @@ #include "nsEUCTWProber.h" #define NUM_OF_PROBERS 7 -#define NUM_OF_LANGUAGES 26 +#define NUM_OF_LANGUAGES 27 class nsMBCSGroupProber: public nsCharSetProber { public: diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp index c836488..ca54911 100644 --- a/src/nsSBCSGroupProber.cpp +++ b/src/nsSBCSGroupProber.cpp @@ -63,8 +63,8 @@ nsSBCSGroupProber::nsSBCSGroupProber() // Notice: Any change in these indexes - 10,11,12 must be reflected // in the code below as well. mProbers[10] = hebprober; - mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew - mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew + mProbers[11] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_FALSE, hebprober); // Logical Hebrew + mProbers[12] = new nsSingleByteCharSetProber(&Windows_1255HebrewModel, PR_TRUE, hebprober); // Visual Hebrew // Tell the Hebrew prober about the logical and visual probers if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null { diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h index 53112fc..f52bf2c 100644 --- a/src/nsSBCharSetProber.h +++ b/src/nsSBCharSetProber.h @@ -149,7 +149,7 @@ extern const SequenceModel Win1251BulgarianModel; extern const SequenceModel Iso_8859_2HungarianModel; extern const SequenceModel Windows_1250HungarianModel; -extern const SequenceModel Win1255Model; +extern const SequenceModel Windows_1255HebrewModel; extern const SequenceModel Tis_620ThaiModel; extern const SequenceModel Iso_8859_11ThaiModel;