LangModels: retraining Greek models with my training script.

This fixes our Greek/Windows-1253 test.
This commit is contained in:
Jehan 2015-12-13 18:00:07 +01:00
parent 1b4c62ac21
commit ad2f7212e2
8 changed files with 500 additions and 203 deletions

View File

@ -0,0 +1,117 @@
= Logs of language model for Greek (el) =
- Generated by BuildLangModel.py
- Started: 2015-12-13 17:52:58.225697
- Maximum depth: 2
- Max number of pages: 50
== Parsed pages ==
Πύλη:Κύρια (revision 5511929)
13 Δεκεμβρίου (revision 5540654)
1545 (revision 5340059)
1937 (revision 5573231)
1943 (revision 5503673)
2007 (revision 5561663)
2009 (revision 5561693)
2012 (revision 5559036)
Sukhoi Su-24 (revision 5582048)
Wiki (revision 5481490)
Wikimedia (revision 5563126)
Αναμνηστικά κέρματα ευρώ €2 (revision 5578001)
Αφρική (revision 5485484)
Γερμανία (revision 5579724)
Εγκυκλοπαίδεια (revision 5566281)
Ελεύθερο περιεχόμενο (revision 5285700)
Ελλάδα (revision 5580388)
Ελληνική γλώσσα (revision 5545135)
Ευρωζώνη (revision 5453082)
Ευρωπαϊκή Ένωση (revision 5562182)
Ευρωπαϊκή Επιτροπή (revision 5535040)
Ευρωπαϊκή Κεντρική Τράπεζα (revision 5352451)
Ευρώ (revision 5535228)
Ιαπωνία (revision 5540508)
Κέρματα ευρώ (revision 5421943)
Κίνα (revision 5538381)
Καθολική Εκκλησία (revision 5345868)
Καλάβρυτα (revision 5562415)
Κεντροαφρικανική Δημοκρατία (revision 5583804)
Κλίμα (revision 5331688)
Ναντσίνγκ (revision 5460512)
Οικουμενικές σύνοδοι (revision 5377374)
ΠΓΔΜ (revision 5577102)
Πάπας Φραγκίσκος (revision 5565143)
Παρίσι (revision 5524991)
Προτεσταντισμός (revision 5564242)
Πρωθυπουργός της Πορτογαλίας (revision 4986657)
Σφαγή της Ναντσίνγκ (revision 5026948)
Σφαγή των Καλαβρύτων (revision 5491100)
Σύνοδος των Ηνωμένων Εθνών για το κλίμα (2015) (revision 5521523)
Τουρκική κατάρριψη ρωσικού Sukhoi Su-24 (revision 5582048)
Χρυσά και ασημένια συλλεκτικά νομίσματα Ευρώ (revision 4458078)
10 Δεκεμβρίου (revision 5556215)
1124 (revision 5556117)
11 Δεκεμβρίου (revision 5537830)
1204 (revision 5234676)
1250 (revision 5445111)
1294 (revision 5563589)
12 Δεκεμβρίου (revision 5539079)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-12-13 17:54:25.103854
62 characters appeared 551775 times.
First 41 characters:
[ 0] Char α: 9.098636219473518 %
[ 1] Char ο: 8.030447193149381 %
[ 2] Char τ: 7.717819763490554 %
[ 3] Char ι: 6.6942141271351545 %
[ 4] Char ε: 6.213764668569617 %
[ 5] Char ν: 5.920166734629151 %
[ 6] Char ρ: 4.645552988083911 %
[ 7] Char κ: 4.4978478546508995 %
[ 8] Char σ: 4.235422047030039 %
[ 9] Char η: 3.9173576185945356 %
[10] Char ς: 3.821666440125051 %
[11] Char π: 3.59023152553124 %
[12] Char μ: 3.2670925649041727 %
[13] Char υ: 3.258755833446604 %
[14] Char λ: 2.7634452448914866 %
[15] Char ί: 2.437406551583526 %
[16] Char ό: 2.070409134157945 %
[17] Char ά: 1.8300937882288977 %
[18] Char έ: 1.6805763218703276 %
[19] Char γ: 1.6767704227266547 %
[20] Char δ: 1.5888722758370715 %
[21] Char ω: 1.4756014679896698 %
[22] Char ή: 1.2963617416519415 %
[23] Char χ: 1.1928775316025553 %
[24] Char ύ: 0.9763037469983236 %
[25] Char θ: 0.8885868334012957 %
[26] Char ώ: 0.8104752843097277 %
[27] Char β: 0.7689728603144398 %
[28] Char φ: 0.6885052784196457 %
[29] Char ξ: 0.32549499343029314 %
[30] Char ζ: 0.3108150967332699 %
[31] Char i: 0.22273571655113045 %
[32] Char e: 0.2096869194871098 %
[33] Char a: 0.17742739341216981 %
[34] Char o: 0.14534910062978568 %
[35] Char n: 0.1428118345340039 %
[36] Char s: 0.12432603869330797 %
[37] Char r: 0.12305740564541706 %
[38] Char ϊ: 0.10819627565583799 %
[39] Char t: 0.10819627565583799 %
[40] Char ψ: 0.1040279099270536 %
The first 41 characters have an accumulated ratio of 0.9915635902315255.
1299 sequences found.
First 512 (typical positive ratio): 0.9690985257709991
Next 512 (512-1024): 0.029851434797603802
Rest: 0.0010500394313971116
- Processing end: 2015-12-13 17:54:25.303820

View File

@ -0,0 +1,73 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
from codepoints import *
# Description of the ISO-8859-7 single-byte charset: its name, aliases,
# associated language codes and a per-byte classification table.
# NOTE(review): presumably read by BuildLangModel.py when generating the
# language models -- confirm against that script.

# Canonical charset name.
name = 'ISO-8859-7'
# Other names under which the same charset is known.
aliases = ['ISO_8859-7:1987', 'ISO_8859-7', 'iso-ir-126',
'ELOT_928', 'ECMA-118', 'greek', 'greek8', 'csISOLatinGreek']
# Language codes tied to this charset, split into a 'complete' and an
# 'incomplete' list.
# NOTE(review): presumably "complete" means the charset covers the whole
# alphabet of the language -- TODO confirm.
language = \
{
# Dedicated to modern Greek.
'complete': [ 'el' ],
'incomplete': []
}
# Classification of each of the 256 byte values, written as 16 rows of 16
# entries: the trailing "# NX" comment on a row is the high nibble and the
# column header below is the low nibble.
# The class names (CTR, RET, SYM, NUM, LET, ILL) come from the star import
# of `codepoints`. NOTE(review): presumably control / line-break / symbol /
# digit / letter / illegal byte -- confirm in codepoints.py.
# In this table 0xAE, 0xD2 and 0xFF are the only bytes marked ILL, and the
# whole 0x80-0x9F range is marked CTR.
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \
[
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 8X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 9X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, # AX
SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,LET,LET,SYM,LET,SYM,LET,LET, # BX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
LET,LET,ILL,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,ILL, # FX
]

View File

@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
from codepoints import *
# Description of the WINDOWS-1253 single-byte charset: its name, aliases,
# associated language codes and a per-byte classification table.
# NOTE(review): presumably read by BuildLangModel.py when generating the
# language models -- confirm against that script.

# Canonical charset name.
name = 'WINDOWS-1253'
# Other names under which the same charset is known.
aliases = ['cswindows1253']
# Language codes tied to this charset, split into a 'complete' and an
# 'incomplete' list.
# NOTE(review): presumably "complete" means the charset covers the whole
# alphabet of the language -- TODO confirm.
language = \
{
# Greek support.
'complete': ['el'],
'incomplete': []
}
# Classification of each of the 256 byte values, written as 16 rows of 16
# entries: the trailing "# NX" comment on a row is the high nibble and the
# column header below is the low nibble.
# The class names (CTR, RET, SYM, NUM, LET, ILL) come from the star import
# of `codepoints`. NOTE(review): presumably control / line-break / symbol /
# digit / letter / illegal byte -- confirm in codepoints.py.
# Rows 8X and 9X contain only SYM and ILL entries (no CTR); further ILL
# entries are at 0xAA, 0xD2 and 0xFF.
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \
[
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, # 8X
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, # 9X
SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, # AX
SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,LET,LET,LET,SYM,LET,SYM,LET,LET, # BX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
LET,LET,ILL,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,ILL, # FX
]

63
script/langs/el.py Normal file
View File

@ -0,0 +1,63 @@
#!/bin/python3
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
import re
## Mandatory Properties ##

# Human-readable name of the language.
name = "Greek"
# Language code; reused below as the Wikipedia code.
code = "el"
# NOTE(review): presumably tells the model builder whether ASCII letters
# count as part of this language's alphabet -- confirm in BuildLangModel.py.
use_ascii = False
# Charsets listed for this language.
charsets = ["ISO-8859-7", "WINDOWS-1253"]

## Optional Properties ##

# The lower-case Greek letters, including the final sigma.
alphabet = "αβγδεζηθικλμνξοπρσςτυφχψω"
# Title(s) of the Wikipedia page(s) given as starting point.
start_pages = ["Πύλη:Κύρια"]
# Same value as `code`.
wikipedia_code = code
# NOTE(review): presumably makes upper- and lower-case forms of a letter
# count as the same character -- confirm in BuildLangModel.py.
case_mapping = True
def clean_wikipedia_content(content):
    """Clean text returned by the `wikipedia` python lib.

    Catches unwanted data that may have been overlooked: a section heading
    of the form ``== Title Επεξεργασία ==`` (any number of ``=`` signs, the
    same run on both sides) is replaced by its title text alone. Text that
    does not contain such a heading is returned unchanged.

    :param content: the page text to clean.
    :returns: the text with matching headings reduced to their title.
    """
    # Group 1 is the opening run of '='; the trailing \1 requires the same
    # run to close the heading. Group 2 is the title and is all that is
    # kept; being greedy, it retains the whitespace that precedes
    # "Επεξεργασία".
    cleaned = re.sub(r'(=+) *([^=]+) *Επεξεργασία \1',
                     r'\2',
                     content)
    return cleaned

View File

@ -36,211 +36,184 @@
* ***** END LICENSE BLOCK ***** */
#include "../nsSBCharSetProber.h"
/****************************************************************
CTR: Control characters that usually does not exist in any text
RET: Carriage/Return
SYM: symbol (punctuation) that does not belong to word
NUM: 0 - 9
*****************************************************************/
/********* Language model for: Greek *********/
//Character Mapping Table:
static const unsigned char Latin7_CharToOrderMap[] =
/**
* Generated by BuildLangModel.py
* On: 2015-12-13 17:54:25.105295
**/
/* Character Mapping Table:
* ILL: illegal character.
* CTR: control character specific to the charset.
* RET: carriage/return.
* SYM: symbol (punctuation) that does not belong to word.
* NUM: 0 - 9.
*
* Other characters are ordered by probabilities
* (0 is the most common character in the language).
*
* Orders are generic to a language. So the codepoint with order X in
* CHARSET1 maps to the same character as the codepoint with the same
* order X in CHARSET2 for the same language.
* As such, it is possible to get missing orders. For instance the
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Windows_1253_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
SYM, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
79,118,105, 83, 67,114,119, 95, 99,109,188,SYM,SYM,SYM,SYM,SYM, //50
SYM, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
78,115, 65, 66, 58, 76,106,103, 87,107,112,SYM,SYM,SYM,SYM,CTR, //70
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //80
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //90
SYM,233, 90,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 74,ILL,SYM, //a0
SYM,SYM,SYM,SYM,247,248, 61, 36, 46, 71, 73,SYM, 54,SYM,108,123, //b0
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
35, 48,ILL, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,ILL, //f0
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */
SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 61,SYM,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */
55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */
11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */
62, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */
11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
static const unsigned char Iso_8859_7_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */
55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */
11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */
63, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */
11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
/* Model Table:
* Total sequences: 1299
* First 512 sequences: 0.9690985257709991
* Next 512 sequences (512-1024): 0.029851434797603802
* Rest: 0.0010500394313971116
* Negative sequences: TODO
*/
static const PRUint8 GreekLangModel[] =
{
1,2,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,2,2,3,
3,0,2,3,3,3,1,3,3,3,3,0,0,0,0,0,0,0,3,0,2,
2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,
3,2,2,3,3,3,2,3,3,2,2,0,0,0,0,0,0,0,2,0,2,
3,3,2,3,3,2,3,1,3,3,2,0,3,3,2,3,3,3,3,2,
0,3,3,1,3,1,3,2,1,0,3,0,0,0,1,0,0,0,0,0,0,
3,3,3,1,3,3,3,3,3,2,3,3,3,1,3,1,3,3,3,3,
3,3,2,3,0,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,2,
2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,1,3,
3,3,1,3,3,3,2,3,3,3,3,0,1,0,0,0,1,0,2,0,2,
3,3,3,3,3,3,2,2,3,3,2,1,2,2,2,3,3,3,3,3,
3,3,3,2,2,3,3,1,1,0,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,1,2,3,3,2,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,3,3,2,
3,3,3,2,3,2,3,2,2,0,0,0,0,0,1,0,0,0,0,0,0,
3,3,3,3,3,2,2,3,3,3,0,3,3,3,3,3,3,3,3,1,
2,3,3,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,3,0,2,3,3,3,3,1,3,3,3,0,3,0,0,0,1,3,
2,0,0,2,1,3,0,1,3,2,0,0,0,0,0,0,0,0,0,0,2,
0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,1,1,3,1,2,1,2,3,3,3,3,3,3,
1,3,3,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,0,3,3,3,2,1,2,3,2,3,3,2,2,3,3,3,3,0,
0,3,3,0,2,0,3,3,3,0,1,0,0,0,0,0,0,0,0,0,2,
3,2,3,2,2,3,3,3,3,2,3,3,3,0,3,2,2,3,2,3,
3,0,2,3,0,3,1,3,2,3,3,0,0,0,0,0,0,0,1,0,3,
3,3,3,3,3,2,0,2,2,3,2,2,3,3,3,3,3,3,3,3,
2,3,3,2,3,2,3,3,2,1,1,0,0,0,0,0,0,0,0,0,1,
3,3,3,0,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3,
3,3,0,3,0,3,0,2,2,2,3,0,0,0,0,0,0,0,0,0,2,
2,2,3,2,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3,
3,1,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2,
2,2,3,3,2,3,3,3,3,2,3,3,3,1,3,0,0,0,0,3,
3,1,0,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,1,0,2,
3,3,3,2,2,3,3,3,3,1,3,3,3,0,3,0,0,0,0,3,
3,3,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2,
3,3,0,3,3,3,3,3,0,3,0,0,3,2,3,3,3,3,3,3,
3,3,3,3,2,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
3,3,0,3,3,1,3,1,0,3,0,0,3,3,0,3,3,3,3,0,
1,3,3,0,3,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,3,2,1,3,3,2,3,0,3,3,3,0,2,1,0,2,1,3,
2,0,2,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,1,3,3,3,3,0,3,2,3,1,3,0,0,0,0,3,
2,0,0,2,0,3,0,1,2,2,1,0,0,0,0,0,0,0,0,0,2,
3,3,2,3,3,3,3,0,1,3,1,0,2,3,2,3,2,3,3,0,
0,3,3,0,2,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,2,3,3,3,2,3,2,3,3,3,0,3,0,0,0,0,3,
2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,2,
3,3,0,3,3,3,3,0,0,3,0,0,3,3,2,2,3,3,3,0,
0,2,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,3,2,0,3,3,2,3,2,3,3,3,0,2,0,0,0,0,2,
2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,
3,3,0,3,3,1,3,1,2,3,0,0,1,2,3,3,3,3,3,2,
2,2,2,0,2,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,1,3,1,2,3,0,1,1,3,2,3,2,3,3,2,
0,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,0,0,0,0,3,0,1,0,2,0,2,2,3,3,0,
0,3,2,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,0,3,3,2,1,0,0,2,0,0,2,2,1,2,2,2,2,0,
0,3,2,1,1,0,3,2,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,2,3,2,3,3,2,0,2,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,0,2,0,
0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,2,2,1,2,3,2,3,0,3,0,
0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,2,1,1,2,3,2,3,0,2,0,
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0,
0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,2,3,2,3,2,2,2,0,2,0,
2,0,2,0,1,2,2,3,2,0,1,1,2,0,2,0,2,1,0,2,
1,0,0,1,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0,2,2,0,2,0,
2,2,0,2,3,0,0,0,0,3,0,0,0,2,0,2,2,1,1,0,
0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
static const unsigned char win1253_CharToOrderMap[] =
const SequenceModel Windows_1253GreekModel =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
SYM, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
79,118,105, 83, 67,114,119, 95, 99,109,188,SYM,SYM,SYM,SYM,SYM, //50
SYM, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
78,115, 65, 66, 58, 76,106,103, 87,107,112,SYM,SYM,SYM,SYM,SYM, //70
CTR,ILL,CTR,CTR,CTR,CTR,CTR,CTR,ILL,CTR,ILL,CTR,ILL,ILL,ILL,ILL, //80
ILL,CTR,CTR,CTR,CTR,CTR,CTR,CTR,ILL,CTR,ILL,CTR,ILL,ILL,ILL,ILL, //90
SYM,233, 61,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 74,SYM,SYM, //a0
SYM,SYM,SYM,SYM,247,SYM,SYM, 36, 46, 71, 73,SYM, 54,SYM,108,123, //b0
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
35, 48,ILL, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,ILL, //f0
};
//Model Table:
//total sequences: 100%
//first 512 sequences: 98.2851%
//first 1024 sequences:1.7001%
//rest sequences: 0.0359%
//negative sequences: 0.0148%
static const PRUint8 GreekLangModel[] =
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
const SequenceModel Latin7GreekModel =
{
Latin7_CharToOrderMap,
Windows_1253_CharToOrderMap,
GreekLangModel,
64,
(float)0.982851,
PR_FALSE,
"ISO-8859-7"
};
const SequenceModel Win1253GreekModel =
{
win1253_CharToOrderMap,
GreekLangModel,
64,
(float)0.982851,
41,
(float)0.9690985257709991,
PR_FALSE,
"WINDOWS-1253"
};
const SequenceModel Iso_8859_7GreekModel =
{
Iso_8859_7_CharToOrderMap,
GreekLangModel,
41,
(float)0.9690985257709991,
PR_FALSE,
"ISO-8859-7"
};

View File

@ -53,8 +53,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
mProbers[6] = new nsSingleByteCharSetProber(&Latin7GreekModel);
mProbers[7] = new nsSingleByteCharSetProber(&Win1253GreekModel);
mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);

View File

@ -130,8 +130,8 @@ extern const SequenceModel MacCyrillicRussianModel;
extern const SequenceModel Ibm866RussianModel;
extern const SequenceModel Ibm855RussianModel;
extern const SequenceModel Latin7GreekModel;
extern const SequenceModel Win1253GreekModel;
extern const SequenceModel Iso_8859_7GreekModel;
extern const SequenceModel Windows_1253GreekModel;
extern const SequenceModel Latin5BulgarianModel;
extern const SequenceModel Win1251BulgarianModel;

View File

@ -34,8 +34,7 @@ foreach(dir ${dirs})
# enough). We will have to take a closer look and fix these, but
# there is no need to break the whole `make test` right now,
# which may make actual regressions harder to notice.
if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
if ("${lang}:${charset}" STREQUAL "ja:utf-16le" OR
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
"${lang}:${charset}" STREQUAL "es:iso-8859-15" OR
"${lang}:${charset}" STREQUAL "he:iso-8859-8")