mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-13 23:20:08 +08:00
LangModels: retraining Greek models with my training script.
This fixes our Greek/Windows-1253 test.
This commit is contained in:
parent
1b4c62ac21
commit
ad2f7212e2
117
script/BuildLangModelLogs/LangGreekModel.log
Normal file
117
script/BuildLangModelLogs/LangGreekModel.log
Normal file
@ -0,0 +1,117 @@
|
||||
= Logs of language model for Greek (el) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2015-12-13 17:52:58.225697
|
||||
- Maximum depth: 2
|
||||
- Max number of pages: 50
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Πύλη:Κύρια (revision 5511929)
|
||||
13 Δεκεμβρίου (revision 5540654)
|
||||
1545 (revision 5340059)
|
||||
1937 (revision 5573231)
|
||||
1943 (revision 5503673)
|
||||
2007 (revision 5561663)
|
||||
2009 (revision 5561693)
|
||||
2012 (revision 5559036)
|
||||
Sukhoi Su-24 (revision 5582048)
|
||||
Wiki (revision 5481490)
|
||||
Wikimedia (revision 5563126)
|
||||
Αναμνηστικά κέρματα ευρώ €2 (revision 5578001)
|
||||
Αφρική (revision 5485484)
|
||||
Γερμανία (revision 5579724)
|
||||
Εγκυκλοπαίδεια (revision 5566281)
|
||||
Ελεύθερο περιεχόμενο (revision 5285700)
|
||||
Ελλάδα (revision 5580388)
|
||||
Ελληνική γλώσσα (revision 5545135)
|
||||
Ευρωζώνη (revision 5453082)
|
||||
Ευρωπαϊκή Ένωση (revision 5562182)
|
||||
Ευρωπαϊκή Επιτροπή (revision 5535040)
|
||||
Ευρωπαϊκή Κεντρική Τράπεζα (revision 5352451)
|
||||
Ευρώ (revision 5535228)
|
||||
Ιαπωνία (revision 5540508)
|
||||
Κέρματα ευρώ (revision 5421943)
|
||||
Κίνα (revision 5538381)
|
||||
Καθολική Εκκλησία (revision 5345868)
|
||||
Καλάβρυτα (revision 5562415)
|
||||
Κεντροαφρικανική Δημοκρατία (revision 5583804)
|
||||
Κλίμα (revision 5331688)
|
||||
Ναντσίνγκ (revision 5460512)
|
||||
Οικουμενικές σύνοδοι (revision 5377374)
|
||||
ΠΓΔΜ (revision 5577102)
|
||||
Πάπας Φραγκίσκος (revision 5565143)
|
||||
Παρίσι (revision 5524991)
|
||||
Προτεσταντισμός (revision 5564242)
|
||||
Πρωθυπουργός της Πορτογαλίας (revision 4986657)
|
||||
Σφαγή της Ναντσίνγκ (revision 5026948)
|
||||
Σφαγή των Καλαβρύτων (revision 5491100)
|
||||
Σύνοδος των Ηνωμένων Εθνών για το κλίμα (2015) (revision 5521523)
|
||||
Τουρκική κατάρριψη ρωσικού Sukhoi Su-24 (revision 5582048)
|
||||
Χρυσά και ασημένια συλλεκτικά νομίσματα Ευρώ (revision 4458078)
|
||||
10 Δεκεμβρίου (revision 5556215)
|
||||
1124 (revision 5556117)
|
||||
11 Δεκεμβρίου (revision 5537830)
|
||||
1204 (revision 5234676)
|
||||
1250 (revision 5445111)
|
||||
1294 (revision 5563589)
|
||||
12 Δεκεμβρίου (revision 5539079)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2015-12-13 17:54:25.103854
|
||||
|
||||
62 characters appeared 551775 times.
|
||||
|
||||
First 41 characters:
|
||||
[ 0] Char α: 9.098636219473518 %
|
||||
[ 1] Char ο: 8.030447193149381 %
|
||||
[ 2] Char τ: 7.717819763490554 %
|
||||
[ 3] Char ι: 6.6942141271351545 %
|
||||
[ 4] Char ε: 6.213764668569617 %
|
||||
[ 5] Char ν: 5.920166734629151 %
|
||||
[ 6] Char ρ: 4.645552988083911 %
|
||||
[ 7] Char κ: 4.4978478546508995 %
|
||||
[ 8] Char σ: 4.235422047030039 %
|
||||
[ 9] Char η: 3.9173576185945356 %
|
||||
[10] Char ς: 3.821666440125051 %
|
||||
[11] Char π: 3.59023152553124 %
|
||||
[12] Char μ: 3.2670925649041727 %
|
||||
[13] Char υ: 3.258755833446604 %
|
||||
[14] Char λ: 2.7634452448914866 %
|
||||
[15] Char ί: 2.437406551583526 %
|
||||
[16] Char ό: 2.070409134157945 %
|
||||
[17] Char ά: 1.8300937882288977 %
|
||||
[18] Char έ: 1.6805763218703276 %
|
||||
[19] Char γ: 1.6767704227266547 %
|
||||
[20] Char δ: 1.5888722758370715 %
|
||||
[21] Char ω: 1.4756014679896698 %
|
||||
[22] Char ή: 1.2963617416519415 %
|
||||
[23] Char χ: 1.1928775316025553 %
|
||||
[24] Char ύ: 0.9763037469983236 %
|
||||
[25] Char θ: 0.8885868334012957 %
|
||||
[26] Char ώ: 0.8104752843097277 %
|
||||
[27] Char β: 0.7689728603144398 %
|
||||
[28] Char φ: 0.6885052784196457 %
|
||||
[29] Char ξ: 0.32549499343029314 %
|
||||
[30] Char ζ: 0.3108150967332699 %
|
||||
[31] Char i: 0.22273571655113045 %
|
||||
[32] Char e: 0.2096869194871098 %
|
||||
[33] Char a: 0.17742739341216981 %
|
||||
[34] Char o: 0.14534910062978568 %
|
||||
[35] Char n: 0.1428118345340039 %
|
||||
[36] Char s: 0.12432603869330797 %
|
||||
[37] Char r: 0.12305740564541706 %
|
||||
[38] Char ϊ: 0.10819627565583799 %
|
||||
[39] Char t: 0.10819627565583799 %
|
||||
[40] Char ψ: 0.1040279099270536 %
|
||||
|
||||
The first 41 characters have an accumulated ratio of 0.9915635902315255.
|
||||
|
||||
1299 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.9690985257709991
|
||||
Next 512 (512-1024): 0.029851434797603802
|
||||
Rest: 0.0010500394313971116
|
||||
|
||||
- Processing end: 2015-12-13 17:54:25.303820
|
||||
73
script/charsets/iso-8859-7.py
Normal file
73
script/charsets/iso-8859-7.py
Normal file
@ -0,0 +1,73 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
from codepoints import *
|
||||
|
||||
# Canonical name of the charset described by this file; the Greek language
# description lists the same string in its `charsets` property.
name = 'ISO-8859-7'
|
||||
# Other names under which the same charset is known.
aliases = ['ISO_8859-7:1987', 'ISO_8859-7', 'iso-ir-126',
|
||||
'ELOT_928', 'ECMA-118', 'greek', 'greek8', 'csISOLatinGreek']
|
||||
|
||||
# Language codes associated with this charset, split into two lists.
# NOTE(review): presumably 'complete' means the charset can encode every
# letter of the language and 'incomplete' only part of them - confirm
# against the script that consumes this dictionary.
language = \
|
||||
{
|
||||
# Dedicated to modern Greek.
|
||||
'complete': [ 'el' ],
|
||||
'incomplete': []
|
||||
}
|
||||
|
||||
# Classification of the 256 byte values of ISO-8859-7, 16 entries per row:
# the trailing row comment gives the high nibble and the column header
# below gives the low nibble of the byte.
# The entry names come from the `codepoints` star import of this file.
# NOTE(review): presumably CTR = control, RET = line break, SYM = symbol,
# NUM = digit, LET = letter, ILL = unassigned byte - confirm in codepoints.
# In this table ILL is used for bytes 0xAE, 0xD2 and 0xFF only.
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 8X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 9X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, # AX
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,LET,LET,SYM,LET,SYM,LET,LET, # BX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
|
||||
LET,LET,ILL,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,ILL, # FX
|
||||
]
|
||||
72
script/charsets/windows-1253.py
Normal file
72
script/charsets/windows-1253.py
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
from codepoints import *
|
||||
|
||||
# Canonical name of the charset described by this file; the Greek language
# description lists the same string in its `charsets` property.
name = 'WINDOWS-1253'
|
||||
# Other names under which the same charset is known.
aliases = ['cswindows1253']
|
||||
|
||||
# Language codes associated with this charset, split into two lists.
# NOTE(review): presumably 'complete' means the charset can encode every
# letter of the language and 'incomplete' only part of them - confirm
# against the script that consumes this dictionary.
language = \
|
||||
{
|
||||
# Greek support.
|
||||
'complete': ['el'],
|
||||
'incomplete': []
|
||||
}
|
||||
|
||||
# Classification of the 256 byte values of WINDOWS-1253, 16 entries per row:
# the trailing row comment gives the high nibble and the column header
# below gives the low nibble of the byte.
# The entry names come from the `codepoints` star import of this file.
# NOTE(review): presumably CTR = control, RET = line break, SYM = symbol,
# NUM = digit, LET = letter, ILL = unassigned byte - confirm in codepoints.
# In this table ILL is used for bytes 0x81, 0x88, 0x8A, 0x8C-0x90, 0x98,
# 0x9A, 0x9C-0x9F, 0xAA, 0xD2 and 0xFF.
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, # 8X
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, # 9X
|
||||
SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, # AX
|
||||
SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,LET,LET,LET,SYM,LET,SYM,LET,LET, # BX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
|
||||
LET,LET,ILL,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # DX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,ILL, # FX
|
||||
]
|
||||
63
script/langs/el.py
Normal file
63
script/langs/el.py
Normal file
@ -0,0 +1,63 @@
|
||||
#!/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
import re
|
||||
|
||||
## Mandatory Properties ##
|
||||
|
||||
# Human-readable language name.
name = 'Greek'
|
||||
# Language code; reused below as the Wikipedia code.
code = 'el'
|
||||
# NOTE(review): presumably tells the model builder whether ASCII letters
# belong to the language - confirm in BuildLangModel.py.
use_ascii = False
|
||||
# Charsets for which tables are generated; the strings match the `name`
# declared by the corresponding charset description files.
charsets = ['ISO-8859-7', 'WINDOWS-1253']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
# Lowercase Greek letters, final sigma included; accented vowels are not
# listed here.
alphabet = 'αβγδεζηθικλμνξοπρσςτυφχψω'
|
||||
# Wikipedia page title(s) handed to the crawler.
# NOTE(review): presumably the page(s) the crawl starts from - confirm.
start_pages = ['Πύλη:Κύρια']
|
||||
# NOTE(review): presumably selects the Wikipedia language edition - confirm.
wikipedia_code = code
|
||||
# NOTE(review): presumably makes upper- and lowercase forms of a letter
# share one frequency order - confirm in BuildLangModel.py.
case_mapping = True
|
||||
|
||||
# A function to clean content returned by the `wikipedia` python lib,
|
||||
# in case some unwanted data has been overlooked.
|
||||
def clean_wikipedia_content(content):
    """Reduce section headings carrying the Greek "edit" label to their title.

    A heading such as ``== Ιστορία Επεξεργασία ==`` (heading markers around
    a title followed by the "Επεξεργασία" link label) is replaced by the
    bare title ``Ιστορία``. Any text that does not match this shape,
    including headings without the label, is returned untouched.

    :param content: page text to clean.
    :return: the text with every matching heading replaced by its title.
    """
    # Group 1 captures the opening run of '=' so that the closing run must
    # have the same length. Group 2 is lazy on purpose: a greedy `[^=]+`
    # would swallow the blanks in front of the label, leave nothing for the
    # following ` *`, and keep trailing whitespace in the title.
    heading = r'(=+) *([^=]+?) *Επεξεργασία \1'
    return re.sub(heading, r'\2', content)
|
||||
@ -36,211 +36,184 @@
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
/****************************************************************
|
||||
CTR: Control characters that usually do not exist in any text
|
||||
RET: Carriage/Return
|
||||
SYM: symbol (punctuation) that does not belong to word
|
||||
NUM: 0 - 9
|
||||
|
||||
*****************************************************************/
|
||||
/********* Language model for: Greek *********/
|
||||
|
||||
//Character Mapping Table:
|
||||
static const unsigned char Latin7_CharToOrderMap[] =
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2015-12-13 17:54:25.105295
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
* ILL: illegal character.
|
||||
* CTR: control character specific to the charset.
|
||||
* RET: carriage return or line feed.
|
||||
* SYM: symbol (punctuation) that does not belong to word.
|
||||
* NUM: 0 - 9.
|
||||
*
|
||||
* Other characters are ordered by probabilities
|
||||
* (0 is the most common character in the language).
|
||||
*
|
||||
* Orders are generic to a language. So the codepoint with order X in
|
||||
* CHARSET1 maps to the same character as the codepoint with the same
|
||||
* order X in CHARSET2 for the same language.
|
||||
* As such, it is possible for some orders to be missing. For instance the
|
||||
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||
* even though they are both used for French. Same for the euro sign.
|
||||
*/
|
||||
static const unsigned char Windows_1253_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||
SYM, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,SYM,SYM,SYM,SYM,SYM, //50
|
||||
SYM, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,SYM,SYM,SYM,SYM,CTR, //70
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //80
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //90
|
||||
SYM,233, 90,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 74,ILL,SYM, //a0
|
||||
SYM,SYM,SYM,SYM,247,248, 61, 36, 46, 71, 73,SYM, 54,SYM,108,123, //b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
|
||||
35, 48,ILL, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,ILL, //f0
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */
|
||||
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */
|
||||
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,ILL,SYM,ILL,ILL,ILL,ILL, /* 9X */
|
||||
SYM,SYM, 17,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 61,SYM,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */
|
||||
55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */
|
||||
11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */
|
||||
62, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */
|
||||
11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const unsigned char Iso_8859_7_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 4X */
|
||||
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 33, 51, 41, 43, 32, 53, 50, 48, 31, 56, 45, 42, 46, 35, 34, /* 6X */
|
||||
47, 60, 37, 36, 39, 44, 54, 49, 57, 52, 59,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM, 17,SYM, 18, 22, 15,SYM, 16,SYM, 24, 26, /* BX */
|
||||
55, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* CX */
|
||||
11, 6,ILL, 8, 2, 13, 28, 23, 40, 21, 38, 58, 17, 18, 22, 15, /* DX */
|
||||
63, 0, 27, 19, 20, 4, 30, 9, 25, 3, 7, 14, 12, 5, 29, 1, /* EX */
|
||||
11, 6, 10, 8, 2, 13, 28, 23, 40, 21, 38, 58, 16, 24, 26,ILL, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 1299
|
||||
* First 512 sequences: 0.9690985257709991
|
||||
* Next 512 sequences (512-1024): 0.029851434797603802
|
||||
* Rest: 0.0010500394313971116
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
static const PRUint8 GreekLangModel[] =
|
||||
{
|
||||
1,2,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,2,2,3,
|
||||
3,0,2,3,3,3,1,3,3,3,3,0,0,0,0,0,0,0,3,0,2,
|
||||
2,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,
|
||||
3,2,2,3,3,3,2,3,3,2,2,0,0,0,0,0,0,0,2,0,2,
|
||||
3,3,2,3,3,2,3,1,3,3,2,0,3,3,2,3,3,3,3,2,
|
||||
0,3,3,1,3,1,3,2,1,0,3,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,1,3,3,3,3,3,2,3,3,3,1,3,1,3,3,3,3,
|
||||
3,3,2,3,0,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,2,
|
||||
2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,2,1,3,
|
||||
3,3,1,3,3,3,2,3,3,3,3,0,1,0,0,0,1,0,2,0,2,
|
||||
3,3,3,3,3,3,2,2,3,3,2,1,2,2,2,3,3,3,3,3,
|
||||
3,3,3,2,2,3,3,1,1,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,1,2,3,3,2,3,3,3,3,3,
|
||||
3,3,3,3,3,3,3,3,3,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,3,2,3,2,2,2,3,3,3,3,3,3,2,
|
||||
3,3,3,2,3,2,3,2,2,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,2,3,3,3,0,3,3,3,3,3,3,3,3,1,
|
||||
2,3,3,3,3,3,3,2,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,3,0,2,3,3,3,3,1,3,3,3,0,3,0,0,0,1,3,
|
||||
2,0,0,2,1,3,0,1,3,2,0,0,0,0,0,0,0,0,0,0,2,
|
||||
0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,1,1,3,1,2,1,2,3,3,3,3,3,3,
|
||||
1,3,3,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,0,3,3,3,2,1,2,3,2,3,3,2,2,3,3,3,3,0,
|
||||
0,3,3,0,2,0,3,3,3,0,1,0,0,0,0,0,0,0,0,0,2,
|
||||
3,2,3,2,2,3,3,3,3,2,3,3,3,0,3,2,2,3,2,3,
|
||||
3,0,2,3,0,3,1,3,2,3,3,0,0,0,0,0,0,0,1,0,3,
|
||||
3,3,3,3,3,2,0,2,2,3,2,2,3,3,3,3,3,3,3,3,
|
||||
2,3,3,2,3,2,3,3,2,1,1,0,0,0,0,0,0,0,0,0,1,
|
||||
3,3,3,0,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3,
|
||||
3,3,0,3,0,3,0,2,2,2,3,0,0,0,0,0,0,0,0,0,2,
|
||||
2,2,3,2,3,3,3,3,3,2,3,3,3,0,3,0,0,0,0,3,
|
||||
3,1,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2,
|
||||
2,2,3,3,2,3,3,3,3,2,3,3,3,1,3,0,0,0,0,3,
|
||||
3,1,0,3,0,3,0,3,3,3,3,0,0,0,0,0,0,0,1,0,2,
|
||||
3,3,3,2,2,3,3,3,3,1,3,3,3,0,3,0,0,0,0,3,
|
||||
3,3,0,3,0,2,0,2,3,2,2,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,0,3,3,3,3,3,0,3,0,0,3,2,3,3,3,3,3,3,
|
||||
3,3,3,3,2,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,0,3,3,1,3,1,0,3,0,0,3,3,0,3,3,3,3,0,
|
||||
1,3,3,0,3,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,1,3,2,1,3,3,2,3,0,3,3,3,0,2,1,0,2,1,3,
|
||||
2,0,2,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,1,3,3,3,3,0,3,2,3,1,3,0,0,0,0,3,
|
||||
2,0,0,2,0,3,0,1,2,2,1,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,2,3,3,3,3,0,1,3,1,0,2,3,2,3,2,3,3,0,
|
||||
0,3,3,0,2,3,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,2,3,3,3,2,3,2,3,3,3,0,3,0,0,0,0,3,
|
||||
2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,2,
|
||||
3,3,0,3,3,3,3,0,0,3,0,0,3,3,2,2,3,3,3,0,
|
||||
0,2,3,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,3,2,0,3,3,2,3,2,3,3,3,0,2,0,0,0,0,2,
|
||||
2,2,0,2,0,3,0,2,2,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,0,3,3,1,3,1,2,3,0,0,1,2,3,3,3,3,3,2,
|
||||
2,2,2,0,2,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,1,3,1,2,3,0,1,1,3,2,3,2,3,3,2,
|
||||
0,3,3,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,3,0,0,0,0,3,0,1,0,2,0,2,2,3,3,0,
|
||||
0,3,2,0,3,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,0,3,3,2,1,0,0,2,0,0,2,2,1,2,2,2,2,0,
|
||||
0,3,2,1,1,0,3,2,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,2,3,2,3,3,2,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,3,3,3,0,2,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
1,0,0,0,0,0,0,0,0,0,0,2,2,1,2,3,2,3,0,3,0,
|
||||
0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,1,1,2,3,2,3,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,1,0,2,0,
|
||||
0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,3,2,3,2,2,2,0,2,0,
|
||||
2,0,2,0,1,2,2,3,2,0,1,1,2,0,2,0,2,1,0,2,
|
||||
1,0,0,1,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,1,1,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,3,2,2,2,0,2,2,0,2,0,
|
||||
2,2,0,2,3,0,0,0,0,3,0,0,0,2,0,2,2,1,1,0,
|
||||
0,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
|
||||
static const unsigned char win1253_CharToOrderMap[] =
|
||||
const SequenceModel Windows_1253GreekModel =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, //00
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, //10
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, //20
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, //30
|
||||
SYM, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, //40
|
||||
79,118,105, 83, 67,114,119, 95, 99,109,188,SYM,SYM,SYM,SYM,SYM, //50
|
||||
SYM, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, //60
|
||||
78,115, 65, 66, 58, 76,106,103, 87,107,112,SYM,SYM,SYM,SYM,SYM, //70
|
||||
CTR,ILL,CTR,CTR,CTR,CTR,CTR,CTR,ILL,CTR,ILL,CTR,ILL,ILL,ILL,ILL, //80
|
||||
ILL,CTR,CTR,CTR,CTR,CTR,CTR,CTR,ILL,CTR,ILL,CTR,ILL,ILL,ILL,ILL, //90
|
||||
SYM,233, 61,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 74,SYM,SYM, //a0
|
||||
SYM,SYM,SYM,SYM,247,SYM,SYM, 36, 46, 71, 73,SYM, 54,SYM,108,123, //b0
|
||||
110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, //c0
|
||||
35, 48,ILL, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, //d0
|
||||
124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, //e0
|
||||
9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,ILL, //f0
|
||||
};
|
||||
|
||||
//Model Table:
|
||||
//total sequences: 100%
|
||||
//first 512 sequences: 98.2851%
|
||||
//first 1024 sequences:1.7001%
|
||||
//rest sequences: 0.0359%
|
||||
//negative sequences: 0.0148%
|
||||
static const PRUint8 GreekLangModel[] =
|
||||
{
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
|
||||
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
|
||||
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
|
||||
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
|
||||
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
|
||||
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
|
||||
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
|
||||
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
|
||||
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
|
||||
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
|
||||
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
|
||||
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
|
||||
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
|
||||
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
|
||||
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
|
||||
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
|
||||
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
|
||||
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
|
||||
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
||||
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
|
||||
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
|
||||
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
|
||||
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
|
||||
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
const SequenceModel Latin7GreekModel =
|
||||
{
|
||||
Latin7_CharToOrderMap,
|
||||
Windows_1253_CharToOrderMap,
|
||||
GreekLangModel,
|
||||
64,
|
||||
(float)0.982851,
|
||||
PR_FALSE,
|
||||
"ISO-8859-7"
|
||||
};
|
||||
|
||||
const SequenceModel Win1253GreekModel =
|
||||
{
|
||||
win1253_CharToOrderMap,
|
||||
GreekLangModel,
|
||||
64,
|
||||
(float)0.982851,
|
||||
41,
|
||||
(float)0.9690985257709991,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1253"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_7GreekModel =
|
||||
{
|
||||
Iso_8859_7_CharToOrderMap,
|
||||
GreekLangModel,
|
||||
41,
|
||||
(float)0.9690985257709991,
|
||||
PR_FALSE,
|
||||
"ISO-8859-7"
|
||||
};
|
||||
@ -53,8 +53,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
mProbers[4] = new nsSingleByteCharSetProber(&Ibm866RussianModel);
|
||||
mProbers[5] = new nsSingleByteCharSetProber(&Ibm855RussianModel);
|
||||
|
||||
mProbers[6] = new nsSingleByteCharSetProber(&Latin7GreekModel);
|
||||
mProbers[7] = new nsSingleByteCharSetProber(&Win1253GreekModel);
|
||||
mProbers[6] = new nsSingleByteCharSetProber(&Iso_8859_7GreekModel);
|
||||
mProbers[7] = new nsSingleByteCharSetProber(&Windows_1253GreekModel);
|
||||
|
||||
mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
|
||||
mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
|
||||
|
||||
@ -130,8 +130,8 @@ extern const SequenceModel MacCyrillicRussianModel;
|
||||
extern const SequenceModel Ibm866RussianModel;
|
||||
extern const SequenceModel Ibm855RussianModel;
|
||||
|
||||
extern const SequenceModel Latin7GreekModel;
|
||||
extern const SequenceModel Win1253GreekModel;
|
||||
extern const SequenceModel Iso_8859_7GreekModel;
|
||||
extern const SequenceModel Windows_1253GreekModel;
|
||||
|
||||
extern const SequenceModel Latin5BulgarianModel;
|
||||
extern const SequenceModel Win1251BulgarianModel;
|
||||
|
||||
@ -34,8 +34,7 @@ foreach(dir ${dirs})
|
||||
# enough). We will have to take a closer look and fix these, but
|
||||
# there is no need to break the whole `make test` right now,
|
||||
# which may make actual regressions harder to notice.
|
||||
if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
|
||||
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
|
||||
if ("${lang}:${charset}" STREQUAL "ja:utf-16le" OR
|
||||
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
|
||||
"${lang}:${charset}" STREQUAL "es:iso-8859-15" OR
|
||||
"${lang}:${charset}" STREQUAL "he:iso-8859-8")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user