mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-08 01:36:41 +08:00
LangModels: adding Spanish support.
With 3 charsets: ISO-8859-1, ISO-8859-15 and Windows-1252.
This commit is contained in:
parent
055332ac7d
commit
ffabb65712
@ -57,6 +57,10 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
|
||||
* MAC-CYRILLIC
|
||||
* IBM866
|
||||
* IBM855
|
||||
* Spanish
|
||||
* ISO-8859-1
|
||||
* ISO-8859-15
|
||||
* WINDOWS-1252
|
||||
* Thai
|
||||
* TIS-620
|
||||
* ISO-8859-11
|
||||
|
||||
109
script/BuildLangModelLogs/LangSpanishModel.log
Normal file
109
script/BuildLangModelLogs/LangSpanishModel.log
Normal file
@ -0,0 +1,109 @@
|
||||
= Logs of language model for Spanish (es) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2015-12-12 18:37:37.085123
|
||||
- Maximum depth: 2
|
||||
- Max number of pages: 50
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Wikipedia:Portada (revision 84894710)
|
||||
11 de diciembre (revision 87735970)
|
||||
12 de diciembre (revision 87742023)
|
||||
13 de diciembre (revision 87697780)
|
||||
1474 (revision 66715698)
|
||||
1915 (revision 86935345)
|
||||
2000 (revision 87686385)
|
||||
2015 (revision 87743360)
|
||||
Actuación (revision 87459085)
|
||||
Akiyuki Nosaka (revision 87726149)
|
||||
Alberto Podestá (revision 87729965)
|
||||
Alejandro Magno (revision 87717064)
|
||||
Argentina (revision 87742018)
|
||||
Arnold Peralta (revision 87733100)
|
||||
Atentados del 11 de diciembre de 2007 (revision 87720544)
|
||||
Cantante (revision 86761085)
|
||||
Canto (revision 87664585)
|
||||
Carlo Furno (revision 87726011)
|
||||
Ciencia ficción (revision 87662615)
|
||||
Copa Mundial de Clubes de la FIFA 2015 (revision 87734956)
|
||||
Corona de Castilla (revision 87209578)
|
||||
Crisis migratoria en Europa (revision 87609406)
|
||||
Dictadura de Primo de Rivera (revision 87371131)
|
||||
Dionisio Miguel Recio (revision 87724426)
|
||||
Disneyland (revision 87665192)
|
||||
Dolph Schayes (revision 87730770)
|
||||
Día Internacional de las Montañas (revision 87739490)
|
||||
El discurso del rey (revision 87570241)
|
||||
Elecciones regionales de Francia de 2015 (revision 87744011)
|
||||
Estados Unidos (revision 87510736)
|
||||
Fiction House (revision 87732511)
|
||||
Filoxeno de Eretria (revision 83958621)
|
||||
Frank Sinatra (revision 87742871)
|
||||
Fundación Wikimedia (revision 87703852)
|
||||
Geoffrey Marcy (revision 87706505)
|
||||
Gheorghe Gruia (revision 87737327)
|
||||
Grupo de Acción Republicana (revision 87739104)
|
||||
Guerra contra el Estado Islámico (revision 87648946)
|
||||
Here We Go Again (canción) (revision 87680365)
|
||||
Isaac Asimov (revision 87591711)
|
||||
Isabel I de Castilla (revision 87743713)
|
||||
John "Hot Rod" Williams (revision 87730438)
|
||||
José Subirà-Puig (revision 87740413)
|
||||
Julio Terrazas Sandoval (revision 87736542)
|
||||
Libertad Lamarque (revision 87508996)
|
||||
Mosaico de Issos (revision 87731652)
|
||||
Museo Arqueológico Nacional de Nápoles (revision 87302262)
|
||||
Philip K. Dick (revision 87725371)
|
||||
Planet Comics (revision 86698920)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2015-12-12 18:39:02.288858
|
||||
|
||||
52 characters appeared 991829 times.
|
||||
|
||||
First 33 characters:
|
||||
[ 0] Char e: 12.571925200815867 %
|
||||
[ 1] Char a: 11.81988024145291 %
|
||||
[ 2] Char o: 8.07941691561751 %
|
||||
[ 3] Char n: 7.234513207417812 %
|
||||
[ 4] Char s: 7.042242160695039 %
|
||||
[ 5] Char i: 7.040528155559072 %
|
||||
[ 6] Char r: 6.8208330266608455 %
|
||||
[ 7] Char l: 5.722559029832763 %
|
||||
[ 8] Char d: 5.275707808503281 %
|
||||
[ 9] Char t: 4.668647518876742 %
|
||||
[10] Char c: 4.466999855821921 %
|
||||
[11] Char u: 3.673717949364255 %
|
||||
[12] Char m: 2.710547886782903 %
|
||||
[13] Char p: 2.4541528832086983 %
|
||||
[14] Char b: 1.3867309788280036 %
|
||||
[15] Char g: 1.2748165258325779 %
|
||||
[16] Char f: 0.925058654263991 %
|
||||
[17] Char y: 0.9045914164639268 %
|
||||
[18] Char v: 0.8877538365988492 %
|
||||
[19] Char ó: 0.8641610600214351 %
|
||||
[20] Char h: 0.7369213846338432 %
|
||||
[21] Char q: 0.5913317719082624 %
|
||||
[22] Char í: 0.5612862701130941 %
|
||||
[23] Char j: 0.43283670874717317 %
|
||||
[24] Char z: 0.38071078784750195 %
|
||||
[25] Char á: 0.37587124393418625 %
|
||||
[26] Char é: 0.29632124085905936 %
|
||||
[27] Char k: 0.2001353055819098 %
|
||||
[28] Char x: 0.18743150280945606 %
|
||||
[29] Char ñ: 0.17462687620547493 %
|
||||
[30] Char ú: 0.12865120902897575 %
|
||||
[31] Char w: 0.0972949974239511 %
|
||||
[32] Char ü: 0.004436248587206061 %
|
||||
|
||||
The first 33 characters have an accumulated ratio of 0.9999263986029848.
|
||||
|
||||
897 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.9970385677528184
|
||||
Next 512 (512-1024): 1.0082383152741046e-06
|
||||
Rest: 4.597017211338539e-17
|
||||
|
||||
- Processing end: 2015-12-12 18:39:02.460105
|
||||
77
script/langs/es.py
Normal file
77
script/langs/es.py
Normal file
@ -0,0 +1,77 @@
|
||||
#!/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
import re
|
||||
|
||||
## Mandatory Properties ##
|
||||
|
||||
# The human name for the language, in English.
|
||||
name = 'Spanish'
|
||||
# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
|
||||
# or use another catalog as a last resort.
|
||||
code = 'es'
|
||||
# ASCII characters are also used in Spanish.
|
||||
use_ascii = True
|
||||
# The charsets we want to support and create data for.
|
||||
charsets = ['ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
# Alphabet characters.
|
||||
# If use_ascii=True, there is no need to add any ASCII characters.
|
||||
# If case_mapping=True, there is no need to add several cases of a same
|
||||
# character (provided Python algorithms know the right cases).
|
||||
alphabet = 'ñáéíóúü'
|
||||
# The start page. Though optional, it is advised to choose one yourself.
|
||||
start_pages = ['Wikipedia:Portada']
|
||||
# Allows selecting a different language code for the Wikipedia URL.
|
||||
wikipedia_code = code
|
||||
# 'a' and 'A' will be considered the same character, and so on.
|
||||
# This uses Python algorithm to determine upper/lower-case of a given
|
||||
# character.
|
||||
case_mapping = True
|
||||
|
||||
# A function to clean content returned by the `wikipedia` python lib,
|
||||
# in case some unwanted data has been overlooked.
|
||||
def clean_wikipedia_content(content):
    """Remove section-heading markup that survived Wikipedia extraction.

    A heading of the form ``== Title Editar ==`` (any number of ``=``, the
    same on both sides) is replaced by the captured title text alone.  Text
    that contains no such heading is returned unchanged.
    """
    # Group 1: the run of '=' that opens the heading (must also close it).
    # Group 2: the heading text that precedes the "Editar" edit link.
    heading_with_edit_link = r'(=+) *([^=]+) *Editar \1'
    return re.sub(heading_with_edit_link, r'\2', content)
|
||||
@ -16,6 +16,7 @@ set(
|
||||
LangModels/LangGreekModel.cpp
|
||||
LangModels/LangHungarianModel.cpp
|
||||
LangModels/LangHebrewModel.cpp
|
||||
LangModels/LangSpanishModel.cpp
|
||||
LangModels/LangThaiModel.cpp
|
||||
LangModels/LangTurkishModel.cpp
|
||||
nsHebrewProber.cpp
|
||||
|
||||
201
src/LangModels/LangSpanishModel.cpp
Normal file
201
src/LangModels/LangSpanishModel.cpp
Normal file
@ -0,0 +1,201 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Communicator client code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
|
||||
/********* Language model for: Spanish *********/
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2015-12-12 18:39:02.290370
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
* ILL: illegal character.
|
||||
* CTR: control character specific to the charset.
|
||||
* RET: carriage return or line feed.
|
||||
* SYM: symbol (punctuation) that does not belong to word.
|
||||
* NUM: 0 - 9.
|
||||
*
|
||||
* Other characters are ordered by probabilities
|
||||
* (0 is the most common character in the language).
|
||||
*
|
||||
* Orders are generic to a language. So the codepoint with order X in
|
||||
* CHARSET1 maps to the same character as the codepoint with the same
|
||||
* order X in CHARSET2 for the same language.
|
||||
* As such, some orders may be missing from a given charset. For instance the
|
||||
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||
* even though they are both used for French. Same for the euro sign.
|
||||
*/
|
||||
/* Byte-to-order lookup for Spanish text encoded as ISO-8859-1.
 * One entry per byte value, 16 entries per row: the row is the high nibble
 * (see the trailing "NX" comment), the column is the low nibble.
 * ASCII upper- and lower-case letters share the same order (rows 4X/5X
 * repeat in 6X/7X).
 * NOTE(review): orders above 51 are not the same for a given byte in the
 * three Spanish tables of this file (e.g. 0xCC is 53 here); only the lower
 * orders appear to be shared across charsets. */
static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 52,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 53, 22, 41, 43, /* CX */
|
||||
49, 29, 38, 19, 50, 54, 34,SYM, 44, 51, 30, 55, 32, 42, 56, 57, /* DX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 58, 22, 41, 43, /* EX */
|
||||
49, 29, 38, 19, 50, 59, 34,SYM, 44, 51, 30, 60, 32, 42, 61, 62, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
/* Byte-to-order lookup for Spanish text encoded as ISO-8859-15.
 * One entry per byte value, 16 entries per row: the row is the high nibble
 * (see the trailing "NX" comment), the column is the low nibble.
 * Unlike the ISO-8859-1 table in this file, bytes 0xA6, 0xA8, 0xB4, 0xB8 and
 * 0xBC-0xBE carry letter orders here instead of SYM.
 * NOTE(review): orders above 51 are numbered differently from the other two
 * Spanish tables (e.g. 0xCC is 71 here); only the lower orders appear to be
 * shared across charsets. */
static const unsigned char Iso_8859_15_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM, 63,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM, 65, 66,SYM,SYM, 67,SYM,SYM,SYM, 68, 69, 70,SYM, /* BX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 71, 22, 41, 43, /* CX */
|
||||
49, 29, 38, 19, 50, 72, 34,SYM, 44, 51, 30, 73, 32, 42, 74, 75, /* DX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 76, 22, 41, 43, /* EX */
|
||||
49, 29, 38, 19, 50, 77, 34,SYM, 44, 51, 30, 78, 32, 42, 79, 80, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
/* Byte-to-order lookup for Spanish text encoded as Windows-1252.
 * One entry per byte value, 16 entries per row: the row is the high nibble
 * (see the trailing "NX" comment), the column is the low nibble.
 * Unlike the two ISO-8859 tables in this file, rows 8X/9X are not control
 * characters: they hold symbols, a few letter orders, and ILL for the
 * bytes marked illegal in this charset.
 * NOTE(review): orders above 51 are numbered differently from the other two
 * Spanish tables (e.g. 0xCC is 90 here); only the lower orders appear to be
 * shared across charsets. */
static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 4X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 1, 14, 10, 8, 0, 16, 15, 20, 5, 23, 27, 7, 12, 3, 2, /* 6X */
|
||||
13, 21, 6, 4, 9, 11, 18, 31, 28, 17, 24,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM, 81,SYM,SYM,SYM,SYM,SYM,SYM, 82,SYM, 83,ILL, 84,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 85,SYM, 86,ILL, 87, 88, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 89,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 90, 22, 41, 43, /* CX */
|
||||
49, 29, 38, 19, 50, 91, 34,SYM, 44, 51, 30, 92, 32, 42, 93, 94, /* DX */
|
||||
33, 25, 39, 46, 37, 45, 47, 35, 36, 26, 48, 40, 95, 22, 41, 43, /* EX */
|
||||
49, 29, 38, 19, 50, 96, 34,SYM, 44, 51, 30, 97, 32, 42, 98, 99, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 897
|
||||
* First 512 sequences: 0.9970385677528184
|
||||
* Next 512 sequences (512-1024): 0.0029614322471815486
|
||||
* Rest: 4.597017211338539e-17
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
/* Sequence table shared by the three Spanish SequenceModel definitions in
 * this file: 33 rows of 33 entries each.
 * NOTE(review): presumably indexed by the orders of two consecutive
 * characters (row = first, column = second), with larger values meaning a
 * more likely pair - confirm against the prober that consumes it. */
static const PRUint8 SpanishLangModel[] =
|
||||
{
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,2,3,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,3,3,3,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,2,2,3,3,2,2,3,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,0,2,3,3,3,0,0,2,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,0,3,2,2,
|
||||
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,2,2,0,2,2,0,
|
||||
3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,2,3,3,3,3,2,3,2,2,3,3,2,0,0,2,2,2,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,2,2,3,2,2,3,2,3,3,0,3,2,2,3,3,0,0,0,2,2,2,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,2,3,0,3,3,2,3,0,2,3,3,3,0,0,2,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,2,0,
|
||||
3,3,3,3,3,3,2,2,2,2,2,3,3,3,3,2,2,3,0,3,2,0,3,2,0,3,3,2,2,0,3,2,2,
|
||||
3,3,3,2,3,3,3,3,2,3,3,3,2,3,3,0,2,2,2,3,3,0,3,2,0,3,3,2,0,0,3,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,2,3,2,2,3,3,0,3,2,2,0,0,2,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,0,3,3,0,2,2,2,2,2,3,3,0,3,2,2,2,3,2,0,0,3,2,3,
|
||||
3,3,3,2,2,3,3,3,2,3,2,3,2,2,2,2,3,2,0,3,0,0,3,2,0,2,2,2,0,0,3,2,0,
|
||||
3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,2,2,2,0,3,2,0,0,2,2,2,2,2,0,0,2,2,0,
|
||||
3,3,3,2,2,3,2,2,2,0,2,3,0,2,0,2,2,2,2,3,0,0,3,0,0,2,3,2,0,0,0,0,0,
|
||||
0,0,0,3,3,0,3,3,3,3,3,0,3,3,2,3,2,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||
3,3,3,3,2,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,0,3,2,2,2,3,0,2,0,2,2,2,
|
||||
2,3,2,0,2,2,0,2,2,2,0,3,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,0,2,2,3,3,3,2,3,2,3,3,3,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,0,3,2,2,2,2,0,3,2,2,0,0,0,0,0,3,0,0,2,2,0,2,3,0,0,0,2,0,2,
|
||||
3,3,3,2,0,3,2,0,2,2,2,3,2,2,2,3,0,2,0,3,2,3,2,0,3,3,2,2,0,0,2,0,0,
|
||||
2,0,0,3,3,2,3,3,2,3,3,2,3,3,2,3,3,2,2,0,2,2,0,2,2,0,0,0,2,2,0,0,0,
|
||||
2,3,2,3,3,2,3,3,3,3,3,2,2,3,2,3,2,2,2,0,0,0,0,2,0,0,0,0,3,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,2,2,2,3,3,0,2,2,2,3,2,0,2,0,2,0,0,0,0,2,0,0,2,2,0,
|
||||
3,3,3,2,2,3,2,2,2,3,3,3,2,3,2,0,2,2,3,2,2,2,0,2,0,2,2,2,3,0,0,2,0,
|
||||
3,3,3,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,3,0,0,2,0,0,0,0,0,0,0,
|
||||
2,3,2,3,3,0,2,3,2,3,2,0,3,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,0,2,0,0,0,
|
||||
3,3,3,3,2,3,2,2,2,2,2,2,0,0,2,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,2,0,
|
||||
3,0,0,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
/* Spanish model for ISO-8859-1: pairs the ISO-8859-1 byte-to-order map with
 * the shared Spanish sequence table.  Field meanings follow the positional
 * layout of SequenceModel, which is declared in nsSBCharSetProber.h. */
const SequenceModel Iso_8859_1SpanishModel =
|
||||
{
|
||||
Iso_8859_1_CharToOrderMap, /* byte -> order table for this charset */
|
||||
SpanishLangModel, /* 33 x 33 sequence table shared by all Spanish models */
|
||||
33, /* matches the row/column count of SpanishLangModel */
|
||||
(float)0.9970385677528184, /* "First 512 sequences" ratio from the Model Table comment */
|
||||
PR_TRUE, /* NOTE(review): meaning is defined by SequenceModel, not visible here */
|
||||
"ISO-8859-1" /* charset name for this model */
|
||||
};
|
||||
|
||||
/* Spanish model for ISO-8859-15: pairs the ISO-8859-15 byte-to-order map
 * with the shared Spanish sequence table.  Field meanings follow the
 * positional layout of SequenceModel, which is declared in
 * nsSBCharSetProber.h. */
const SequenceModel Iso_8859_15SpanishModel =
|
||||
{
|
||||
Iso_8859_15_CharToOrderMap, /* byte -> order table for this charset */
|
||||
SpanishLangModel, /* 33 x 33 sequence table shared by all Spanish models */
|
||||
33, /* matches the row/column count of SpanishLangModel */
|
||||
(float)0.9970385677528184, /* "First 512 sequences" ratio from the Model Table comment */
|
||||
PR_TRUE, /* NOTE(review): meaning is defined by SequenceModel, not visible here */
|
||||
"ISO-8859-15" /* charset name for this model */
|
||||
};
|
||||
|
||||
/* Spanish model for Windows-1252: pairs the Windows-1252 byte-to-order map
 * with the shared Spanish sequence table.  Field meanings follow the
 * positional layout of SequenceModel, which is declared in
 * nsSBCharSetProber.h. */
const SequenceModel Windows_1252SpanishModel =
|
||||
{
|
||||
Windows_1252_CharToOrderMap, /* byte -> order table for this charset */
|
||||
SpanishLangModel, /* 33 x 33 sequence table shared by all Spanish models */
|
||||
33, /* matches the row/column count of SpanishLangModel */
|
||||
(float)0.9970385677528184, /* "First 512 sequences" ratio from the Model Table comment */
|
||||
PR_TRUE, /* NOTE(review): meaning is defined by SequenceModel, not visible here */
|
||||
"WINDOWS-1252" /* charset name for this model */
|
||||
};
|
||||
@ -86,16 +86,20 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
mProbers[16] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||
mProbers[17] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
|
||||
|
||||
mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
|
||||
mProbers[19] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
|
||||
mProbers[18] = new nsSingleByteCharSetProber(&Iso_8859_1SpanishModel);
|
||||
mProbers[19] = new nsSingleByteCharSetProber(&Iso_8859_15SpanishModel);
|
||||
mProbers[20] = new nsSingleByteCharSetProber(&Windows_1252SpanishModel);
|
||||
|
||||
mProbers[20] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
|
||||
mProbers[21] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
|
||||
mProbers[21] = new nsSingleByteCharSetProber(&Iso_8859_2HungarianModel);
|
||||
mProbers[22] = new nsSingleByteCharSetProber(&Windows_1250HungarianModel);
|
||||
|
||||
mProbers[22] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
|
||||
mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_1GermanModel);
|
||||
mProbers[24] = new nsSingleByteCharSetProber(&Windows_1252GermanModel);
|
||||
|
||||
mProbers[23] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
|
||||
mProbers[24] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
|
||||
mProbers[25] = new nsSingleByteCharSetProber(&Iso_8859_3EsperantoModel);
|
||||
|
||||
mProbers[26] = new nsSingleByteCharSetProber(&Iso_8859_3TurkishModel);
|
||||
mProbers[27] = new nsSingleByteCharSetProber(&Iso_8859_9TurkishModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 25
|
||||
#define NUM_OF_SBCS_PROBERS 28
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -148,6 +148,10 @@ extern const SequenceModel Iso_8859_15FrenchModel;
|
||||
extern const SequenceModel Iso_8859_1FrenchModel;
|
||||
extern const SequenceModel Windows_1252FrenchModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_15SpanishModel;
|
||||
extern const SequenceModel Iso_8859_1SpanishModel;
|
||||
extern const SequenceModel Windows_1252SpanishModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_1GermanModel;
|
||||
extern const SequenceModel Windows_1252GermanModel;
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user