Adding French Windows-1252 support.

This commit is contained in:
Jehan 2015-12-03 21:22:30 +01:00
parent 5d3fb3dc2f
commit 0270b1e856
7 changed files with 274 additions and 237 deletions

View File

@ -1,188 +1,116 @@
= Logs of language model for French (fr) =
- Generated by BuildLangModel.py
- Started: 2015-11-30 18:53:23.881008
- Started: 2015-12-03 21:07:37.508739
- Maximum depth: 2
- Max number of pages: 10
- Max number of pages: 50
== Parsed pages ==
Wikipédia:Accueil_principal (revision 115957655)
Bœuf (animal) (revision 115500130)
1672 (revision 120907902)
1727 (revision 120908296)
1500 av. J.-C. (revision 110583603)
1898 dans les chemins de fer (revision 106801806)
1913 dans les chemins de fer (revision 112852042)
1974 dans les chemins de fer (revision 90170756)
1er décembre (revision 121012781)
2009 dans les chemins de fer (revision 107042206)
2011 dans les chemins de fer (revision 109560866)
24 novembre (revision 120782024)
26 novembre (revision 120833172)
27 novembre (revision 120860032)
28 novembre (revision 120900893)
30 novembre (revision 120934923)
Amsterdam (revision 120834895)
Amérique (revision 120916912)
An mil (revision 120416538)
Ancien Régime (revision 120708739)
Anjou (revision 120590957)
António Costa (revision 120928729)
Armée de l'air turque (revision 120764207)
Artémise II (revision 120920820)
Attentat du 24 novembre 2015 à Tunis (revision 120924574)
Barbro Hiort af Ornäs (revision 120933311)
Bataille dAttu (revision 120942542)
Bretagne (revision 120828180)
Candé (revision 120928722)
Canton de Candé (revision 120383860)
Chef-lieu (revision 119340707)
Chouannerie (revision 119799524)
Commune (France) (revision 120627882)
Conférence de Paris de 2015 sur le climat (revision 120944002)
29 novembre (revision 120918160)
2 décembre (revision 121025437)
30 novembre (revision 120947714)
3 décembre (revision 121030621)
Amphibien (revision 120332329)
Angleterre (revision 120784240)
Anne-Josèphe Théroigne de Méricourt (revision 121009789)
Années 1930 (revision 120558236)
Antonio Troyo Calderón (revision 121028881)
António Costa (revision 120993829)
Attentat du 24 novembre 2015 à Tunis (revision 121015161)
Balard (métro de Paris) (revision 118979088)
Bois de Vincennes (revision 120822909)
Buse à tête blanche (revision 121009499)
Californie (revision 120922479)
Charenton-le-Pont (revision 120210025)
Charenton - Écoles (métro de Paris) (revision 108644873)
Chronique médiévale (revision 100253272)
Concorde (métro de Paris) (revision 120856751)
Conférence de Paris de 2015 sur le climat (revision 121029398)
Crise de la dette publique grecque (revision 120905208)
Crise entre la Colombie et le Venezuela de 2015 (revision 120857143)
Crise migratoire en Europe (revision 120906358)
Crise russo-turque de 2015 (revision 120936864)
Deuxième guerre civile libyenne (revision 120673125)
Déesse mère (revision 120904195)
Départements français (revision 120873309)
Effet Shapiro (revision 120893782)
Eldar Riazanov (revision 120924339)
Fatima Mernissi (revision 120942794)
Crise migratoire en Europe (revision 121002308)
Crise russo-turque de 2015 (revision 121030214)
Créteil (revision 120684618)
Créteil - Préfecture (métro de Paris) (revision 113486387)
Deuxième guerre civile libyenne (revision 121027704)
Devise (monnaie) (revision 121015771)
Droits de tirage spéciaux (revision 121009135)
Décembre 2015 (revision 121010045)
Département français (revision 120993190)
Eldar Riazanov (revision 120996396)
Enfants verts de Woolpit (revision 121002303)
Ernst Larsen (revision 121026772)
Fatima Mernissi (revision 120992271)
Fejervarya cancrivora (revision 120353807)
Fonds monétaire international (revision 120754406)
Français (revision 120883858)
Gerry Byrne (football) (revision 120943526)
Guerre civile sud-soudanaise (revision 120672963)
Guerre civile syrienne (revision 120868598)
Guerre d'Afghanistan (depuis 2015) (revision 120675052)
Guerre du Donbass (revision 120862085)
Guerre du Yémen (depuis 2001) (revision 118472483)
Insurrection djihadiste au Nigeria (revision 120550223)
Irwin Shapiro (revision 116730530)
Ismaïl ben Chérif (revision 120930731)
Ivan Hlevnjak (revision 120917619)
Jean Corti (revision 120935599)
Jean Joubert (revision 120924134)
Karashima Noboru (revision 120892854)
Latin (revision 120360207)
Luc Bondy (revision 120941142)
Maine-et-Loire (revision 120890165)
Marches de Bretagne (revision 115772332)
Mark Behr (revision 120943649)
Maroc (revision 120937137)
Maurice Strong (revision 120927161)
Mausole (revision 120904648)
Moyen Âge (revision 120943615)
Novembre 2015 (revision 120866496)
Olene S. Walker (revision 120927070)
Paternité (revision 119371049)
Pays de la Loire (revision 120719853)
Philippe II Auguste (revision 120910593)
Philippe Washer (revision 120939362)
Premier ministre de Portugal (revision 120888501)
Relativité générale (revision 120814809)
Régions françaises (revision 120692851)
Seconde Guerre mondiale (revision 120884001)
Seconde guerre civile irakienne (revision 120893282)
Shigeru Mizuki (revision 120931351)
Soukhoï Su-24 (revision 120892538)
Spuistraat (revision 119667601)
Syrie (revision 120692724)
Tahir Elçi (revision 120942499)
Tunis (revision 120628797)
Vague de violence israélo-palestinienne de l'automne 2015 (revision 120927782)
Wiki (revision 120671138)
Wikimedia Foundation (revision 120519147)
Wikipédia en français (revision 120692561)
XVIIIe siècle (revision 119843235)
XVIIe siècle (revision 120773755)
Église de Jésus-Christ des saints des derniers jours (revision 120924507)
Agriculture (revision 120943777)
Anesthésie (revision 120319446)
Animal de trait (revision 120819989)
Bien-être animal (revision 120205455)
Bière (revision 119961318)
Bos taurus (revision 119683704)
Bête de somme (revision 117842569)
Bœuf Gras (revision 119942055)
Bœuf de Kobe (revision 120829709)
Castration (revision 119751330)
Chapon (revision 114928344)
Charrette (revision 120909407)
Charrue (revision 120819690)
Colonisation (revision 120146837)
Edme Gaulle (revision 118241504)
Europe de l'Ouest (revision 120854797)
Géant-Bœuf du Carnaval de Paris (revision 118480900)
Hongre (revision 120607208)
Hypoxie (revision 118470557)
Japon (revision 120742182)
Labour (revision 120144019)
Marché des Blancs-Manteaux (revision 106807185)
Monde musulman (revision 120793714)
Mâle (biologie) (revision 111721849)
Mésopotamie (revision 120642895)
Promenade du Bœuf Gras au Carnaval de Paris (revision 120874240)
Rue des Hospitalières-Saint-Gervais (revision 107834996)
Takayama (revision 118810594)
Taureau (revision 120459397)
Testicule (revision 120432335)
Testostérone (revision 119909685)
Traction animale (revision 120819989)
Traction bovine (revision 111651361)
Traîneau (revision 120604907)
Viande (revision 120600247)
Viande bovine (revision 119480442)
Wagyu (revision 120910460)
XXe siècle (revision 120793535)
Élevage bovin (revision 120877235)
Freyja (revision 121028677)
Fusillade du 2 décembre 2015 en Californie (revision 121030353)
== End of Parsed pages ==
- Wikipedia parsing ended at: 2015-11-30 19:05:38.631196
- Wikipedia parsing ended at: 2015-12-03 21:10:27.682316
58 characters appeared 2625348 times.
56 characters appeared 728239 times.
First 38 characters:
[ 0] Char e: 14.297990209297968 %
[ 1] Char s: 8.062245462315854 %
[ 2] Char a: 8.006862328346566 %
[ 3] Char n: 7.458401705221555 %
[ 4] Char i: 7.3982572976992 %
[ 5] Char r: 6.902246864034788 %
[ 6] Char t: 6.851777364372266 %
[ 7] Char l: 5.928699738091865 %
[ 8] Char o: 5.30996271732357 %
[ 9] Char u: 5.181065519694913 %
[10] Char d: 4.153773137884959 %
[11] Char c: 3.1908912647009084 %
[12] Char m: 2.8650297027289335 %
[13] Char p: 2.801228637117822 %
[14] Char é: 2.4742624596815355 %
[15] Char v: 1.2647847066369868 %
[16] Char g: 1.2577761119668707 %
[17] Char f: 1.1079293107047143 %
[18] Char b: 1.030415777260767 %
[19] Char h: 0.9089842565633204 %
[20] Char q: 0.7969610124067362 %
[21] Char x: 0.43415196766295366 %
[22] Char è: 0.398613821862854 %
[23] Char à: 0.38916745513356704 %
[24] Char y: 0.3763310616344957 %
[25] Char j: 0.31298707828447886 %
[26] Char k: 0.20576319786938724 %
[27] Char z: 0.11880329769615304 %
[28] Char ê: 0.11221369509870692 %
[29] Char ç: 0.07610419647223911 %
[30] Char w: 0.06574366522076312 %
[31] Char ô: 0.04845071967602009 %
[32] Char â: 0.0448321517756884 %
[33] Char œ: 0.03778546691714774 %
[34] Char î: 0.03725220427920413 %
[35] Char ï: 0.02704403378142631 %
[36] Char û: 0.02285411305472646 %
[37] Char ù: 0.02034016061870655 %
[ 0] Char e: 14.339660468609894 %
[ 1] Char s: 7.954806045817375 %
[ 2] Char a: 7.864176458552756 %
[ 3] Char n: 7.572102015959047 %
[ 4] Char i: 7.34154583866011 %
[ 5] Char r: 7.020222756540091 %
[ 6] Char t: 6.833608197308851 %
[ 7] Char l: 5.9446143367768 %
[ 8] Char o: 5.386418469760614 %
[ 9] Char u: 5.024861343597363 %
[10] Char d: 4.169235649285468 %
[11] Char c: 3.4240132703686568 %
[12] Char p: 2.8882001650557028 %
[13] Char m: 2.803063280049544 %
[14] Char é: 2.498355622261373 %
[15] Char g: 1.277739862874688 %
[16] Char v: 1.1729665672945284 %
[17] Char f: 1.1614318925517584 %
[18] Char b: 0.9925312981040565 %
[19] Char h: 0.8580974103282026 %
[20] Char q: 0.7740590657737364 %
[21] Char x: 0.43570860665248634 %
[22] Char y: 0.41044217626356183 %
[23] Char è: 0.4100302235941771 %
[24] Char à: 0.363479571953713 %
[25] Char j: 0.29591933417463223 %
[26] Char k: 0.1359443808969308 %
[27] Char ç: 0.11685724054877589 %
[28] Char ê: 0.11218844362908331 %
[29] Char z: 0.10738232915292918 %
[30] Char w: 0.08239053387692777 %
[31] Char ô: 0.04792382720507965 %
[32] Char â: 0.03364280133307884 %
[33] Char î: 0.029385957082770905 %
[34] Char û: 0.024854477719539875 %
[35] Char œ: 0.021146903695078125 %
[36] Char ï: 0.017851282340001016 %
[37] Char ù: 0.015242248767231636 %
The first 38 characters have an accumulated ratio of 0.9997798387109063.
The first 38 characters have an accumulated ratio of 0.999621003544166.
1149 sequences found.
914 sequences found.
First 512 (typical positive ratio): 0.997044499777764
Next 512 (512-1024): 3.8090188424544096e-07
Rest: 5.974086801089403e-05
First 512 (typical positive ratio): 0.997057879992383
Next 512 (512-1024): 1.3731755646154627e-06
Rest: 3.8163916471489756e-17
- Processing end: 2015-11-30 19:05:38.842420
- Processing end: 2015-12-03 21:10:27.987730

View File

@ -0,0 +1,76 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ##### BEGIN LICENSE BLOCK #####
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Jehan <jehan@girinstud.io>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ##### END LICENSE BLOCK #####
from codepoints import *
# Canonical name of the charset described by this module, followed by
# the alternative labels it is also known under.
name = 'WINDOWS-1252'
aliases = ['CP-1252', 'cswindows1252']

# Language codes grouped by how well this charset covers them.
#   complete:   languages with complete coverage. The set is basically
#               a mix of the ISO-8859-1 and ISO-8859-15 ones.
#   incomplete: none are listed for this charset.
language = dict(
    complete=[
        'af', 'sq', 'eu', 'br', 'co', 'da', 'en', 'fo',
        'gl', 'de', 'is', 'id', 'it', 'ku', 'leon1250', 'lb',
        'ms', 'gv', 'no', 'oc', 'pt', 'rm', 'gd', 'es',
        'sw', 'sv', 'wa', 'ca', 'et', 'fi', 'fr', 'ga',
        'la',
    ],
    incomplete=[],
)
# Byte-class table for Windows-1252: one entry per byte value (0x00-0xFF),
# laid out as 16 rows of 16 entries. The trailing comment on each row gives
# the high nibble, the header line below gives the low nibble.
# The CTR/RET/SYM/NUM/LET/ILL markers come from the `codepoints` module
# (presumably control, line break, symbol, digit, letter and illegal byte --
# confirm against codepoints.py).
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
charmap = \
[
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
# Rows 8X and 9X are where this table is not all-control: Windows-1252
# places printable characters here. The LET entries are 0x83 (ƒ),
# 0x8A/0x9A (Š/š), 0x8C/0x9C (Œ/œ), 0x8E/0x9E (Ž/ž) and 0x9F (Ÿ); the
# ILL entries (0x81, 0x8D, 0x8F, 0x90, 0x9D) are the bytes the code page
# leaves unassigned.
SYM,ILL,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,ILL,LET,ILL, # 8X
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,ILL,LET,LET, # 9X
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # AX
# 0xB5 (µ) is the only entry classed as a letter in rows AX-BX.
SYM,SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # BX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
# 0xD7 (×) and 0xF7 (÷) are the two symbols inside the accented-letter rows.
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # DX
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # FX
]

View File

@ -50,7 +50,7 @@ code = 'fr'
# ASCII characters are also used in French.
use_ascii = True
# The charsets we want to support and create data for.
charsets = ['ISO-8859-15', 'ISO-8859-1']
charsets = ['ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252']
## Optional Properties ##

View File

@ -41,7 +41,7 @@
/**
* Generated by BuildLangModel.py
* On: 2015-11-30 19:05:38.632969
* On: 2015-12-03 21:10:27.685575
**/
/* Character Mapping Table:
@ -61,24 +61,24 @@
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
* even though they are both used for French. Same for the euro sign.
*/
static const unsigned char Iso_8859_15_CharToOrderMap[] =
static const unsigned char Windows_1252_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 16, 19, 4, 25, 26, 7, 12, 3, 8, /* 4X */
13, 20, 5, 1, 6, 9, 15, 30, 21, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 16, 19, 4, 25, 26, 7, 12, 3, 8, /* 6X */
13, 20, 5, 1, 6, 9, 15, 30, 21, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM, 48,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 50, 58,SYM,SYM, 50,SYM,SYM,SYM, 33, 33, 57,SYM, /* BX */
23, 39, 32, 46, 49, 56, 47, 29, 22, 14, 28, 38, 54, 40, 34, 35, /* CX */
59, 45, 60, 41, 31, 53, 43,SYM, 52, 37, 44, 36, 42, 55, 51, 61, /* DX */
23, 39, 32, 46, 49, 56, 47, 29, 22, 14, 28, 38, 54, 40, 34, 35, /* EX */
62, 45, 63, 41, 31, 53, 43,SYM, 52, 37, 44, 36, 42, 55, 51, 57, /* FX */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM,ILL,SYM, 56,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 57,ILL, /* 8X */
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 35,ILL, 58, 59, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 60,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
24, 38, 32, 46, 49, 61, 47, 27, 23, 14, 28, 41, 62, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 63, /* DX */
24, 38, 32, 46, 49, 64, 47, 27, 23, 14, 28, 41, 65, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 66, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
@ -88,80 +88,101 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 16, 19, 4, 25, 26, 7, 12, 3, 8, /* 4X */
13, 20, 5, 1, 6, 9, 15, 30, 21, 24, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 16, 19, 4, 25, 26, 7, 12, 3, 8, /* 6X */
13, 20, 5, 1, 6, 9, 15, 30, 21, 24, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM,SYM, 64,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
23, 39, 32, 46, 49, 56, 47, 29, 22, 14, 28, 38, 54, 40, 34, 35, /* CX */
65, 45, 66, 41, 31, 53, 43,SYM, 52, 37, 44, 36, 42, 55, 51, 67, /* DX */
23, 39, 32, 46, 49, 56, 47, 29, 22, 14, 28, 38, 54, 40, 34, 35, /* EX */
68, 45, 69, 41, 31, 53, 43,SYM, 52, 37, 44, 36, 42, 55, 51, 57, /* FX */
SYM,SYM,SYM,SYM,SYM, 67,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
24, 38, 32, 46, 49, 68, 47, 27, 23, 14, 28, 41, 69, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 70, /* DX */
24, 38, 32, 46, 49, 71, 47, 27, 23, 14, 28, 41, 72, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 73, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
/* Byte -> character-order table for French text encoded as ISO-8859-15.
 * Indexed by byte value and laid out as 16 rows of 16 entries; the trailing
 * comment on each row is the high nibble of the byte.
 * Numeric entries are the order assigned to the character; orders 0-37
 * are the 38 most frequent characters (for instance 'E' 0x45 and 'e' 0x65
 * both map to 0). Upper- and lower-case forms of the frequent letters share
 * one order, as rows 4X and 6X show.
 * CTR, RET, SYM and NUM are class markers defined outside this table. */
static const unsigned char Iso_8859_15_CharToOrderMap[] =
{
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 4X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,SYM, /* 5X */
SYM, 2, 18, 11, 10, 0, 17, 15, 19, 4, 25, 26, 7, 13, 3, 8, /* 6X */
12, 20, 5, 1, 6, 9, 16, 30, 21, 22, 29,SYM,SYM,SYM,SYM,CTR, /* 7X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
/* Rows AX-BX hold the letters specific to ISO-8859-15: 0xA6/0xA8 share
 * order 51 and 0xBC/0xBD (the 'o'-'e' ligature, upper and lower case)
 * share order 35. */
SYM,SYM,SYM,SYM,SYM,SYM, 51,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
SYM,SYM,SYM,SYM, 74, 75,SYM,SYM, 76,SYM,SYM,SYM, 35, 35, 77,SYM, /* BX */
24, 38, 32, 46, 49, 78, 47, 27, 23, 14, 28, 41, 79, 39, 33, 36, /* CX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 80, /* DX */
24, 38, 32, 46, 49, 81, 47, 27, 23, 14, 28, 41, 82, 39, 33, 36, /* EX */
48, 45, 54, 40, 31, 55, 42,SYM, 52, 37, 43, 34, 44, 53, 50, 83, /* FX */
};
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
/* Model Table:
* Total sequences: 1149
* First 512 sequences: 0.997044499777764
* Next 512 sequences (512-1024): 0.002895759354225113
* Rest: 5.974086801089403e-05
* Total sequences: 914
* First 512 sequences: 0.997057879992383
* Next 512 sequences (512-1024): 0.002942120007616917
* Rest: 3.8163916471489756e-17
* Negative sequences: TODO
*/
static const PRUint8 FrenchLangModel[] =
{
3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,0,0,3,3,3,3,0,3,3,2,1,0,0,2,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,1,3,0,3,2,3,2,0,0,2,2,2,2,1,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,3,3,3,3,0,3,3,2,2,0,3,3,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,0,3,3,3,3,2,3,2,2,2,2,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,2,1,2,2,2,2,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,3,3,3,3,2,3,3,0,1,1,2,2,
3,3,3,2,3,3,3,3,3,3,2,3,3,2,3,2,2,2,3,3,1,0,3,0,3,2,2,3,3,0,2,3,2,1,0,0,2,0,
3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,2,3,2,2,2,2,2,2,1,1,0,2,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,0,3,1,1,1,2,3,3,3,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,2,0,0,0,2,2,0,0,
3,3,3,2,3,3,2,3,3,3,3,2,3,2,3,2,3,2,2,3,1,0,3,0,3,3,2,2,0,0,2,2,2,0,2,0,2,0,
3,3,3,2,3,3,3,3,3,3,2,3,2,2,3,2,2,2,2,3,3,1,3,1,3,0,3,2,2,0,2,3,2,2,0,0,0,0,
3,3,3,3,3,2,3,2,3,3,2,2,3,3,3,2,2,2,3,2,0,1,3,0,3,1,2,2,3,0,2,2,3,2,2,0,0,0,
3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,2,2,2,3,0,2,3,0,2,2,2,2,3,2,2,3,2,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,3,2,3,0,2,0,0,3,0,0,0,0,0,
3,2,3,2,3,3,0,2,3,3,2,0,2,0,3,2,2,2,2,0,0,0,3,0,2,1,2,0,3,0,0,2,0,2,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,3,2,2,3,2,2,3,0,3,0,2,2,2,0,2,0,2,2,2,0,0,0,
3,3,3,2,3,3,2,3,3,3,2,2,2,2,3,0,2,3,1,0,0,0,3,0,2,0,2,0,3,0,1,0,2,2,0,0,2,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,0,3,0,3,3,2,2,2,0,2,0,3,3,2,0,2,0,
3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,2,2,2,2,2,0,3,0,3,2,2,2,2,0,2,3,3,2,0,0,0,0,
2,2,3,0,2,1,2,2,2,3,1,2,1,2,0,1,1,2,2,0,2,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,
3,0,3,0,3,0,3,2,2,3,0,3,2,3,3,3,0,2,0,2,2,3,2,0,3,0,0,0,0,0,2,0,0,0,0,0,0,0,
0,3,0,3,0,3,3,3,0,0,3,3,3,0,0,3,3,2,3,0,3,0,0,0,1,0,2,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,3,0,2,2,2,2,0,0,2,0,0,
3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,1,1,2,2,2,0,0,0,3,2,2,2,1,0,0,0,1,2,0,0,0,0,0,
3,3,3,2,3,3,2,2,3,3,1,2,3,2,2,2,2,2,2,3,0,0,0,0,3,2,3,1,0,0,2,0,1,1,0,0,0,0,
3,2,3,2,3,2,2,2,3,3,1,2,2,0,3,2,2,2,2,3,2,0,2,0,2,0,2,2,0,0,2,0,2,0,0,0,0,0,
0,0,0,2,0,0,3,2,0,0,0,3,3,2,0,2,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,2,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,2,2,2,3,2,2,2,2,2,2,0,0,2,1,2,2,0,0,0,2,0,2,1,0,0,2,0,2,0,0,0,0,0,
0,2,0,3,0,0,3,3,0,0,0,0,3,2,0,0,1,0,0,2,0,0,0,0,0,2,1,1,0,0,0,0,0,0,0,0,0,0,
0,2,2,2,0,2,3,3,2,0,2,3,2,2,0,0,3,0,2,2,2,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,1,2,1,0,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,3,0,0,3,3,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,2,2,0,2,2,3,1,0,3,2,2,0,0,2,2,2,2,0,2,0,0,0,1,2,2,0,0,0,2,0,0,0,0,0,0,0,
0,1,0,2,0,2,3,2,0,0,1,2,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,0,0,0,2,0,2,0,
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,0,3,3,0,0,3,0,0,2,3,0,0,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,2,2,3,0,0,3,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,2,3,2,0,2,2,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,3,0,2,3,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,3,3,3,2,3,3,3,0,2,0,0,0,
3,3,3,2,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,3,2,2,3,3,2,0,2,0,3,3,2,3,2,0,0,0,0,0,
3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,3,3,2,3,0,0,2,2,2,2,0,2,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,0,3,3,0,0,3,3,0,0,2,3,0,3,3,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,3,3,2,3,3,2,0,0,0,0,0,2,0,
3,3,3,2,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,0,0,3,3,0,3,0,0,2,2,3,2,2,2,3,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,2,2,2,3,3,0,3,3,0,0,3,0,2,2,2,3,2,0,0,2,0,0,
3,3,3,2,3,3,3,3,3,3,2,2,3,2,3,0,0,2,2,3,0,0,3,3,0,0,2,2,3,2,2,3,2,0,0,0,0,0,
3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,0,2,3,2,0,0,3,3,0,2,2,0,3,0,2,2,3,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,0,0,3,2,2,0,3,0,0,2,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,2,3,3,0,2,3,3,0,0,0,0,2,0,2,0,2,0,0,0,0,0,
3,2,3,2,3,3,0,2,3,3,0,0,0,2,3,0,2,2,0,0,0,0,2,3,0,0,2,0,3,0,0,0,0,0,0,2,0,0,
3,3,3,2,3,3,3,3,3,3,2,2,2,3,3,2,0,3,0,0,0,0,0,3,0,2,0,0,3,0,0,0,0,0,2,2,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,2,2,0,3,2,0,0,3,2,0,3,0,0,0,0,0,0,3,2,0,2,0,0,
3,3,3,3,3,3,3,3,3,3,0,2,0,3,3,0,0,2,2,0,0,0,3,3,0,2,2,0,2,2,2,3,3,0,0,2,0,0,
0,0,2,0,0,0,0,2,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,3,0,3,2,3,2,2,3,3,2,3,0,3,2,2,2,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,2,2,0,3,2,0,0,2,2,0,0,0,0,0,0,0,
0,3,0,3,0,3,3,3,0,0,3,3,2,3,0,3,3,2,3,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,2,2,2,3,3,2,2,2,2,3,0,0,0,0,0,0,0,0,0,3,2,0,0,0,0,0,0,2,0,0,0,0,0,
3,3,3,2,3,3,2,3,3,3,0,0,2,3,2,2,2,2,2,3,0,0,3,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
0,0,3,0,0,0,0,0,3,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,0,0,3,2,0,0,0,3,0,3,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,2,3,2,0,2,3,3,0,2,0,2,2,2,0,0,2,2,2,0,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0,
3,2,3,2,3,2,2,2,3,2,0,2,0,0,2,0,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
0,2,0,3,0,0,3,3,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,0,3,3,0,0,0,3,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,0,0,3,3,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,2,3,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,0,2,0,2,2,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,0,2,2,3,0,0,2,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
const SequenceModel Iso_8859_15FrenchModel =
const SequenceModel Windows_1252FrenchModel =
{
Iso_8859_15_CharToOrderMap,
Windows_1252_CharToOrderMap,
FrenchLangModel,
38,
(float)0.997044499777764,
(float)0.997057879992383,
PR_TRUE,
"ISO-8859-15"
"WINDOWS-1252"
};
const SequenceModel Iso_8859_1FrenchModel =
@ -169,7 +190,17 @@ const SequenceModel Iso_8859_1FrenchModel =
Iso_8859_1_CharToOrderMap,
FrenchLangModel,
38,
(float)0.997044499777764,
(float)0.997057879992383,
PR_TRUE,
"ISO-8859-1"
};
/* Sequence model used to detect French text encoded as ISO-8859-15.
 * It pairs the charset-specific byte table with the French sequence table
 * that the other French models also reference. */
const SequenceModel Iso_8859_15FrenchModel =
{
Iso_8859_15_CharToOrderMap, /* byte -> character-order table for this charset */
FrenchLangModel, /* French sequence table, common to all French models */
38, /* number of frequent characters covered by the model */
(float)0.997057879992383, /* ratio held by the first 512 sequences */
PR_TRUE, /* NOTE(review): flag meaning is set by the SequenceModel declaration, not visible here */
"ISO-8859-15" /* charset name for this model */
};

View File

@ -80,9 +80,10 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
mProbers[16] = new nsSingleByteCharSetProber(&Windows_1252FrenchModel);
mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
mProbers[17] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
mProbers[18] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
Reset();
}

View File

@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__
#define NUM_OF_SBCS_PROBERS 18
#define NUM_OF_SBCS_PROBERS 19
class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber {

View File

@ -138,6 +138,7 @@ extern const SequenceModel Win1255Model;
extern const SequenceModel TIS620ThaiModel;
extern const SequenceModel Iso_8859_15FrenchModel;
extern const SequenceModel Iso_8859_1FrenchModel;
extern const SequenceModel Windows_1252FrenchModel;
#endif /* nsSingleByteCharSetProber_h__ */