mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
LangModels: add support for Czech.
Encodings: Windows-1250, ISO-8859-2, IBM852 and Mac-CentralEurope. Other encodings are known to have been used for Czech: Kamenicky, KOI-8 CS2 and Cork. But these are uncommon enough that I decided not to support them (especially since I can't find them supported in iconv either, or at least not under an alias which I could recognize). This web page, whose content was released into the public domain, is a good reference for the encodings which were historically used for Czech and Slovak: http://luki.sdf-eu.org/txt/cs-encodings-faq.html
This commit is contained in:
parent
183092d048
commit
26e1cebad1
161
script/BuildLangModelLogs/LangCzechModel.log
Normal file
161
script/BuildLangModelLogs/LangCzechModel.log
Normal file
@ -0,0 +1,161 @@
|
||||
= Logs of language model for Czech (cs) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2016-09-21 03:20:56.824516
|
||||
- Maximum depth: 5
|
||||
- Max number of pages: 100
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Sociální fobie (revision 13567590)
|
||||
Adaptace (revision 13991192)
|
||||
Agorafobie (revision 13013445)
|
||||
Alkoholismus (revision 13822064)
|
||||
Alprazolam (revision 14082425)
|
||||
Antidepresivum (revision 14113423)
|
||||
Asertivita (revision 14111958)
|
||||
Atenolol (revision 12051880)
|
||||
Automatické negativní myšlenky (revision 13567590)
|
||||
Benzodiazepin (revision 13947546)
|
||||
Beta-blokátory (revision 13428762)
|
||||
Blud (revision 13888988)
|
||||
Bohatství (revision 13556478)
|
||||
Bupropion (revision 13686045)
|
||||
Citaloparam (revision 13567590)
|
||||
Clonazepan (revision 13567590)
|
||||
Crohnova nemoc (revision 13745254)
|
||||
Deprese (psychologie) (revision 13695735)
|
||||
Diagnostický a statický manuál mentálních poruch (revision 13567590)
|
||||
Diagnostický a statistický manuál mentálních poruch (revision 13714660)
|
||||
Diagnóza (medicína) (revision 13052239)
|
||||
Dichotomické myšlení (revision 13567590)
|
||||
Digital object identifier (revision 14138049)
|
||||
Dopamin (revision 13714274)
|
||||
Dystymie (revision 13567267)
|
||||
Důkaz kruhem (revision 13190761)
|
||||
Elektivní mutismus (revision 9940891)
|
||||
Emoce (revision 14110033)
|
||||
Escitalopram (revision 12954987)
|
||||
Evoluce (revision 13951488)
|
||||
Expozice (psychologie) (revision 14119474)
|
||||
Extraverze a introverze (revision 13872996)
|
||||
Fluoxetin (revision 12955006)
|
||||
Fluvoxamin (revision 12955006)
|
||||
Gen (revision 13907182)
|
||||
Generalizovaná úzkostná porucha (revision 14006709)
|
||||
Halucinaci (revision 12188143)
|
||||
Hněv (revision 14057864)
|
||||
Inteligence (revision 14009781)
|
||||
International Standard Serial Number (revision 12869806)
|
||||
Interpersonální psychoterapie (revision 13567590)
|
||||
Iracionalita (revision 4765977)
|
||||
Ján Praško Pavlov (revision 14086840)
|
||||
Klinické testování (revision 13530979)
|
||||
Kognitivní omyl (revision 13107294)
|
||||
Kognitivní psychologie (revision 11629465)
|
||||
Kognitivní restrukturalizace (revision 13567360)
|
||||
Kognitivně behaviorální terapie (revision 13980494)
|
||||
Komorbidita (revision 11351714)
|
||||
Lymská borelióza (revision 14068446)
|
||||
Malé sebevědomí (revision 13567590)
|
||||
Medical Subject Headings (revision 12239331)
|
||||
Meditace (revision 13180783)
|
||||
Mentální černý filtr (revision 13567590)
|
||||
Mezinárodní klasifikace nemocí (revision 12531067)
|
||||
Michael Liebowitz (revision 13567590)
|
||||
Moclobemid (revision 13567590)
|
||||
Moritova terapie (revision 11960292)
|
||||
Musturbace (revision 13567590)
|
||||
Nervozita (revision 13847097)
|
||||
Noradrenalin (revision 14054165)
|
||||
Obsedantně kompulzivní porucha (revision 13950365)
|
||||
Panická ataka (revision 13253537)
|
||||
Panická porucha (revision 13253537)
|
||||
Paranoia (revision 14027052)
|
||||
Paroxetin (revision 12955006)
|
||||
Pohlavnost (revision 13564689)
|
||||
Porucha (revision 11039108)
|
||||
Pravděpodobnost (revision 13596041)
|
||||
Predestinace (revision 12467403)
|
||||
Profese (revision 13975485)
|
||||
Propanolol (revision 12972658)
|
||||
Psychiatr (revision 12767960)
|
||||
Psychické trauma (revision 11227535)
|
||||
Psychoaktivní droga (revision 13939232)
|
||||
Psychodynamická léčba (revision 13567590)
|
||||
Psychofarmaka (revision 9928215)
|
||||
Psycholog (revision 12358728)
|
||||
Psychoterapie (revision 13874178)
|
||||
Puberta (revision 12540014)
|
||||
RIMA (revision 10234728)
|
||||
Remise (revision 9896748)
|
||||
Richard Heimberg (revision 13567590)
|
||||
Rámování myšlenek (revision 13567590)
|
||||
Schizofrenie (revision 13977456)
|
||||
Sebevražda (revision 14053884)
|
||||
Selektivní abstrakce (revision 13567590)
|
||||
Selektivní inhibitor zpětného vychytávání serotoninu (revision 12955027)
|
||||
Serotonin (revision 13975104)
|
||||
Sertralin (revision 12955006)
|
||||
Skupinová terapie (revision 11964235)
|
||||
Sociální chování (revision 13507313)
|
||||
Sociální dovednost (revision 12226347)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2016-09-21 03:28:11.731386
|
||||
|
||||
47 characters appeared 594800 times.
|
||||
|
||||
First 41 characters:
|
||||
[ 0] Char o: 8.323806321452588 %
|
||||
[ 1] Char e: 8.040013449899126 %
|
||||
[ 2] Char n: 6.895595158036315 %
|
||||
[ 3] Char a: 6.263113651647613 %
|
||||
[ 4] Char i: 5.650470746469401 %
|
||||
[ 5] Char t: 5.40383322125084 %
|
||||
[ 6] Char s: 4.588937457969065 %
|
||||
[ 7] Char v: 3.8685272360457295 %
|
||||
[ 8] Char p: 3.6914929388029587 %
|
||||
[ 9] Char r: 3.6302958977807664 %
|
||||
[10] Char l: 3.6017148621385338 %
|
||||
[11] Char í: 3.5733019502353733 %
|
||||
[12] Char k: 3.301950235373235 %
|
||||
[13] Char u: 3.1782111634162744 %
|
||||
[14] Char c: 3.1383658372562206 %
|
||||
[15] Char d: 3.120208473436449 %
|
||||
[16] Char m: 2.758406186953598 %
|
||||
[17] Char h: 2.2747141896435776 %
|
||||
[18] Char á: 2.156186953597848 %
|
||||
[19] Char z: 2.0260591795561536 %
|
||||
[20] Char y: 1.9894082044384667 %
|
||||
[21] Char j: 1.8979488903833224 %
|
||||
[22] Char b: 1.8189307330195021 %
|
||||
[23] Char ě: 1.277236045729657 %
|
||||
[24] Char é: 1.2291526563550772 %
|
||||
[25] Char č: 0.9502353732347008 %
|
||||
[26] Char ž: 0.9214862138533961 %
|
||||
[27] Char ř: 0.8955951580363146 %
|
||||
[28] Char ý: 0.7646267652992602 %
|
||||
[29] Char š: 0.6605581708137189 %
|
||||
[30] Char f: 0.6260928043039677 %
|
||||
[31] Char ů: 0.5016812373907196 %
|
||||
[32] Char g: 0.47041022192333554 %
|
||||
[33] Char ú: 0.19502353732347008 %
|
||||
[34] Char x: 0.13685272360457296 %
|
||||
[35] Char ň: 0.05447209145931405 %
|
||||
[36] Char w: 0.04488903833221251 %
|
||||
[37] Char ó: 0.03429724277067922 %
|
||||
[38] Char ť: 0.02269670477471419 %
|
||||
[39] Char ď: 0.012104909213180902 %
|
||||
[40] Char q: 0.007229320780094149 %
|
||||
|
||||
The first 41 characters have an accumulated ratio of 0.9999613315400132.
|
||||
|
||||
1025 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.9786035192432675
|
||||
Next 512 (512-1024): 0.02139445610866691
|
||||
Rest: 2.0246480655940202e-06
|
||||
|
||||
- Processing end: 2016-09-21 03:28:12.235582
|
||||
72
script/charsets/ibm852.py
Normal file
72
script/charsets/ibm852.py
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
from codepoints import *
|
||||
|
||||
name = 'IBM852'
|
||||
aliases = ['CP852']
|
||||
|
||||
language = \
|
||||
{
|
||||
'complete': [ 'bs', 'hr', 'cs', 'de', 'hu', 'pl', 'sr', 'sk', 'sl',
|
||||
'hsb', 'dsb', 'tk' ],
|
||||
'incomplete': [ 'ro' ]
|
||||
}
|
||||
|
||||
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 8X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,LET, # 9X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,SYM,SYM, # AX
|
||||
SYM,SYM,SYM,SYM,SYM,LET,LET,LET,LET,SYM,SYM,SYM,SYM,LET,LET,SYM, # BX
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,LET,LET,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # CX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,LET,LET,SYM, # DX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM, # EX
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,LET,LET,SYM,SYM, # FX
|
||||
]
|
||||
72
script/charsets/mac-centraleurope.py
Normal file
72
script/charsets/mac-centraleurope.py
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
from codepoints import *
|
||||
|
||||
name = 'MAC-CENTRALEUROPE'
|
||||
aliases = []
|
||||
|
||||
language = \
|
||||
{
|
||||
'complete': [ 'bs', 'hr', 'cs', 'de', 'hu', 'pl', 'sr', 'sk', 'sl',
|
||||
'hsb', 'dsb', 'tk' ],
|
||||
'incomplete': [ 'ro' ]
|
||||
}
|
||||
|
||||
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 8X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 9X
|
||||
SYM,SYM,LET,SYM,SYM,SYM,SYM,LET,SYM,SYM,SYM,LET,SYM,SYM,LET,LET, # AX
|
||||
LET,LET,SYM,SYM,LET,LET,SYM,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # BX
|
||||
LET,LET,SYM,SYM,LET,LET,SYM,SYM,SYM,SYM,SYM,LET,LET,LET,LET,LET, # CX
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,LET,LET,LET,SYM,SYM,LET,LET, # DX
|
||||
LET,LET,SYM,SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM, # FX
|
||||
]
|
||||
80
script/langs/cs.py
Normal file
80
script/langs/cs.py
Normal file
@ -0,0 +1,80 @@
|
||||
#!/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
import re
|
||||
|
||||
## Mandatory Properties ##
|
||||
|
||||
# The human name for the language, in English.
|
||||
name = 'Czech'
|
||||
# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
|
||||
# or use another catalog as a last resort.
|
||||
code = 'cs'
|
||||
# ASCII characters are also used in Czech.
|
||||
use_ascii = True
|
||||
# The charsets we want to support and create data for.
|
||||
charsets = ['ISO-8859-2', 'Windows-1250', 'IBM852', 'MAC-CENTRALEUROPE']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
# Alphabet characters.
|
||||
# If use_ascii=True, there is no need to add any ASCII characters.
|
||||
# If case_mapping=True, there is no need to add several cases of a same
|
||||
# character (provided Python algorithms know the right cases).
|
||||
alphabet = 'áčďéěíňóřšťúůýž'
|
||||
# The starred page which was rewarded on the main page when I created
|
||||
# the data.
|
||||
start_pages = ['Sociální fobie']
|
||||
# give possibility to select another code for the Wikipedia URL.
|
||||
wikipedia_code = code
|
||||
# 'a' and 'A' will be considered the same character, and so on.
|
||||
# This uses Python algorithm to determine upper/lower-case of a given
|
||||
# character.
|
||||
case_mapping = True
|
||||
|
||||
# A function to clean content returned by the `wikipedia` python lib,
|
||||
# in case some unwanted data has been overlooked.
|
||||
# Note that we are already cleaning away the '=' from the title syntax
|
||||
# of Wikipedia, as well as double spaces. But sometimes, Wikipedia in
|
||||
# some language may return weird syntax or UI text which should be
|
||||
# discarded. If you encounter one of these cases, use this function.
|
||||
def clean_wikipedia_content(content):
    """Clean page text returned by the `wikipedia` Python library.

    This is the per-language hook for discarding leftover wiki syntax or
    UI text. No Czech-specific cleanup is currently needed, so the text
    is handed back untouched.

    Args:
        content: the page text to clean (presumably a str -- confirm
            against the caller).

    Returns:
        The same `content` object, unmodified.
    """
    # Do your garbage text cleaning here.
    return content
|
||||
@ -9,7 +9,7 @@ set(
|
||||
JpCntx.cpp
|
||||
LangModels/LangArabicModel.cpp
|
||||
LangModels/LangBulgarianModel.cpp
|
||||
LangModels/LangRussianModel.cpp
|
||||
LangModels/LangCzechModel.cpp
|
||||
LangModels/LangEsperantoModel.cpp
|
||||
LangModels/LangFrenchModel.cpp
|
||||
LangModels/LangDanishModel.cpp
|
||||
@ -21,6 +21,7 @@ set(
|
||||
LangModels/LangLatvianModel.cpp
|
||||
LangModels/LangMalteseModel.cpp
|
||||
LangModels/LangPortugueseModel.cpp
|
||||
LangModels/LangRussianModel.cpp
|
||||
LangModels/LangSpanishModel.cpp
|
||||
LangModels/LangThaiModel.cpp
|
||||
LangModels/LangTurkishModel.cpp
|
||||
|
||||
281
src/LangModels/LangCzechModel.cpp
Normal file
281
src/LangModels/LangCzechModel.cpp
Normal file
@ -0,0 +1,281 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Communicator client code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
|
||||
/********* Language model for: Czech *********/
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2016-09-21 03:28:11.733089
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
* ILL: illegal character.
|
||||
* CTR: control character specific to the charset.
|
||||
* RET: carriage return or line feed (0x0D / 0x0A).
|
||||
* SYM: symbol (punctuation) that does not belong to word.
|
||||
* NUM: 0 - 9.
|
||||
*
|
||||
* Other characters are ordered by probabilities
|
||||
* (0 is the most common character in the language).
|
||||
*
|
||||
* Orders are generic to a language. So the codepoint with order X in
|
||||
* CHARSET1 maps to the same character as the codepoint with the same
|
||||
* order X in CHARSET2 for the same language.
|
||||
* As such, some orders may be missing from a given charset. For instance the
|
||||
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||
* even though they are both used for French. Same for the euro sign.
|
||||
*/
|
||||
static const unsigned char Windows_1250_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM,ILL,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 47, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 29,SYM, 46, 38, 26, 48, /* 9X */
|
||||
SYM,SYM,SYM, 49,SYM, 50,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM, 52, /* AX */
|
||||
SYM,SYM,SYM, 53,SYM,SYM,SYM,SYM,SYM, 54, 55,SYM, 45,SYM, 45, 56, /* BX */
|
||||
57, 18, 58, 59, 42, 60, 61, 62, 25, 24, 63, 64, 23, 11, 65, 39, /* CX */
|
||||
66, 67, 35, 37, 68, 69, 41,SYM, 27, 31, 33, 70, 43, 28, 71, 72, /* DX */
|
||||
73, 18, 74, 75, 42, 76, 77, 78, 25, 24, 79, 80, 23, 11, 81, 39, /* EX */
|
||||
82, 83, 35, 37, 84, 85, 41,SYM, 27, 31, 33, 86, 43, 28, 87,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const unsigned char Mac_Centraleurope_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
42, 88, 89, 24, 90, 41, 43, 18, 91, 25, 42, 25, 92, 93, 24, 94, /* 8X */
|
||||
95, 39, 11, 39, 44, 44, 96, 37, 97, 98, 41, 99, 33, 23, 23, 43, /* 9X */
|
||||
SYM,SYM,100,SYM,SYM,SYM,SYM,101,SYM,SYM,SYM,102,SYM,SYM,103,104, /* AX */
|
||||
105,106,SYM,SYM,107,108,SYM,SYM,109,110,111, 45, 45,112,113,114, /* BX */
|
||||
115,116,SYM,SYM,117, 35,SYM,SYM,SYM,SYM,SYM, 35,118,119,120,121, /* CX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,122,123,124, 27,SYM,SYM, 27,125, /* DX */
|
||||
126, 29,SYM,SYM, 29, 46, 46, 18, 38, 38, 11, 26, 26,127, 37,128, /* EX */
|
||||
129, 31, 33, 31,130,131,132,133, 28, 28,134,135,136,137,138,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const unsigned char Ibm852_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
139, 43, 24,140, 42, 31,141,142,143,144,145,146,147,148, 42,149, /* 8X */
|
||||
24,150,151,152, 41, 45, 45, 46, 46, 41, 43, 38, 38,153,SYM, 25, /* 9X */
|
||||
18, 11, 37, 33,154,155, 26, 26,156,157,SYM,158, 25,159,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 18,160, 23,161,SYM,SYM,SYM,SYM,162,163,SYM, /* BX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,164,165,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
|
||||
166,167, 39,168, 39, 35, 11,169, 23,SYM,SYM,SYM,SYM,170, 31,SYM, /* DX */
|
||||
37,171,172,173,174, 35, 29, 29,175, 33,176,177, 28, 28,178,SYM, /* EX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,179, 27, 27,SYM,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const unsigned char Iso_8859_2_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 4X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 3, 22, 14, 15, 1, 30, 32, 17, 4, 21, 12, 10, 16, 2, 0, /* 6X */
|
||||
8, 40, 9, 6, 5, 13, 7, 36, 34, 20, 19,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,180,SYM,181,SYM, 45, 46,SYM,SYM, 29,182, 38,183,SYM, 26,184, /* AX */
|
||||
SYM,185,SYM,186,SYM, 45, 46,SYM,SYM, 29,187, 38,188,SYM, 26,189, /* BX */
|
||||
190, 18,191,192, 42,193,194,195, 25, 24,196,197, 23, 11,198, 39, /* CX */
|
||||
199,200, 35, 37,201,202, 41,SYM, 27, 31, 33,203, 43, 28,204,205, /* DX */
|
||||
206, 18,207,208, 42,209,210,211, 25, 24,212,213, 23, 11,214, 39, /* EX */
|
||||
215,216, 35, 37,217,218, 41,SYM, 27, 31, 33,219, 43, 28,220,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 1025
|
||||
* First 512 sequences: 0.9786035192432675
|
||||
* Next 512 sequences (512-1024): 0.02139445610866691
|
||||
* Rest: 2.0246480655940202e-06
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
static const PRUint8 CzechLangModel[] =
|
||||
{
|
||||
2,2,3,2,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,
|
||||
2,3,3,0,0,3,3,3,0,2,3,0,3,0,3,2,2,0,2,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,2,3,
|
||||
2,3,3,0,0,3,3,3,0,3,3,2,3,2,3,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,2,0,2,3,3,3,3,3,3,2,3,3,3,
|
||||
3,2,2,3,3,2,2,0,3,2,3,3,3,0,2,0,0,2,0,0,2,
|
||||
3,3,3,2,2,3,3,3,3,3,3,0,3,3,3,3,3,3,0,3,
|
||||
3,3,3,0,0,3,3,3,0,3,3,0,3,0,3,2,2,0,2,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,
|
||||
0,2,3,0,2,3,3,2,0,3,3,0,3,0,2,2,2,2,2,0,2,
|
||||
3,3,3,3,3,2,2,3,2,3,3,3,3,3,2,2,2,3,3,3,
|
||||
3,2,2,3,3,2,0,3,3,3,0,3,2,0,0,2,2,2,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,
|
||||
3,2,3,0,2,2,0,0,2,0,2,2,2,2,0,2,2,0,2,0,0,
|
||||
3,3,3,3,3,2,2,0,2,3,3,3,3,3,2,3,0,2,3,3,
|
||||
3,2,2,3,3,2,2,2,3,3,0,3,0,0,0,2,0,2,0,0,0,
|
||||
3,3,3,3,3,3,3,0,2,3,3,3,2,3,2,2,2,2,3,0,
|
||||
3,2,2,3,2,2,0,3,2,2,2,3,2,0,2,2,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,2,3,3,
|
||||
3,0,3,0,3,3,2,0,3,2,2,3,3,0,0,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,2,2,2,2,2,3,3,3,2,2,2,2,3,3,
|
||||
3,0,2,0,3,2,2,0,3,3,2,3,2,0,0,2,0,2,0,0,0,
|
||||
0,2,3,0,2,3,3,3,3,3,3,2,3,0,3,2,3,3,0,3,
|
||||
0,3,2,0,0,3,3,2,0,2,0,0,2,0,0,0,0,0,2,0,0,
|
||||
3,3,3,3,3,3,2,3,0,3,3,0,2,3,3,3,2,2,3,2,
|
||||
3,2,3,0,3,2,2,2,3,0,2,3,2,0,0,0,0,2,0,0,0,
|
||||
2,2,3,3,3,3,3,3,3,3,3,0,3,2,3,3,3,3,3,3,
|
||||
2,3,3,0,0,3,3,2,0,3,2,0,2,0,2,2,2,0,2,2,0,
|
||||
3,3,3,3,3,3,2,2,2,2,2,3,3,2,2,3,2,3,2,2,
|
||||
2,0,2,0,2,0,0,0,0,0,2,2,0,0,0,2,0,0,0,0,2,
|
||||
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,2,3,2,3,2,
|
||||
3,0,2,3,3,2,2,2,2,2,2,3,2,0,2,2,2,0,0,0,0,
|
||||
3,3,3,3,3,2,2,0,3,3,3,3,2,3,2,2,2,2,3,2,
|
||||
3,2,3,3,3,2,3,2,2,2,2,3,2,0,0,2,0,2,0,0,0,
|
||||
3,3,3,3,3,3,2,3,2,3,3,2,2,3,2,2,2,0,3,0,
|
||||
3,2,2,0,2,2,2,2,3,0,2,2,0,0,0,0,2,2,2,0,0,
|
||||
0,0,3,0,0,3,3,3,2,3,3,0,3,0,3,3,3,3,0,3,
|
||||
0,2,2,0,0,2,3,2,0,3,2,0,0,0,0,2,0,0,0,2,0,
|
||||
3,3,3,3,3,3,2,3,3,2,3,3,3,3,2,3,3,3,3,2,
|
||||
3,2,2,0,2,2,0,2,2,2,2,2,0,2,0,2,0,2,0,0,0,
|
||||
2,2,3,2,2,3,3,3,3,2,3,2,3,2,3,2,3,3,0,3,
|
||||
0,2,3,0,0,2,3,2,0,3,2,0,2,2,2,0,0,0,2,0,0,
|
||||
2,3,3,3,3,2,3,2,2,2,2,3,2,2,2,2,3,2,2,2,
|
||||
0,2,2,0,0,2,0,0,0,3,2,2,0,2,0,2,0,2,0,2,0,
|
||||
3,3,3,3,3,3,3,3,0,3,3,3,2,3,2,2,2,2,3,2,
|
||||
3,3,2,3,2,2,0,2,3,2,0,2,0,0,0,2,0,2,0,0,0,
|
||||
0,0,3,2,0,3,3,2,3,3,3,0,3,0,3,3,2,3,0,2,
|
||||
0,3,0,0,0,2,3,3,0,3,0,0,0,0,0,2,0,0,2,2,0,
|
||||
2,0,3,0,0,3,2,2,2,2,2,0,3,0,0,2,3,3,0,3,
|
||||
0,0,0,0,0,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,0,0,2,3,3,3,3,2,0,0,0,3,0,
|
||||
0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,2,2,0,0,2,2,3,2,2,2,3,2,0,3,0,
|
||||
0,0,2,0,0,0,0,0,0,3,0,2,0,0,0,2,0,0,0,2,0,
|
||||
2,3,2,3,3,0,2,0,0,0,0,3,2,2,0,0,0,0,2,2,
|
||||
0,0,2,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,3,3,3,2,3,2,0,2,2,3,2,3,2,0,3,
|
||||
0,2,3,0,0,2,2,2,0,3,2,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,3,3,3,3,2,2,2,0,3,3,3,3,0,0,0,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,
|
||||
3,3,2,3,3,2,2,0,2,3,3,2,0,3,0,2,2,0,2,2,
|
||||
3,0,0,0,2,0,0,0,2,0,2,2,0,2,0,0,0,2,0,0,0,
|
||||
0,0,2,2,0,0,3,3,0,2,2,0,2,0,2,2,3,2,0,3,
|
||||
0,2,2,0,0,2,3,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
3,3,3,3,3,2,2,2,2,3,3,0,0,2,2,2,2,2,2,0,
|
||||
2,0,0,0,2,0,0,0,0,0,2,2,2,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,2,3,2,2,2,2,0,2,0,2,2,2,2,0,3,
|
||||
0,2,2,0,0,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,3,3,0,0,3,2,2,2,2,2,2,2,2,0,2,0,
|
||||
2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,
|
||||
2,0,0,2,0,0,2,0,0,0,0,0,2,3,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,3,0,2,0,0,0,2,0,2,2,2,0,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||
0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,2,2,0,0,3,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,0,0,0,0,0,0,0,0,2,2,0,0,0,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
/* Sequence model for Czech text encoded as Windows-1250: pairs the
 * Windows-1250 byte-to-order table with the Czech sequence table that is
 * shared by every Czech model in this file.
 * NOTE(review): the initializer is positional; the field meanings noted
 * below are inferred from the values and must be confirmed against the
 * SequenceModel declaration in nsSBCharSetProber.h. */
const SequenceModel Windows_1250CzechModel =
|
||||
{
|
||||
/* Byte value -> character order (or CTR/RET/SYM/NUM/ILL class). */
Windows_1250_CharToOrderMap,
|
||||
/* Sequence table shared by all four Czech charsets. */
CzechLangModel,
|
||||
/* Same value as the "First 41 characters" reported by the build log;
 * presumably the number of frequent characters covered by the table. */
41,
|
||||
/* Ratio listed as "First 512 sequences" in the Model Table header. */
(float)0.9786035192432675,
|
||||
/* NOTE(review): flag semantics are defined by SequenceModel -- confirm. */
PR_TRUE,
|
||||
/* Charset name string stored in the model. */
"WINDOWS-1250"
|
||||
};
|
||||
|
||||
/* Sequence model for Czech text encoded as Mac Central Europe: pairs the
 * Mac-CentralEurope byte-to-order table with the Czech sequence table that
 * is shared by every Czech model in this file.
 * NOTE(review): the initializer is positional; the field meanings noted
 * below are inferred from the values and must be confirmed against the
 * SequenceModel declaration in nsSBCharSetProber.h. */
const SequenceModel Mac_CentraleuropeCzechModel =
|
||||
{
|
||||
/* Byte value -> character order (or CTR/RET/SYM/NUM class). */
Mac_Centraleurope_CharToOrderMap,
|
||||
/* Sequence table shared by all four Czech charsets. */
CzechLangModel,
|
||||
/* Same value as the "First 41 characters" reported by the build log;
 * presumably the number of frequent characters covered by the table. */
41,
|
||||
/* Ratio listed as "First 512 sequences" in the Model Table header. */
(float)0.9786035192432675,
|
||||
/* NOTE(review): flag semantics are defined by SequenceModel -- confirm. */
PR_TRUE,
|
||||
/* Charset name string stored in the model. */
"MAC-CENTRALEUROPE"
|
||||
};
|
||||
|
||||
/* Sequence model for Czech text encoded as IBM852: pairs the IBM852
 * byte-to-order table with the Czech sequence table that is shared by
 * every Czech model in this file.
 * NOTE(review): the initializer is positional; the field meanings noted
 * below are inferred from the values and must be confirmed against the
 * SequenceModel declaration in nsSBCharSetProber.h. */
const SequenceModel Ibm852CzechModel =
|
||||
{
|
||||
/* Byte value -> character order (or CTR/RET/SYM/NUM class). */
Ibm852_CharToOrderMap,
|
||||
/* Sequence table shared by all four Czech charsets. */
CzechLangModel,
|
||||
/* Same value as the "First 41 characters" reported by the build log;
 * presumably the number of frequent characters covered by the table. */
41,
|
||||
/* Ratio listed as "First 512 sequences" in the Model Table header. */
(float)0.9786035192432675,
|
||||
/* NOTE(review): flag semantics are defined by SequenceModel -- confirm. */
PR_TRUE,
|
||||
/* Charset name string stored in the model. */
"IBM852"
|
||||
};
|
||||
|
||||
/* Sequence model for Czech text encoded as ISO-8859-2: pairs the
 * ISO-8859-2 byte-to-order table with the Czech sequence table that is
 * shared by every Czech model in this file.
 * NOTE(review): the initializer is positional; the field meanings noted
 * below are inferred from the values and must be confirmed against the
 * SequenceModel declaration in nsSBCharSetProber.h. */
const SequenceModel Iso_8859_2CzechModel =
|
||||
{
|
||||
/* Byte value -> character order (or CTR/RET/SYM/NUM class). */
Iso_8859_2_CharToOrderMap,
|
||||
/* Sequence table shared by all four Czech charsets. */
CzechLangModel,
|
||||
/* Same value as the "First 41 characters" reported by the build log;
 * presumably the number of frequent characters covered by the table. */
41,
|
||||
/* Ratio listed as "First 512 sequences" in the Model Table header. */
(float)0.9786035192432675,
|
||||
/* NOTE(review): flag semantics are defined by SequenceModel -- confirm. */
PR_TRUE,
|
||||
/* Charset name string stored in the model. */
"ISO-8859-2"
|
||||
};
|
||||
@ -126,6 +126,11 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
|
||||
mProbers[45] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
|
||||
|
||||
mProbers[46] = new nsSingleByteCharSetProber(&Windows_1250CzechModel);
|
||||
mProbers[47] = new nsSingleByteCharSetProber(&Iso_8859_2CzechModel);
|
||||
mProbers[48] = new nsSingleByteCharSetProber(&Mac_CentraleuropeCzechModel);
|
||||
mProbers[49] = new nsSingleByteCharSetProber(&Ibm852CzechModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 46
|
||||
#define NUM_OF_SBCS_PROBERS 50
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -187,5 +187,10 @@ extern const SequenceModel Windows_1252PortugueseModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_3MalteseModel;
|
||||
|
||||
extern const SequenceModel Windows_1250CzechModel;
|
||||
extern const SequenceModel Iso_8859_2CzechModel;
|
||||
extern const SequenceModel Ibm852CzechModel;
|
||||
extern const SequenceModel Mac_CentraleuropeCzechModel;
|
||||
|
||||
#endif /* nsSingleByteCharSetProber_h__ */
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user