mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
LangModels: support for Maltese / ISO-8859-3.
Test text from https://mt.wikipedia.org/wiki/Franza.
This commit is contained in:
parent
be01360e92
commit
2700cf3a83
147
script/BuildLangModelLogs/LangMalteseModel.log
Normal file
147
script/BuildLangModelLogs/LangMalteseModel.log
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
= Logs of language model for Maltese (mt) =
|
||||||
|
|
||||||
|
- Generated by BuildLangModel.py
|
||||||
|
- Started: 2016-09-21 02:05:23.411546
|
||||||
|
- Maximum depth: 5
|
||||||
|
- Max number of pages: 100
|
||||||
|
|
||||||
|
== Parsed pages ==
|
||||||
|
|
||||||
|
Unjoni Ewropea (revision 246298)
|
||||||
|
1951 (revision 229183)
|
||||||
|
1952 (revision 229184)
|
||||||
|
1957 (revision 229188)
|
||||||
|
1958 (revision 229189)
|
||||||
|
1973 (revision 223536)
|
||||||
|
1979 (revision 243876)
|
||||||
|
1981 (revision 205545)
|
||||||
|
1985 (revision 216368)
|
||||||
|
1986 (revision 231433)
|
||||||
|
1990 (revision 237666)
|
||||||
|
1992 (revision 244087)
|
||||||
|
1995 (revision 214650)
|
||||||
|
1 ta' Mejju (revision 245374)
|
||||||
|
2007 (revision 214851)
|
||||||
|
2013 (revision 245606)
|
||||||
|
Albanija (revision 243079)
|
||||||
|
Awstrija (revision 243627)
|
||||||
|
Awtonomija (revision 245824)
|
||||||
|
Ażores (revision 246298)
|
||||||
|
Bank Ċentrali Ewropew (revision 246298)
|
||||||
|
Belt kapitali (revision 237400)
|
||||||
|
Belġju (revision 244363)
|
||||||
|
Brussell (revision 243311)
|
||||||
|
Bulgarija (revision 243622)
|
||||||
|
Danimarka (revision 244419)
|
||||||
|
De facto (revision 215102)
|
||||||
|
Estonja (revision 243826)
|
||||||
|
European Free Trade Association (revision 246298)
|
||||||
|
Ewropa (revision 244177)
|
||||||
|
Ex Repubblika Jugoslava tal-Maċedonja (revision 246298)
|
||||||
|
Federazzjoni (revision 246226)
|
||||||
|
Finlandja (revision 245824)
|
||||||
|
Frankfurt (revision 243576)
|
||||||
|
Franza (revision 244461)
|
||||||
|
Greċja (revision 244423)
|
||||||
|
Groenlandja (revision 243829)
|
||||||
|
Indja (revision 244873)
|
||||||
|
Islanda (revision 243771)
|
||||||
|
Isle of Man (revision 246298)
|
||||||
|
Istitut tal-Unjoni Ewropea għall-Istudji dwar is-Sigurtà (revision 244412)
|
||||||
|
Italja (revision 246323)
|
||||||
|
Kilometru kwadru (revision 244871)
|
||||||
|
Komunitajiet Ewropej (revision 246298)
|
||||||
|
Komunità Ekonomika Ewropea (revision 246298)
|
||||||
|
Kroazja (revision 245711)
|
||||||
|
Kummissjoni Ewropea (revision 243311)
|
||||||
|
Kunsill Ewropew (revision 246298)
|
||||||
|
Kunsill tal-Ewropa (revision 243334)
|
||||||
|
Kunsill tal-Unjoni Ewropea (revision 243311)
|
||||||
|
Latvja (revision 245746)
|
||||||
|
Lista ta' pajjiżi skont id-daqs (revision 244419)
|
||||||
|
Lista ta' pajjiżi skont il-popolazzjoni (revision 246128)
|
||||||
|
Litwanja (revision 243114)
|
||||||
|
Liġijiet tal-Unjoni Ewropea (revision 246298)
|
||||||
|
Lussemburgu (revision 244239)
|
||||||
|
Lussemburgu (belt) (revision 243587)
|
||||||
|
Madejra (revision 243625)
|
||||||
|
Malta (revision 247210)
|
||||||
|
Montenegro (revision 243930)
|
||||||
|
Norveġja (revision 243829)
|
||||||
|
Olanda (revision 243989)
|
||||||
|
Organizzazzjoni Internazzjonali (revision 246724)
|
||||||
|
Pajjiżi l-Baxxi (revision 243989)
|
||||||
|
Pajjiżi membri tal-Unjoni Ewropea (revision 243625)
|
||||||
|
Pajjiżi ġirien li jdawru l-Unjoni Ewropea (revision 246298)
|
||||||
|
Parlament Ewropew (revision 243907)
|
||||||
|
Patt ta' Stabilità u Tkabbir (revision 246298)
|
||||||
|
Politika agrikola komuni (revision 244363)
|
||||||
|
Politika reġjonali tal-Unjoni Ewropea (revision 246298)
|
||||||
|
Polonja (revision 244530)
|
||||||
|
Portugall (revision 243625)
|
||||||
|
Relazzjonijiet ta' terzi pajjiżi ma l-UE (revision 246298)
|
||||||
|
Renju Unit (revision 247318)
|
||||||
|
Repubblika Federali tal-Ġermanja (revision 244859)
|
||||||
|
Repubblika tal-Irlanda (revision 243686)
|
||||||
|
Repubblika Ċeka (revision 246832)
|
||||||
|
Rumanija (revision 243623)
|
||||||
|
Segretarjat tal-Parlament Ewropew (revision 246298)
|
||||||
|
Serbja (revision 243728)
|
||||||
|
Slovakkja (revision 243831)
|
||||||
|
Slovenja (revision 244588)
|
||||||
|
Spanja (revision 246856)
|
||||||
|
Stati Uniti tal-Amerika (revision 243926)
|
||||||
|
Stati membri tal-Unjoni Ewropea (revision 243114)
|
||||||
|
Strasburgu (revision 243503)
|
||||||
|
Sui generis (revision 247150)
|
||||||
|
Suq komuni (revision 246298)
|
||||||
|
Svezja (revision 244871)
|
||||||
|
|
||||||
|
== End of Parsed pages ==
|
||||||
|
|
||||||
|
- Wikipedia parsing ended at: 2016-09-21 02:07:45.508113
|
||||||
|
|
||||||
|
48 characters appeared 474337 times.
|
||||||
|
|
||||||
|
First 31 characters:
|
||||||
|
[ 0] Char a: 12.326257492036252 %
|
||||||
|
[ 1] Char i: 12.069899670487438 %
|
||||||
|
[ 2] Char t: 8.064941170518008 %
|
||||||
|
[ 3] Char l: 7.795301652622502 %
|
||||||
|
[ 4] Char e: 6.615971345267184 %
|
||||||
|
[ 5] Char n: 6.128132530247482 %
|
||||||
|
[ 6] Char r: 5.579577389071483 %
|
||||||
|
[ 7] Char u: 4.376424356522894 %
|
||||||
|
[ 8] Char o: 3.8337721915009797 %
|
||||||
|
[ 9] Char j: 3.7378488289971057 %
|
||||||
|
[10] Char m: 3.6084049947611088 %
|
||||||
|
[11] Char s: 3.3533120966738834 %
|
||||||
|
[12] Char k: 2.588033402412209 %
|
||||||
|
[13] Char d: 2.3173397816320462 %
|
||||||
|
[14] Char p: 2.0555006250830106 %
|
||||||
|
[15] Char b: 2.017131280081461 %
|
||||||
|
[16] Char f: 2.004692866042497 %
|
||||||
|
[17] Char ħ: 1.6372326004507345 %
|
||||||
|
[18] Char w: 1.4801712706366992 %
|
||||||
|
[19] Char g: 1.4763765002519307 %
|
||||||
|
[20] Char z: 1.3150987588992635 %
|
||||||
|
[21] Char ż: 0.9910675321554084 %
|
||||||
|
[22] Char h: 0.9750451683086075 %
|
||||||
|
[23] Char ġ: 0.7640137708000851 %
|
||||||
|
[24] Char ċ: 0.6723068198348432 %
|
||||||
|
[25] Char x: 0.5892435125237964 %
|
||||||
|
[26] Char v: 0.5668965313690478 %
|
||||||
|
[27] Char q: 0.5647883255997318 %
|
||||||
|
[28] Char c: 0.2759641352034524 %
|
||||||
|
[29] Char à: 0.10730767365817974 %
|
||||||
|
[30] Char y: 0.059029761540845424 %
|
||||||
|
|
||||||
|
The first 31 characters have an accumulated ratio of 0.9994708403519017.
|
||||||
|
|
||||||
|
870 sequences found.
|
||||||
|
|
||||||
|
First 512 (typical positive ratio): 0.9959115850692665
|
||||||
|
Next 512 (512-1024): 2.108205769315908e-06
|
||||||
|
Rest: -4.423544863740858e-17
|
||||||
|
|
||||||
|
- Processing end: 2016-09-21 02:07:45.646198
|
||||||
80
script/langs/mt.py
Normal file
80
script/langs/mt.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#!/bin/python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# ##### BEGIN LICENSE BLOCK #####
|
||||||
|
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
#
|
||||||
|
# The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
# http://www.mozilla.org/MPL/
|
||||||
|
#
|
||||||
|
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
# for the specific language governing rights and limitations under the
|
||||||
|
# License.
|
||||||
|
#
|
||||||
|
# The Original Code is Mozilla Universal charset detector code.
|
||||||
|
#
|
||||||
|
# The Initial Developer of the Original Code is
|
||||||
|
# Netscape Communications Corporation.
|
||||||
|
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||||
|
# the Initial Developer. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Contributor(s):
|
||||||
|
# Jehan <jehan@girinstud.io>
|
||||||
|
#
|
||||||
|
# Alternatively, the contents of this file may be used under the terms of
|
||||||
|
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
# of those above. If you wish to allow use of your version of this file only
|
||||||
|
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
# use your version of this file under the terms of the MPL, indicate your
|
||||||
|
# decision by deleting the provisions above and replace them with the notice
|
||||||
|
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
# the provisions above, a recipient may use your version of this file under
|
||||||
|
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
#
|
||||||
|
# ##### END LICENSE BLOCK #####
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
## Mandatory Properties ##
|
||||||
|
|
||||||
|
# The human name for the language, in English.
|
||||||
|
name = 'Maltese'
|
||||||
|
# Use 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
|
||||||
|
# or use another catalog as a last resort.
|
||||||
|
code = 'mt'
|
||||||
|
# ASCII characters are also used in Maltese.
|
||||||
|
use_ascii = True
|
||||||
|
# The charsets we want to support and create data for.
|
||||||
|
charsets = ['ISO-8859-3']
|
||||||
|
|
||||||
|
## Optional Properties ##
|
||||||
|
|
||||||
|
# Alphabet characters.
|
||||||
|
# If use_ascii=True, there is no need to add any ASCII characters.
|
||||||
|
# If case_mapping=True, there is no need to add several cases of a same
|
||||||
|
# character (provided Python algorithms know the right cases).
|
||||||
|
alphabet = 'ċġħż'
|
||||||
|
# The starred page which was featured on the main page when I created
|
||||||
|
# the data.
|
||||||
|
start_pages = ['Unjoni Ewropea']
|
||||||
|
# Gives the possibility to select another code for the Wikipedia URL.
|
||||||
|
wikipedia_code = code
|
||||||
|
# 'a' and 'A' will be considered the same character, and so on.
|
||||||
|
# This uses Python's algorithm to determine the upper/lower case of a given
|
||||||
|
# character.
|
||||||
|
case_mapping = True
|
||||||
|
|
||||||
|
# A function to clean content returned by the `wikipedia` python lib,
|
||||||
|
# in case some unwanted data has been overlooked.
|
||||||
|
# Note that we are already cleaning away the '=' from the title syntax
|
||||||
|
# of Wikipedia, as well as double spaces. But sometimes, Wikipedia in
|
||||||
|
# some language may return weird syntax or UI text which should be
|
||||||
|
# discarded. If you encounter one of these cases, use this function.
|
||||||
|
def clean_wikipedia_content(content):
|
||||||
|
# Do your garbage text cleaning here.
|
||||||
|
return content
|
||||||
@ -19,6 +19,7 @@ set(
|
|||||||
LangModels/LangHebrewModel.cpp
|
LangModels/LangHebrewModel.cpp
|
||||||
LangModels/LangLithuanianModel.cpp
|
LangModels/LangLithuanianModel.cpp
|
||||||
LangModels/LangLatvianModel.cpp
|
LangModels/LangLatvianModel.cpp
|
||||||
|
LangModels/LangMalteseModel.cpp
|
||||||
LangModels/LangPortugueseModel.cpp
|
LangModels/LangPortugueseModel.cpp
|
||||||
LangModels/LangSpanishModel.cpp
|
LangModels/LangSpanishModel.cpp
|
||||||
LangModels/LangThaiModel.cpp
|
LangModels/LangThaiModel.cpp
|
||||||
|
|||||||
137
src/LangModels/LangMalteseModel.cpp
Normal file
137
src/LangModels/LangMalteseModel.cpp
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Mozilla Communicator client code.
|
||||||
|
*
|
||||||
|
* The Initial Developer of the Original Code is
|
||||||
|
* Netscape Communications Corporation.
|
||||||
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||||
|
* the Initial Developer. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s):
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#include "../nsSBCharSetProber.h"
|
||||||
|
|
||||||
|
/********* Language model for: Maltese *********/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generated by BuildLangModel.py
|
||||||
|
* On: 2016-09-21 02:07:45.509404
|
||||||
|
**/
|
||||||
|
|
||||||
|
/* Character Mapping Table:
|
||||||
|
* ILL: illegal character.
|
||||||
|
* CTR: control character specific to the charset.
|
||||||
|
* RET: carriage return or line feed.
|
||||||
|
* SYM: symbol (punctuation) that does not belong to word.
|
||||||
|
* NUM: 0 - 9.
|
||||||
|
*
|
||||||
|
* Other characters are ordered by probabilities
|
||||||
|
* (0 is the most common character in the language).
|
||||||
|
*
|
||||||
|
* Orders are generic to a language. So the codepoint with order X in
|
||||||
|
* CHARSET1 maps to the same character as the codepoint with the same
|
||||||
|
* order X in CHARSET2 for the same language.
|
||||||
|
* As such, it is possible to get missing orders. For instance the
|
||||||
|
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||||
|
* even though they are both used for French. Same for the euro sign.
|
||||||
|
*/
|
||||||
|
static const unsigned char Iso_8859_3_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 4X */
|
||||||
|
14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 0, 15, 28, 13, 4, 16, 19, 22, 1, 9, 12, 3, 10, 5, 8, /* 6X */
|
||||||
|
14, 27, 6, 11, 2, 7, 26, 18, 25, 30, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||||
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||||
|
SYM, 17,SYM,SYM,SYM,ILL, 48,SYM,SYM, 49, 50, 51, 52,SYM,ILL, 21, /* AX */
|
||||||
|
SYM, 17,SYM,SYM,SYM,SYM, 53,SYM,SYM, 54, 55, 56, 57,SYM,ILL, 21, /* BX */
|
||||||
|
29, 36, 47,ILL, 58, 24, 59, 40, 33, 31, 60, 39, 45, 35, 61, 62, /* CX */
|
||||||
|
ILL, 37, 32, 34, 44, 23, 38,SYM, 63, 43, 42, 64, 46, 65, 66, 41, /* DX */
|
||||||
|
29, 36, 47,ILL, 67, 24, 68, 40, 33, 31, 69, 39, 45, 35, 70, 71, /* EX */
|
||||||
|
ILL, 37, 32, 34, 44, 23, 38,SYM, 72, 43, 42, 73, 46, 74, 75,SYM, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
|
/* Model Table:
|
||||||
|
* Total sequences: 870
|
||||||
|
* First 512 sequences: 0.9959115850692665
|
||||||
|
* Next 512 sequences (512-1024): 0.004088414930733575
|
||||||
|
* Rest: -4.423544863740858e-17
|
||||||
|
* Negative sequences: TODO
|
||||||
|
*/
|
||||||
|
static const PRUint8 MalteseLangModel[] =
|
||||||
|
{
|
||||||
|
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3,3,2,0,3,0,0,3,3,3,2,3,3,
|
||||||
|
3,3,3,3,3,2,2,3,3,3,3,3,3,3,2,3,3,2,3,3,2,0,3,3,0,3,3,3,2,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,
|
||||||
|
3,3,3,3,3,3,2,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,3,3,3,3,3,2,3,0,3,
|
||||||
|
3,3,3,3,3,3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,
|
||||||
|
3,3,2,2,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,0,3,3,2,2,2,2,2,0,0,0,
|
||||||
|
3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,0,2,3,2,2,3,2,2,2,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,0,0,3,2,0,0,3,3,3,2,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,3,3,2,3,0,0,0,2,0,3,2,0,0,0,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,3,3,2,2,0,3,0,0,2,2,0,2,2,2,
|
||||||
|
3,3,2,3,3,2,3,3,3,3,2,3,2,2,3,0,0,0,2,3,0,0,3,0,2,0,2,0,2,0,0,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,0,3,3,3,0,3,2,3,3,3,0,3,2,0,0,2,0,3,3,0,2,
|
||||||
|
3,3,3,3,3,3,3,3,3,3,0,3,2,2,0,2,3,0,0,2,0,0,2,0,0,0,0,2,2,0,2,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,3,2,3,0,3,2,3,2,0,0,2,3,2,0,2,0,3,0,0,0,
|
||||||
|
3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,2,2,2,3,2,0,2,2,3,2,3,2,2,0,0,2,
|
||||||
|
3,3,2,3,3,3,3,3,3,2,2,2,2,3,2,2,0,3,3,3,2,3,3,0,0,0,3,0,2,2,3,
|
||||||
|
3,3,2,2,3,2,2,3,2,3,2,0,0,0,2,0,0,0,2,2,3,0,0,0,0,0,2,2,0,0,0,
|
||||||
|
3,3,2,3,3,2,0,3,3,3,3,0,0,3,0,2,2,0,2,3,0,3,0,0,0,0,3,0,0,0,0,
|
||||||
|
3,3,3,2,3,2,3,3,3,0,3,2,2,2,2,2,0,0,2,0,2,0,2,0,0,0,0,2,0,0,2,
|
||||||
|
3,3,2,2,3,3,3,3,3,3,2,0,0,3,0,2,0,2,2,3,2,2,0,3,0,0,2,0,0,2,0,
|
||||||
|
3,3,2,2,3,0,2,2,0,3,0,0,2,0,2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
3,3,3,3,3,2,3,3,3,3,3,0,2,2,0,3,2,0,2,0,0,0,3,0,0,3,2,0,2,0,0,
|
||||||
|
3,3,0,2,3,2,3,3,3,3,0,2,0,3,2,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,
|
||||||
|
3,3,3,2,3,0,3,3,3,3,2,3,2,3,0,3,3,0,3,3,0,0,2,2,2,2,0,3,0,2,0,
|
||||||
|
3,3,3,3,3,0,2,2,3,2,0,3,3,3,0,2,3,0,0,0,2,0,3,0,0,0,0,2,2,0,2,
|
||||||
|
0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,2,0,2,0,2,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const SequenceModel Iso_8859_3MalteseModel =
|
||||||
|
{
|
||||||
|
Iso_8859_3_CharToOrderMap,
|
||||||
|
MalteseLangModel,
|
||||||
|
31,
|
||||||
|
(float)0.9959115850692665,
|
||||||
|
PR_TRUE,
|
||||||
|
"ISO-8859-3"
|
||||||
|
};
|
||||||
@ -124,6 +124,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
|||||||
mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
|
mProbers[43] = new nsSingleByteCharSetProber(&Iso_8859_15PortugueseModel);
|
||||||
mProbers[44] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
|
mProbers[44] = new nsSingleByteCharSetProber(&Windows_1252PortugueseModel);
|
||||||
|
|
||||||
|
mProbers[45] = new nsSingleByteCharSetProber(&Iso_8859_3MalteseModel);
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -40,7 +40,7 @@
|
|||||||
#define nsSBCSGroupProber_h__
|
#define nsSBCSGroupProber_h__
|
||||||
|
|
||||||
|
|
||||||
#define NUM_OF_SBCS_PROBERS 45
|
#define NUM_OF_SBCS_PROBERS 46
|
||||||
|
|
||||||
class nsCharSetProber;
|
class nsCharSetProber;
|
||||||
class nsSBCSGroupProber: public nsCharSetProber {
|
class nsSBCSGroupProber: public nsCharSetProber {
|
||||||
|
|||||||
@ -185,5 +185,7 @@ extern const SequenceModel Iso_8859_9PortugueseModel;
|
|||||||
extern const SequenceModel Iso_8859_15PortugueseModel;
|
extern const SequenceModel Iso_8859_15PortugueseModel;
|
||||||
extern const SequenceModel Windows_1252PortugueseModel;
|
extern const SequenceModel Windows_1252PortugueseModel;
|
||||||
|
|
||||||
|
extern const SequenceModel Iso_8859_3MalteseModel;
|
||||||
|
|
||||||
#endif /* nsSingleByteCharSetProber_h__ */
|
#endif /* nsSingleByteCharSetProber_h__ */
|
||||||
|
|
||||||
|
|||||||
4
test/mt/iso-8859-3.txt
Normal file
4
test/mt/iso-8859-3.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Franza (Franåi¿:France), uffiåjalment ir-Repubblika Franåi¿a (Franåi¿:
|
||||||
|
République française), hi pajji¿ fl-Ewropa tal-Punent. Il-belt belt kapitali
|
||||||
|
tag±ha hi Pariõi. Hi membru tal-Unjoni Ewropea. Franza hi maqsuma f'22 régions
|
||||||
|
li huma suddivi¿i f' départements.
|
||||||
4
test/mt/utf-8.txt
Normal file
4
test/mt/utf-8.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Franza (Franċiż:France), uffiċjalment ir-Repubblika Franċiża (Franċiż:
|
||||||
|
République française), hi pajjiż fl-Ewropa tal-Punent. Il-belt belt kapitali
|
||||||
|
tagħha hi Pariġi. Hi membru tal-Unjoni Ewropea. Franza hi maqsuma f'22 régions
|
||||||
|
li huma suddiviżi f' départements.
|
||||||
Loading…
x
Reference in New Issue
Block a user