mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
LangModels: add support for Lithuanian / ISO-8859-13.
Test text extracted from https://lt.wikipedia.org/wiki/Vincent_van_Gogh.
This commit is contained in:
parent
2a559e7b52
commit
7cb3dd9ddd
162
script/BuildLangModelLogs/LangLithuanianModel.log
Normal file
162
script/BuildLangModelLogs/LangLithuanianModel.log
Normal file
@ -0,0 +1,162 @@
|
||||
= Logs of language model for Lithuanian (lt) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2016-09-20 22:53:23.311784
|
||||
- Maximum depth: 5
|
||||
- Max number of pages: 100
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Karūna (laivas) (revision 5080379)
|
||||
1650 (revision 4990868)
|
||||
1654 (revision 4991037)
|
||||
1664 (revision 4991048)
|
||||
1665 (revision 4991050)
|
||||
1668 (revision 4991052)
|
||||
1669 (revision 4991053)
|
||||
1672 (revision 4991056)
|
||||
1676 (revision 4991060)
|
||||
1718 (revision 4990914)
|
||||
1909 (revision 4990667)
|
||||
1928 (revision 4990262)
|
||||
1932 (revision 4990613)
|
||||
1956 (revision 4990635)
|
||||
1980 (revision 4990655)
|
||||
Baltijos jūra (revision 5052833)
|
||||
Burinis laivas (revision 4657401)
|
||||
Flagmanas (laivas) (revision 5005271)
|
||||
Grimzlė (revision 4487052)
|
||||
Kalmaras (Švedija) (revision 4978519)
|
||||
Karo laivas (revision 4726931)
|
||||
Karolis XI (revision 4944621)
|
||||
Karolis XII (revision 4915230)
|
||||
Kilis (revision 4325533)
|
||||
Koordinačių sistema (revision 5033980)
|
||||
Laivo vėliava (revision 4986001)
|
||||
Liepos 1 d. (revision 4910200)
|
||||
Nyderlandai (revision 5080140)
|
||||
Rugpjūčio 10 (revision 4910281)
|
||||
Varytuvas (revision 4620792)
|
||||
Vaza (laivas) (revision 5079282)
|
||||
XVIII a. (revision 4896219)
|
||||
XVII a. (revision 4768242)
|
||||
Švedija (revision 5057665)
|
||||
Švedų kalba (revision 4687559)
|
||||
1590 (revision 4990983)
|
||||
1596 (revision 4990989)
|
||||
1608 (revision 4991000)
|
||||
1610 (revision 4991002)
|
||||
1623 m. (revision 4991015)
|
||||
1634 m. (revision 4991026)
|
||||
1643 m. (revision 4990870)
|
||||
1644 m. (revision 4990872)
|
||||
1645 m. (revision 4990873)
|
||||
1646 m. (revision 4990874)
|
||||
1647 m. (revision 4913295)
|
||||
1648 m. (revision 4990875)
|
||||
1649 m. (revision 4990876)
|
||||
1651 m. (revision 4991035)
|
||||
1652 m. (revision 4991072)
|
||||
1653 m. (revision 4991036)
|
||||
1654 m. (revision 4991037)
|
||||
1655 m. (revision 4991038)
|
||||
1662 m. (revision 4991046)
|
||||
1668 m. (revision 4991052)
|
||||
1677 m. (revision 4991061)
|
||||
1702 (revision 4990595)
|
||||
1704 (revision 4990863)
|
||||
1722 (revision 4990918)
|
||||
1723 (revision 4990919)
|
||||
1737 (revision 4990931)
|
||||
2 tūkstantmetis (revision 4296407)
|
||||
ATR (revision 5078529)
|
||||
Abiejų Tautų Respublika (revision 5078529)
|
||||
Adomas Freitagas (revision 4362991)
|
||||
Anglų kalba (revision 4911240)
|
||||
Armėnų kalendorius (revision 4817534)
|
||||
Bahajų kalendorius (revision 4706296)
|
||||
Bajorai (revision 5006456)
|
||||
Berberų kalendorius (revision 4926904)
|
||||
Birželio 21 (revision 4910142)
|
||||
Bizantijos kalendorius (revision 4927623)
|
||||
Budistų kalendorius (revision 4705734)
|
||||
Dešimtmetis (revision 4296419)
|
||||
Dominikonai (revision 4921895)
|
||||
Dominikonų ordinas (revision 4921895)
|
||||
Džohoro sultonatas (revision 4934526)
|
||||
Džu Ihai (revision 4991072)
|
||||
Džu Joulang (revision 4991072)
|
||||
Emanuelis Vladislovas Tiškevičius Logoiskis (revision 4939239)
|
||||
Filosofas (revision 5078172)
|
||||
Gegužės 26 (revision 4910130)
|
||||
Grafas (titulas) (revision 5008057)
|
||||
Grigaliaus kalendorius (revision 5000317)
|
||||
Hebrajų kalendorius (revision 4728592)
|
||||
Imperatorius Go-Komijas (revision 4907057)
|
||||
Inocentas X (revision 4905150)
|
||||
Iraniečių kalendorius (revision 4964854)
|
||||
Isaac Titsingh (revision 4990745)
|
||||
Japonija (revision 5035249)
|
||||
Japonijos imperatorius (revision 4720428)
|
||||
Japonų kalendorius (revision 4956765)
|
||||
John Churchill (revision 4903704)
|
||||
Jonas Kazimieras Vaza (revision 5037754)
|
||||
Jurgis Kasakauskis (revision 5047829)
|
||||
Jurgis Kazimieras Ancuta (revision 5059404)
|
||||
Jurgis Mikalojus Tiškevičius (revision 4939554)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2016-09-20 22:57:33.076907
|
||||
|
||||
53 characters appeared 353013 times.
|
||||
|
||||
First 38 characters:
|
||||
[ 0] Char i: 13.033797622183885 %
|
||||
[ 1] Char a: 11.1684272250597 %
|
||||
[ 2] Char s: 8.587502443252799 %
|
||||
[ 3] Char o: 7.01957151719625 %
|
||||
[ 4] Char e: 5.52642537243671 %
|
||||
[ 5] Char r: 5.469770235090492 %
|
||||
[ 6] Char n: 5.143153368289553 %
|
||||
[ 7] Char t: 5.1063275290145125 %
|
||||
[ 8] Char u: 4.270947528844546 %
|
||||
[ 9] Char k: 3.9621770303076653 %
|
||||
[10] Char l: 3.905521892961449 %
|
||||
[11] Char m: 3.360216196004113 %
|
||||
[12] Char d: 3.037565188817409 %
|
||||
[13] Char v: 2.727378311846872 %
|
||||
[14] Char j: 2.447501933356562 %
|
||||
[15] Char p: 2.3293759719897 %
|
||||
[16] Char g: 1.942987935288502 %
|
||||
[17] Char ė: 1.56594799624943 %
|
||||
[18] Char b: 1.5075932047828267 %
|
||||
[19] Char y: 1.223750966678281 %
|
||||
[20] Char ų: 1.1818261650420805 %
|
||||
[21] Char š: 0.9631373348856841 %
|
||||
[22] Char ž: 0.8172503562191761 %
|
||||
[23] Char c: 0.5960120448821998 %
|
||||
[24] Char č: 0.48015228900918666 %
|
||||
[25] Char f: 0.42831283833739836 %
|
||||
[26] Char h: 0.42519680578335645 %
|
||||
[27] Char z: 0.40111837241121434 %
|
||||
[28] Char ū: 0.3685416684371397 %
|
||||
[29] Char ą: 0.352678229980199 %
|
||||
[30] Char į: 0.29007430321262956 %
|
||||
[31] Char ę: 0.1481531841603567 %
|
||||
[32] Char x: 0.08753218719990481 %
|
||||
[33] Char w: 0.05920461852679646 %
|
||||
[34] Char ō: 0.018129643950789347 %
|
||||
[35] Char ö: 0.00878154628866359 %
|
||||
[36] Char é: 0.007648443541739256 %
|
||||
[37] Char q: 0.0073651678550081725 %
|
||||
|
||||
The first 38 characters have an accumulated ratio of 0.9997705466937479.
|
||||
|
||||
976 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.9930868640383149
|
||||
Next 512 (512-1024): 0.0069131359616851065
|
||||
Rest: -2.688821387764051e-17
|
||||
|
||||
- Processing end: 2016-09-20 22:57:33.185223
|
||||
72
script/charsets/iso-8859-13.py
Normal file
72
script/charsets/iso-8859-13.py
Normal file
@ -0,0 +1,72 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
from codepoints import *
|
||||
|
||||
# Description of the ISO-8859-13 charset.
# Canonical name of the charset.
name = 'ISO-8859-13'
|
||||
# Other names under which the same charset is known.
aliases = ['csISO885913']
|
||||
|
||||
# Language codes, split into a 'complete' and an 'incomplete' list.
# NOTE(review): presumably 'complete' means the charset covers the whole
# alphabet of the language -- confirm against the consumer of this dict.
language = \
|
||||
{
|
||||
# Designed to cover Baltic languages.
|
||||
'complete': [ 'lv', 'lt' ],
|
||||
'incomplete': []
|
||||
}
|
||||
|
||||
# X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF #
|
||||
# One class marker per byte value: 16 rows of 16 entries, the row label giving
# the high nibble. CTR, RET, SYM, NUM and LET are not defined in this file;
# they come from the `codepoints` star-import.
charmap = \
|
||||
[
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, # 0X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 1X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, # 2X
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, # 3X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 4X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,SYM, # 5X
|
||||
SYM,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # 6X
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,SYM,SYM,SYM,SYM,CTR, # 7X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 8X
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, # 9X
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,SYM,SYM,SYM,LET, # AX
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,LET,SYM,LET,SYM,SYM,SYM,SYM,LET, # BX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # CX
|
||||
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,LET, # DX
|
||||
LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET,LET, # EX
|
||||
LET,LET,LET,LET,LET,LET,LET,SYM,LET,LET,LET,LET,LET,LET,LET,SYM, # FX
|
||||
]
|
||||
70
script/langs/lt.py
Normal file
70
script/langs/lt.py
Normal file
@ -0,0 +1,70 @@
|
||||
#!/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
import re
|
||||
|
||||
## Mandatory Properties ##
|
||||
|
||||
# The human-readable name of the language, in English.
|
||||
name = 'Lithuanian'
|
||||
# Use the 2-letter ISO 639-1 code if possible, a 3-letter ISO code otherwise,
|
||||
# or use another catalog as a last resort.
|
||||
code = 'lt'
|
||||
# ASCII characters are also used.
|
||||
use_ascii = True
|
||||
# The charsets we want to support and create data for.
|
||||
charsets = ['ISO-8859-13']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
# Alphabet characters.
|
||||
# If use_ascii=True, there is no need to add any ASCII characters.
|
||||
# If case_mapping=True, there is no need to list both cases of the same
|
||||
# character (provided Python's case conversion knows the mapping).
|
||||
alphabet = 'ąčęėįšųūž'
|
||||
# The start page: simply the page that was on the front page the day
|
||||
# the data was created.
|
||||
start_pages = ['Karūna (laivas)']
|
||||
# Makes it possible to select another code for the Wikipedia URL.
|
||||
wikipedia_code = code
|
||||
# 'a' and 'A' will be considered the same character, and so on.
|
||||
# This relies on Python's own algorithm to determine the upper/lower case
|
||||
# of a given character.
|
||||
case_mapping = True
|
||||
@ -17,6 +17,7 @@ set(
|
||||
LangModels/LangGreekModel.cpp
|
||||
LangModels/LangHungarianModel.cpp
|
||||
LangModels/LangHebrewModel.cpp
|
||||
LangModels/LangLithuanianModel.cpp
|
||||
LangModels/LangSpanishModel.cpp
|
||||
LangModels/LangThaiModel.cpp
|
||||
LangModels/LangTurkishModel.cpp
|
||||
|
||||
144
src/LangModels/LangLithuanianModel.cpp
Normal file
144
src/LangModels/LangLithuanianModel.cpp
Normal file
@ -0,0 +1,144 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Communicator client code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
|
||||
/********* Language model for: Lithuanian *********/
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2016-09-20 22:57:33.077635
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
* ILL: illegal character.
|
||||
* CTR: control character specific to the charset.
|
||||
* RET: line feed or carriage return.
|
||||
* SYM: symbol (punctuation) that does not belong to word.
|
||||
* NUM: 0 - 9.
|
||||
*
|
||||
* Other characters are ordered by probabilities
|
||||
* (0 is the most common character in the language).
|
||||
*
|
||||
* Orders are generic to a language. So the codepoint with order X in
|
||||
* CHARSET1 maps to the same character as the codepoint with the same
|
||||
* order X in CHARSET2 for the same language.
|
||||
* As such, some orders may be missing from a given charset. For instance, the
|
||||
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||
* even though they are both used for French. Same for the euro sign.
|
||||
*/
|
||||
/* Byte-to-order table for ISO-8859-13: one entry per byte value, laid out
 * as 16 rows of 16 with the row label giving the high nibble. Each entry is
 * either one of the class markers CTR/RET/SYM/NUM or a numeric order.
 * The numeric orders of rows 4X/5X are repeated in rows 6X/7X, so upper- and
 * lowercase ASCII letters share the same order.
 * NOTE(review): orders of 38 and above presumably fall outside the sequence
 * table, whose size is given as 38 in Iso_8859_13LithuanianModel -- confirm
 * against the prober implementation.
 */
static const unsigned char Iso_8859_13_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 4X */
|
||||
15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 1, 18, 23, 12, 4, 25, 16, 26, 0, 14, 9, 10, 11, 6, 3, /* 6X */
|
||||
15, 37, 5, 2, 7, 8, 13, 33, 32, 19, 27,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 45,SYM, 53,SYM,SYM,SYM,SYM, 54, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 45,SYM, 55,SYM,SYM,SYM,SYM, 56, /* BX */
|
||||
29, 30, 39, 46, 38, 41, 31, 47, 24, 36, 57, 17, 58, 59, 44, 50, /* CX */
|
||||
21, 48, 51, 42, 34, 60, 35,SYM, 20, 40, 52, 28, 43, 61, 22, 49, /* DX */
|
||||
29, 30, 39, 46, 38, 41, 31, 47, 24, 36, 62, 17, 63, 64, 44, 50, /* EX */
|
||||
21, 48, 51, 42, 34, 65, 35,SYM, 20, 40, 52, 28, 43, 66, 22,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 976
|
||||
* First 512 sequences: 0.9930868640383149
|
||||
* Next 512 sequences (512-1024): 0.0069131359616851065
|
||||
* Rest: -2.688821387764051e-17
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
/* Sequence table for Lithuanian: 38 rows of 38 entries, matching the size of
 * 38 passed alongside it in Iso_8859_13LithuanianModel. Only the values
 * 0, 2 and 3 occur in this table.
 * NOTE(review): presumably indexed by the orders of two consecutive
 * characters, with higher values marking more frequent pairs -- confirm
 * against the prober implementation.
 */
static const PRUint8 LithuanianLangModel[] =
|
||||
{
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,3,3,0,2,3,2,2,2,0,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,3,3,3,3,3,3,0,0,0,0,2,2,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,0,3,3,2,3,2,3,3,2,3,0,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,3,3,3,0,0,0,0,2,3,0,0,0,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,2,3,0,0,2,0,2,3,0,0,0,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,3,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,2,2,3,3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,3,2,2,2,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,3,3,3,3,2,0,2,0,2,3,2,3,3,3,3,0,2,2,2,2,0,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,3,3,3,3,3,2,3,0,0,0,0,0,2,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,2,2,3,2,3,3,3,0,3,2,2,3,2,3,3,2,3,0,2,2,0,2,0,
|
||||
3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,3,2,3,3,3,3,0,2,0,2,2,0,
|
||||
3,3,3,3,3,2,2,3,3,2,2,3,2,2,2,3,2,3,3,3,3,2,3,2,0,2,0,2,3,3,0,3,0,2,2,2,2,0,
|
||||
3,3,3,3,3,3,2,2,3,3,2,3,2,3,2,2,2,3,2,3,3,2,3,2,0,2,2,2,2,3,2,3,0,2,2,2,2,2,
|
||||
3,3,3,3,3,2,2,2,3,2,3,0,2,0,2,2,0,3,0,3,3,2,0,2,0,0,0,3,2,3,0,3,0,0,0,0,0,0,
|
||||
3,3,2,3,3,2,2,2,3,2,0,0,0,0,0,2,2,3,0,2,3,0,0,0,0,0,0,0,3,3,3,3,0,0,2,2,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,2,2,3,3,2,2,3,0,3,2,3,2,2,2,2,3,0,2,2,2,2,0,0,2,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,3,2,3,2,3,3,2,2,2,0,0,3,3,3,3,2,2,0,2,2,2,0,0,
|
||||
2,0,3,0,0,3,3,3,2,3,3,3,3,3,3,0,3,0,2,0,0,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,2,3,0,3,2,2,2,0,3,2,2,3,2,2,2,0,0,2,2,3,3,2,3,0,2,2,2,0,0,
|
||||
2,3,3,2,2,3,3,3,2,3,3,3,3,3,3,3,3,0,3,2,0,2,2,2,3,2,0,3,2,0,0,0,0,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,3,2,3,2,3,2,2,2,2,3,2,0,0,2,2,2,2,0,0,2,0,0,0,
|
||||
3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,2,2,3,2,3,2,2,0,0,0,2,0,0,2,2,2,2,0,0,2,0,0,0,
|
||||
3,3,2,3,3,2,0,2,3,3,3,2,2,2,0,0,2,2,2,2,0,0,0,2,0,2,3,2,3,2,0,0,0,0,0,0,2,2,
|
||||
3,3,0,2,3,0,0,0,2,2,0,0,2,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,
|
||||
3,3,2,3,3,3,0,2,3,2,3,2,0,0,2,0,2,2,2,2,2,0,0,2,0,2,0,0,2,2,0,0,0,0,0,2,0,0,
|
||||
3,3,2,3,3,3,3,3,3,2,2,3,2,0,2,0,0,0,2,2,2,0,0,0,0,2,0,0,2,2,0,0,0,2,2,0,0,0,
|
||||
3,3,2,3,3,2,2,2,3,2,3,3,3,2,0,2,2,2,2,3,3,0,0,2,0,0,2,2,2,2,0,2,0,2,2,0,2,0,
|
||||
2,0,3,0,0,3,3,3,0,3,2,3,3,2,0,2,3,0,2,0,0,2,2,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,2,0,0,0,2,2,2,0,2,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,3,0,0,3,0,3,0,3,3,2,2,3,2,3,3,2,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,0,0,2,2,0,2,2,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,
|
||||
3,3,2,2,3,2,2,0,2,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,
|
||||
2,0,2,0,2,0,2,0,0,2,0,2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,2,2,2,0,2,2,2,2,0,0,0,2,0,0,0,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,2,2,0,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
/* Lithuanian model for ISO-8859-13, bundling the two tables defined above
 * with their parameters.
 * NOTE(review): the SequenceModel declaration is not visible in this file, so
 * the field meanings noted below are inferred from the values -- confirm
 * against nsSBCharSetProber.h.
 */
const SequenceModel Iso_8859_13LithuanianModel =
|
||||
{
|
||||
Iso_8859_13_CharToOrderMap, /* byte -> order table */
|
||||
LithuanianLangModel, /* 38 x 38 sequence table */
|
||||
38, /* matches the row/column count of LithuanianLangModel */
|
||||
(float)0.9930868640383149, /* same value as "First 512 sequences" in the Model Table comment */
|
||||
PR_TRUE, /* NOTE(review): presumably a keep-English-letters flag -- confirm */
|
||||
"ISO-8859-13" /* charset name */
|
||||
};
|
||||
@ -111,6 +111,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
mProbers[33] = new nsSingleByteCharSetProber(&Iso_8859_1DanishModel);
|
||||
mProbers[34] = new nsSingleByteCharSetProber(&Windows_1252DanishModel);
|
||||
|
||||
mProbers[35] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 35
|
||||
#define NUM_OF_SBCS_PROBERS 36
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -172,5 +172,7 @@ extern const SequenceModel Iso_8859_15DanishModel;
|
||||
extern const SequenceModel Iso_8859_1DanishModel;
|
||||
extern const SequenceModel Windows_1252DanishModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_13LithuanianModel;
|
||||
|
||||
#endif /* nsSingleByteCharSetProber_h__ */
|
||||
|
||||
|
||||
3
test/lt/iso-8859-13.txt
Normal file
3
test/lt/iso-8859-13.txt
Normal file
@ -0,0 +1,3 @@
|
||||
Vincentas van Gogas (ol. Vincent van Gogh, 1853 m. kovo 30 d. Grot Zunderte,
|
||||
Nyderlandai - 1890 m. liepos 29 d. Overe prie Uazos, Prancûzija) - olandø
|
||||
tapytojas ir grafikas, postimpresionistas.
|
||||
3
test/lt/utf-8.txt
Normal file
3
test/lt/utf-8.txt
Normal file
@ -0,0 +1,3 @@
|
||||
Vincentas van Gogas (ol. Vincent van Gogh, 1853 m. kovo 30 d. Grot Zunderte,
|
||||
Nyderlandai – 1890 m. liepos 29 d. Overe prie Uazos, Prancūzija) – olandų
|
||||
tapytojas ir grafikas, postimpresionistas.
|
||||
Loading…
x
Reference in New Issue
Block a user