mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2026-01-01 03:12:24 +08:00
LangModels: new support for Latvian / ISO-8859-13.
Test text extracted from: https://lv.wikipedia.org/wiki/Vinsents_van_Gogs
This commit is contained in:
parent
7cb3dd9ddd
commit
ea2f4dd40f
162
script/BuildLangModelLogs/LangLatvianModel.log
Normal file
162
script/BuildLangModelLogs/LangLatvianModel.log
Normal file
@ -0,0 +1,162 @@
|
||||
= Logs of language model for Latvian (lv) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2016-09-20 23:16:39.184579
|
||||
- Maximum depth: 5
|
||||
- Max number of pages: 100
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Zigfrīds Anna Meierovics (revision 2546984)
|
||||
1. Saeima (revision 2511127)
|
||||
1. Saeimas deputāti (revision 2303859)
|
||||
1. Saeimas frakcijas (revision 2429725)
|
||||
1. Saeimas vēlēšanas (revision 2464758)
|
||||
1887. gads (revision 2583253)
|
||||
1919. gada Parīzes miera konference (revision 2482078)
|
||||
1920 (revision 2401222)
|
||||
1921 (revision 2473337)
|
||||
1922 (revision 2486819)
|
||||
1923 (revision 2544643)
|
||||
1924 (revision 2539361)
|
||||
1925 (revision 2486795)
|
||||
22. augusts (revision 2583254)
|
||||
31. jūlijs (revision 2559648)
|
||||
5. februāris (revision 2581966)
|
||||
ASV (revision 2549746)
|
||||
Agrārā reforma Latvijā (revision 2473423)
|
||||
Agudas Izrael (Latvija) (revision 2311143)
|
||||
Aigars Kalvītis (revision 2545858)
|
||||
Alberts Kviesis (revision 2546934)
|
||||
Aleksandrs Bočagovs (revision 2329526)
|
||||
Aleksandrs Dauge (revision 2546805)
|
||||
Aleksandrs Jaunbērzs (revision 2462254)
|
||||
Aleksandrs Kerenskis (revision 2461214)
|
||||
Aleksandrs Millerāns (revision 2309419)
|
||||
Aleksandrs Neibergs (revision 2491897)
|
||||
Alfrēds Birznieks (revision 2567317)
|
||||
Alfrēds Jēkabs Bērziņš (revision 2564068)
|
||||
Alfrēds Riekstiņš (politiķis) (revision 2586148)
|
||||
Andrejs Bērziņš (revision 2564283)
|
||||
Andrejs Kurcijs (revision 2564338)
|
||||
Andrejs Petrevics (revision 2460269)
|
||||
Andrejs Sīmanis (revision 2547079)
|
||||
Andrejs Veckalns (revision 2564224)
|
||||
Andrievs Niedra (revision 2546988)
|
||||
Andris Bērziņš (politiķis, 1951) (revision 2218488)
|
||||
Andris Šķēle (revision 2457423)
|
||||
Angļu valoda (revision 2447598)
|
||||
Ansis Buševics (revision 2578312)
|
||||
Ansis Rudevics (revision 2414854)
|
||||
Antante (revision 2581862)
|
||||
Antons Dzenis (revision 2564295)
|
||||
Antons Laizāns (revision 2467408)
|
||||
Antons Rubins (1885) (revision 2465396)
|
||||
Antons Velkme (revision 2564425)
|
||||
Ants Pīps (revision 2564383)
|
||||
Apollo (portāls) (revision 2371202)
|
||||
Apolonija Laurinoviča (revision 2466232)
|
||||
Aprīļa pučs (revision 2150686)
|
||||
Apvienotā Karaliste (revision 2566258)
|
||||
Aristīds Briāns (revision 2536819)
|
||||
Arons Nuroks (revision 2337085)
|
||||
Arturs Alberings (revision 2442531)
|
||||
Arturs Ozols (inženieris) (revision 2491399)
|
||||
Artūrs Balfūrs (revision 2309461)
|
||||
Artūrs Vīgants (revision 2461471)
|
||||
Artūrs Žers (revision 2564230)
|
||||
Arveds Bergs (revision 2564118)
|
||||
Arveds Švābe (revision 2586288)
|
||||
Arvīds Kalniņš (revision 2545254)
|
||||
Aspazija (revision 2574081)
|
||||
Augusts Briedis (revision 2546879)
|
||||
Augusts Kalniņš (revision 2436647)
|
||||
Augusts Kirhenšteins (revision 2547109)
|
||||
Austroungārija (revision 2524307)
|
||||
Autoritatīvā vadība (revision 2385793)
|
||||
Balfūra nota (revision 2538973)
|
||||
Baltijas Antante (revision 2541901)
|
||||
Baltijas pārkrievošana (revision 2570657)
|
||||
Bermontiāde (revision 2499160)
|
||||
Bernards Kublinskis (revision 2441386)
|
||||
Bezpartijiskais nacionālais centrs (revision 2438819)
|
||||
Beļģija (revision 2579008)
|
||||
Brestļitovskas miera līgums (revision 2569020)
|
||||
Brizules muiža (revision 2584564)
|
||||
Bruno Kalniņš (revision 2566572)
|
||||
Brīvības piemineklis (revision 2578595)
|
||||
Bulduru konference (revision 2193449)
|
||||
Ceire-Cion (revision 2311779)
|
||||
Celmiņa 1. Ministru kabinets (revision 2112830)
|
||||
Delfi (portāls) (revision 2544918)
|
||||
Demokrātiskais Centrs (revision 2113060)
|
||||
Demokrātu savienība (revision 2179593)
|
||||
Diena (laikraksts) (revision 2548854)
|
||||
Donats Bicāns (revision 2479349)
|
||||
Dubulti (Jūrmala) (revision 2456811)
|
||||
Durbe (revision 2381790)
|
||||
Dāvids Komisārs (revision 2574685)
|
||||
Džovanni Džoliti (revision 2538055)
|
||||
Ebreju bloks (revision 2311643)
|
||||
Ebreju nacionāldemokrātu partija (revision 2312288)
|
||||
Eduards Grantskalns (revision 2565167)
|
||||
Eduards Jaunzems (revision 2452579)
|
||||
Eduards Laimiņš (revision 2449521)
|
||||
Eduards Radziņš (revision 2564393)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2016-09-20 23:23:02.592168
|
||||
|
||||
48 characters appeared 354730 times.
|
||||
|
||||
First 39 characters:
|
||||
[ 0] Char a: 11.905674738533532 %
|
||||
[ 1] Char i: 9.398133791898063 %
|
||||
[ 2] Char s: 8.224565162236066 %
|
||||
[ 3] Char e: 6.367378005807234 %
|
||||
[ 4] Char r: 5.854311730048205 %
|
||||
[ 5] Char t: 5.831477461731457 %
|
||||
[ 6] Char u: 4.939813379189807 %
|
||||
[ 7] Char n: 4.463958503650664 %
|
||||
[ 8] Char ā: 3.950046514250275 %
|
||||
[ 9] Char l: 3.8031742452005752 %
|
||||
[10] Char o: 3.6298029487215633 %
|
||||
[11] Char k: 3.5249344571927943 %
|
||||
[12] Char m: 3.2740394102556873 %
|
||||
[13] Char d: 3.17790995968765 %
|
||||
[14] Char v: 3.0048205677557576 %
|
||||
[15] Char p: 2.8272207030699406 %
|
||||
[16] Char j: 2.8167902348264877 %
|
||||
[17] Char b: 2.0280213119837622 %
|
||||
[18] Char ī: 1.885659515687988 %
|
||||
[19] Char g: 1.6147492459053363 %
|
||||
[20] Char z: 1.5344064499760381 %
|
||||
[21] Char ē: 1.4594198404420264 %
|
||||
[22] Char c: 1.2231838299551772 %
|
||||
[23] Char š: 0.8877174188819666 %
|
||||
[24] Char ņ: 0.4659882163899304 %
|
||||
[25] Char f: 0.42031967975643447 %
|
||||
[26] Char ļ: 0.34702449750514475 %
|
||||
[27] Char ū: 0.3016378654187692 %
|
||||
[28] Char h: 0.20071603754968567 %
|
||||
[29] Char ž: 0.1877484283821498 %
|
||||
[30] Char ķ: 0.1420798917486539 %
|
||||
[31] Char ģ: 0.12685704620415528 %
|
||||
[32] Char č: 0.08287993685338144 %
|
||||
[33] Char w: 0.03241902291883968 %
|
||||
[34] Char y: 0.02734474107067347 %
|
||||
[35] Char x: 0.015786654638739323 %
|
||||
[36] Char ö: 0.005074281848166211 %
|
||||
[37] Char é: 0.003946663659684831 %
|
||||
[38] Char q: 0.0031009500183237955 %
|
||||
|
||||
The first 39 characters have an accumulated ratio of 0.9999013334085078.
|
||||
|
||||
956 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.9904728616367904
|
||||
Next 512 (512-1024): 0.009527138363209666
|
||||
Rest: -4.683753385137379e-17
|
||||
|
||||
- Processing end: 2016-09-20 23:23:02.695068
|
||||
69
script/langs/lv.py
Normal file
69
script/langs/lv.py
Normal file
@ -0,0 +1,69 @@
|
||||
#!/bin/python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# ##### BEGIN LICENSE BLOCK #####
|
||||
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
#
|
||||
# The contents of this file are subject to the Mozilla Public License Version
|
||||
# 1.1 (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
# http://www.mozilla.org/MPL/
|
||||
#
|
||||
# Software distributed under the License is distributed on an "AS IS" basis,
|
||||
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
# for the specific language governing rights and limitations under the
|
||||
# License.
|
||||
#
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Jehan <jehan@girinstud.io>
|
||||
#
|
||||
# Alternatively, the contents of this file may be used under the terms of
|
||||
# either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
# in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
# of those above. If you wish to allow use of your version of this file only
|
||||
# under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
# use your version of this file under the terms of the MPL, indicate your
|
||||
# decision by deleting the provisions above and replace them with the notice
|
||||
# and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
# the provisions above, a recipient may use your version of this file under
|
||||
# the terms of any one of the MPL, the GPL or the LGPL.
|
||||
#
|
||||
# ##### END LICENSE BLOCK #####
|
||||
|
||||
import re
|
||||
|
||||
# Language description for Latvian: plain module-level properties only.
# NOTE(review): presumably consumed by BuildLangModel.py (the generated log and
# model in this commit name that script) -- confirm against the script.
## Mandatory Properties ##
|
||||
|
||||
# The human-readable name of the language, in English.
|
||||
name = 'Latvian'
|
||||
# Use the 2-letter ISO 639-1 code if possible, a 3-letter ISO code otherwise,
|
||||
# or use another catalog as a last resort.
|
||||
code = 'lv'
|
||||
# ASCII characters are also used.
|
||||
use_ascii = True
|
||||
# The charsets we want to support and create data for.
|
||||
charsets = ['ISO-8859-13']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
# Alphabet characters.
|
||||
# If use_ascii=True, there is no need to add any ASCII characters.
|
||||
# If case_mapping=True, there is no need to add several cases of the same
|
||||
# character (provided Python's algorithms know the right cases).
|
||||
alphabet = 'āčēģīķļņšūž'
|
||||
# The start page. Just taking a starred page.
|
||||
start_pages = ['Zigfrīds Anna Meierovics']
|
||||
# Gives the possibility to select another code for the Wikipedia URL.
|
||||
wikipedia_code = code
|
||||
# 'a' and 'A' will be considered the same character, and so on.
|
||||
# This uses Python's algorithm to determine the upper/lower case of a given
|
||||
# character.
|
||||
case_mapping = True
|
||||
@ -18,6 +18,7 @@ set(
|
||||
LangModels/LangHungarianModel.cpp
|
||||
LangModels/LangHebrewModel.cpp
|
||||
LangModels/LangLithuanianModel.cpp
|
||||
LangModels/LangLatvianModel.cpp
|
||||
LangModels/LangSpanishModel.cpp
|
||||
LangModels/LangThaiModel.cpp
|
||||
LangModels/LangTurkishModel.cpp
|
||||
|
||||
145
src/LangModels/LangLatvianModel.cpp
Normal file
145
src/LangModels/LangLatvianModel.cpp
Normal file
@ -0,0 +1,145 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Mozilla Communicator client code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
|
||||
/********* Language model for: Latvian *********/
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2016-09-20 23:23:02.592930
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
* ILL: illegal character.
|
||||
* CTR: control character specific to the charset.
|
||||
* RET: carriage return or line feed.
|
||||
* SYM: symbol (punctuation) that does not belong to word.
|
||||
* NUM: 0 - 9.
|
||||
*
|
||||
* Other characters are ordered by probabilities
|
||||
* (0 is the most common character in the language).
|
||||
*
|
||||
* Orders are generic to a language. So the codepoint with order X in
|
||||
* CHARSET1 maps to the same character as the codepoint with the same
|
||||
* order X in CHARSET2 for the same language.
|
||||
* As such, an order may be missing from a given charset. For instance the
|
||||
* ligature of 'o' and 'e' exists in ISO-8859-15 but not in ISO-8859-1
|
||||
* even though they are both used for French. Same for the euro sign.
|
||||
*/
|
||||
/* Lookup table with one entry per byte value: 16 rows of 16 entries, the row
 * label giving the high nibble and the column the low nibble.  Each entry is
 * either a class marker (CTR, RET, SYM or NUM; ILL does not occur in this
 * table) or the frequency order of the character, 0 being the most common.
 * The letter positions of rows 4X/5X and 6X/7X carry the same orders, so
 * upper- and lower-case ASCII letters share one order (0x41 and 0x61 are
 * both 0).  The same holds for the letter positions of rows CX/DX and EX/FX.
 * NOTE(review): orders 39 and above lie beyond the 39 characters covered by
 * LatvianLangModel; presumably the prober does not score them -- confirm in
 * the single-byte prober implementation. */
static const unsigned char Iso_8859_13_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 4X */
|
||||
15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 0, 17, 22, 13, 3, 25, 19, 28, 1, 16, 11, 9, 12, 7, 10, /* 6X */
|
||||
15, 38, 4, 2, 5, 6, 14, 33, 35, 34, 20,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 48,SYM, 49,SYM,SYM,SYM,SYM, 47, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 50,SYM, 51,SYM,SYM,SYM,SYM, 47, /* BX */
|
||||
52, 53, 8, 54, 40, 46, 55, 21, 32, 37, 56, 43, 31, 30, 18, 26, /* CX */
|
||||
23, 57, 24, 44, 45, 58, 36,SYM, 59, 41, 60, 27, 39, 61, 29, 42, /* DX */
|
||||
62, 63, 8, 64, 40, 46, 65, 21, 32, 37, 66, 43, 31, 30, 18, 26, /* EX */
|
||||
23, 67, 24, 44, 45, 68, 36,SYM, 69, 41, 70, 27, 39, 71, 29,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 956
|
||||
* First 512 sequences: 0.9904728616367904
|
||||
* Next 512 sequences (512-1024): 0.009527138363209666
|
||||
* Rest: -4.683753385137379e-17
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
/* Flattened 39 x 39 matrix, written one row of 39 entries per line, addressed
 * by the frequency orders of a pair of characters (orders 0..38 only).  The
 * entries present in this table are 0, 2 and 3.
 * NOTE(review): presumably 3 marks one of the 512 most frequent sequences,
 * 2 one of the following 512, and 0 a sequence never seen in the training
 * text -- confirm against BuildLangModel.py.  Which character of the pair
 * selects the row is likewise not visible from this file. */
static const PRUint8 LatvianLangModel[] =
|
||||
{
|
||||
2,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,2,3,2,2,2,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,2,3,3,3,2,3,0,0,2,0,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,0,2,2,2,3,2,2,0,0,0,2,2,0,2,2,2,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,3,3,0,3,3,2,3,2,2,2,2,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,2,3,2,2,2,2,0,2,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3,0,0,2,0,2,2,0,0,0,0,
|
||||
3,3,3,2,3,3,2,3,3,3,2,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,0,2,2,2,2,2,0,2,0,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,3,3,3,2,3,3,3,0,3,0,2,2,2,0,0,3,0,2,0,0,0,2,
|
||||
2,2,3,2,3,3,2,3,0,3,0,3,3,3,3,3,3,3,0,2,3,0,3,3,3,3,3,0,0,2,0,2,2,0,0,0,0,0,0,
|
||||
3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,2,0,2,2,0,2,2,0,2,0,
|
||||
3,2,3,2,3,3,3,3,2,3,2,3,3,3,3,3,3,3,0,3,3,2,3,3,3,3,3,0,2,3,2,3,2,2,2,2,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,2,2,2,2,2,3,3,2,0,3,2,2,0,0,0,0,0,2,0,2,0,0,
|
||||
3,3,3,3,0,3,3,3,3,2,3,3,2,2,2,3,3,3,3,2,0,3,2,2,0,2,0,3,0,0,0,2,0,0,2,2,0,2,0,
|
||||
3,3,3,3,3,2,3,3,3,2,3,2,3,2,3,2,2,2,3,2,3,3,2,2,2,0,0,2,0,3,0,0,0,2,2,0,0,2,0,
|
||||
3,3,3,3,2,2,3,2,3,2,3,2,2,2,2,3,3,2,3,2,2,3,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,3,2,3,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,0,3,3,3,2,3,2,2,2,2,2,0,0,2,2,0,3,2,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,0,3,2,3,3,3,2,2,2,2,2,2,2,3,0,0,3,2,2,0,0,2,3,2,0,0,0,2,0,2,0,2,0,0,
|
||||
0,0,3,0,3,3,0,3,0,3,0,3,3,3,3,3,3,3,0,3,3,0,3,3,3,2,2,0,0,2,2,0,2,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,2,3,3,3,2,2,2,2,0,0,0,0,2,2,2,0,3,0,2,3,3,2,2,0,0,0,0,2,0,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,0,0,2,2,0,0,0,0,2,2,0,0,0,0,
|
||||
2,0,3,0,3,3,0,3,0,3,0,3,3,3,3,2,3,2,0,3,3,0,3,3,2,2,3,0,0,2,2,3,0,0,0,0,0,0,0,
|
||||
3,3,3,3,2,2,3,2,3,2,3,3,2,2,2,2,0,2,3,0,2,3,2,2,0,0,0,2,3,0,0,2,0,0,2,0,0,0,0,
|
||||
3,3,3,3,2,3,3,3,3,3,3,2,2,2,3,2,2,2,3,2,2,2,0,0,2,0,2,2,0,0,3,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,0,0,3,2,3,0,3,0,2,2,2,2,2,2,0,2,0,3,2,3,0,0,0,2,0,0,3,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,3,2,2,3,3,2,2,0,0,0,0,0,2,2,0,2,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,2,2,0,3,2,3,2,3,2,2,0,2,2,2,0,2,2,0,2,0,2,2,0,2,2,0,0,2,3,0,0,0,0,0,0,0,
|
||||
0,2,3,0,3,3,0,3,0,3,2,3,2,3,3,3,2,0,0,2,3,0,3,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,2,2,2,3,2,2,3,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,2,2,0,0,0,0,2,2,0,2,0,0,
|
||||
3,3,2,3,2,0,3,2,3,2,3,2,2,0,2,0,0,0,2,0,2,2,0,0,2,0,0,2,0,0,2,2,0,0,0,0,0,0,0,
|
||||
3,3,2,3,0,2,3,0,2,0,2,0,0,0,0,0,0,0,3,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,3,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,3,0,0,3,2,2,0,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,0,2,0,0,0,2,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,3,0,0,0,0,0,
|
||||
2,0,2,2,2,0,0,2,0,2,2,0,2,2,0,0,0,0,0,2,0,0,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
2,2,0,0,0,0,2,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,
|
||||
0,0,2,0,0,2,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,2,2,0,2,0,0,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
/* Latvian sequence model for ISO-8859-13, bundling the two tables of this
 * file.  The initializer is positional; SequenceModel itself is declared in
 * ../nsSBCharSetProber.h, so the field meanings noted below that are not
 * evident from this file are marked for confirmation. */
const SequenceModel Iso_8859_13LatvianModel =
|
||||
{
|
||||
Iso_8859_13_CharToOrderMap, /* byte value -> class marker / frequency order */
|
||||
LatvianLangModel, /* 39 x 39 sequence matrix */
|
||||
39, /* matches the 39 rows/columns of LatvianLangModel */
|
||||
(float)0.9904728616367904, /* ratio of the first 512 sequences (see "Model Table" comment) */
|
||||
PR_TRUE, /* NOTE(review): boolean flag whose meaning is set by SequenceModel -- confirm in nsSBCharSetProber.h */
|
||||
"ISO-8859-13" /* charset name string */
|
||||
};
|
||||
@ -113,6 +113,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
|
||||
mProbers[35] = new nsSingleByteCharSetProber(&Iso_8859_13LithuanianModel);
|
||||
|
||||
mProbers[36] = new nsSingleByteCharSetProber(&Iso_8859_13LatvianModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 36
|
||||
#define NUM_OF_SBCS_PROBERS 37
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -174,5 +174,7 @@ extern const SequenceModel Windows_1252DanishModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_13LithuanianModel;
|
||||
|
||||
extern const SequenceModel Iso_8859_13LatvianModel;
|
||||
|
||||
#endif /* nsSingleByteCharSetProber_h__ */
|
||||
|
||||
|
||||
6
test/lv/iso-8859-13.txt
Normal file
6
test/lv/iso-8859-13.txt
Normal file
@ -0,0 +1,6 @@
|
||||
Vinsents Villems van Gogs (nîderlandieðu: Vincent Willem van Gogh, dzimis 1853.
|
||||
gada 30. martâ, miris 1890. gada 29. jûlijâ) bija nîderlandieðu gleznotâjs,
|
||||
postimpresionisma pârstâvis. Kopumâ van Gogs radîja vairâk nekâ 2000 darbu, to
|
||||
skaitâ 900 gleznu un 1100 zîmçjumu un skièu. Savus slavenâkos darbus viòð radîja
|
||||
pçdçjo divu dzîves gadu laikâ. Tiek uzskatîts, ka van Gogs bûtiski ir ietekmçjis
|
||||
20. gadsimta mâkslu, tostarp ekspresionismu un fovismu.
|
||||
6
test/lv/utf-8.txt
Normal file
6
test/lv/utf-8.txt
Normal file
@ -0,0 +1,6 @@
|
||||
Vinsents Villems van Gogs (nīderlandiešu: Vincent Willem van Gogh, dzimis 1853.
|
||||
gada 30. martā, miris 1890. gada 29. jūlijā) bija nīderlandiešu gleznotājs,
|
||||
postimpresionisma pārstāvis. Kopumā van Gogs radīja vairāk nekā 2000 darbu, to
|
||||
skaitā 900 gleznu un 1100 zīmējumu un skiču. Savus slavenākos darbus viņš radīja
|
||||
pēdējo divu dzīves gadu laikā. Tiek uzskatīts, ka van Gogs būtiski ir ietekmējis
|
||||
20. gadsimta mākslu, tostarp ekspresionismu un fovismu.
|
||||
Loading…
x
Reference in New Issue
Block a user