mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 08:46:40 +08:00
script, src: update Norwegian model with the new language features.
As I just rebased my branch for the new language detection API, I needed to re-generate the Norwegian language models. Unfortunately, it doesn't detect UTF-8 Norwegian text yet, though it is not far off (Norwegian comes out as the second candidate with a high 91% confidence, unfortunately beaten by Danish UTF-8 with 94% confidence!). Note that I also updated the alphabet list for Norwegian, as it contained too many letters (according to Wikipedia at least), so even when training a model, some of these characters were missing from the training set.
This commit is contained in:
parent
784f614c84
commit
0be80a21db
234
script/BuildLangModelLogs/LangNorwegianModel.log
Normal file
234
script/BuildLangModelLogs/LangNorwegianModel.log
Normal file
@ -0,0 +1,234 @@
|
||||
= Logs of language model for Norwegian (no) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2022-11-30 20:26:27.916571
|
||||
- Maximum depth: 2
|
||||
- Max number of pages: 200
|
||||
|
||||
== Parsed pages ==
|
||||
|
||||
Norsk (revision 22974717)
|
||||
Saft (revision 22967608)
|
||||
Hund (revision 23005187)
|
||||
Valg i Norge (revision 22782362)
|
||||
Asia (revision 23117912)
|
||||
Saarloos wolfhond (revision 22789727)
|
||||
Østfold (revision 23055508)
|
||||
Fårehunder (revision 22264555)
|
||||
Stripesjakal (revision 18745363)
|
||||
12. mai (revision 23118103)
|
||||
Gullsjakal (revision 23104601)
|
||||
Urhund (revision 23050226)
|
||||
E (revision 22904440)
|
||||
Luxembourgsk (revision 22813155)
|
||||
Obstruent (revision 15267134)
|
||||
Gudbrandsdalen (revision 23014277)
|
||||
Norges berggrunn (revision 21768509)
|
||||
Riksforsamlingen (revision 22999081)
|
||||
Sosiolekt (revision 21458982)
|
||||
Habitat (revision 23123646)
|
||||
Norsk språkhistorie (20. århundre) (revision 22891154)
|
||||
Søsterart (revision 20748512)
|
||||
Halvdan Koht (revision 22303367)
|
||||
Plosiver (revision 21816753)
|
||||
Svorsk (revision 20789512)
|
||||
Skandinavia (revision 22814296)
|
||||
Partisipp (revision 22785842)
|
||||
H (revision 23086416)
|
||||
Kreft (revision 23050449)
|
||||
Kreft hos hunder (revision 21811805)
|
||||
Q (revision 23024714)
|
||||
Fédération Cynologique Internationale (revision 22172054)
|
||||
Rosin (revision 22818749)
|
||||
Tribus (biologi) (revision 21339936)
|
||||
Siste istids maksimum (revision 23141296)
|
||||
Laurents Hallager (revision 22655416)
|
||||
Canider (revision 22229857)
|
||||
Individ (revision 20992252)
|
||||
Stortingsvalg 1945– (revision 22861299)
|
||||
Svalbards geologi (revision 22935346)
|
||||
Riksmålsvernet (revision 22966421)
|
||||
Magedreining (hund) (revision 21661370)
|
||||
Stortinget (revision 23071662)
|
||||
Bokmål (revision 22928969)
|
||||
Recessiv (revision 21780786)
|
||||
Synkopetida (revision 22906353)
|
||||
Artskompleks (revision 20848344)
|
||||
Homogenitet (revision 22857280)
|
||||
Pyometra (hund) (revision 22374115)
|
||||
Den norske språkstriden (revision 22428585)
|
||||
Gruppe (biologi) (revision 21969525)
|
||||
Stående fuglehunder (revision 22264516)
|
||||
Samnorsk (revision 22785915)
|
||||
Fastlands-Norge (revision 23141642)
|
||||
Drivende hunder (revision 22264618)
|
||||
Sibir (revision 22369404)
|
||||
Norges demografi (revision 23034159)
|
||||
FCI (revision 22172054)
|
||||
Vannhunder (revision 22264145)
|
||||
Prednisolon (revision 21804718)
|
||||
Midtvesten (revision 22423559)
|
||||
Buskerud (revision 22915767)
|
||||
Sogn og Fjordane (revision 22811825)
|
||||
Transport i Norge (revision 23131810)
|
||||
Ustemt palatal frikativ (revision 19011330)
|
||||
Anatolsk gjeterhund (revision 22303224)
|
||||
Norges fylker (revision 23129287)
|
||||
Tonelag (revision 22751959)
|
||||
Statsforvalter (revision 23133685)
|
||||
Sjokolade (revision 22988920)
|
||||
Nasaler (revision 16002502)
|
||||
Hundens pels (revision 22900550)
|
||||
Approksimanter (revision 16000119)
|
||||
Tapper (revision 18322970)
|
||||
Vakt- og vokterhunder (revision 23091054)
|
||||
Saluki (revision 22267261)
|
||||
Canis (revision 23079627)
|
||||
Island (revision 23097723)
|
||||
Flyball (revision 20457011)
|
||||
Staffordshire bull terrier (revision 23135078)
|
||||
Stockholm (revision 22770528)
|
||||
Sahel (revision 19821400)
|
||||
ISO 639-3 (revision 18859824)
|
||||
Ny-guinea villhund (revision 22567866)
|
||||
Rabies (revision 19440055)
|
||||
Ordbog over det norske Folkesprog (revision 23096800)
|
||||
Norge (revision 23141642)
|
||||
Flåttbårne sykdommer (hund) (revision 21355504)
|
||||
Bombehund (revision 22942055)
|
||||
Læreboknormalen av 1959 (revision 18841941)
|
||||
Tromøy (revision 22053767)
|
||||
Vorstehhund korthåret (revision 22264532)
|
||||
Tåkeskog (revision 20461967)
|
||||
Vest-Telemark (revision 22923647)
|
||||
Oslo (revision 23118371)
|
||||
Tyrkia (revision 23034073)
|
||||
Liste over Norges største tettsteder (revision 23138252)
|
||||
Energi (revision 22979461)
|
||||
Jakt med hund (revision 22890790)
|
||||
Sogn fogderi (revision 22425444)
|
||||
Integrated Taxonomic Information System (revision 20457376)
|
||||
Tadsjikistan (revision 22864814)
|
||||
Befolkningstetthet (revision 22253839)
|
||||
Tøddel (revision 21641445)
|
||||
Den lille istid (revision 22782643)
|
||||
Norsk språkhistorie (1400–1800) (revision 21342667)
|
||||
Unionen mellom Sverige og Norge (revision 22922743)
|
||||
Fylkeskommune (revision 22011606)
|
||||
ĸ (revision 17096887)
|
||||
Degas (revision 22751270)
|
||||
Gløgg (revision 22902469)
|
||||
Antistoff (revision 20746889)
|
||||
Norges statsminister (revision 22948566)
|
||||
Lørdag (revision 23031303)
|
||||
Ş (revision 12094187)
|
||||
Hallingdal (revision 22811584)
|
||||
1969 (revision 22958238)
|
||||
Juli (revision 22359558)
|
||||
Shar pei (revision 22891357)
|
||||
Dyr (revision 23101991)
|
||||
Ƙ (revision 15223100)
|
||||
PhyloCode (revision 22857413)
|
||||
Y-kromosom (revision 22783781)
|
||||
Høst (revision 23087627)
|
||||
Geit (revision 21989005)
|
||||
Guatemala (revision 22780680)
|
||||
USA (revision 22781448)
|
||||
Tamhund (revision 23005187)
|
||||
Populasjonsdynamikk (revision 20640003)
|
||||
Christoffer Oftedahl (revision 19783269)
|
||||
Mellomnorsk (revision 22546096)
|
||||
1000 (revision 20456192)
|
||||
Servicehund (revision 22337757)
|
||||
Himalayaulv (revision 21791662)
|
||||
Ø (bokstav) (revision 22617366)
|
||||
Ǩ (revision 15223173)
|
||||
Bordeaux dogge (revision 22266230)
|
||||
Frøplanter (revision 21763501)
|
||||
Ustemt bilabial plosiv (revision 22354758)
|
||||
Digraf (revision 19954081)
|
||||
12. århundre (revision 23123540)
|
||||
Sametingsvalget 1993 (revision 21890290)
|
||||
Førerhund (revision 20465384)
|
||||
Grenada (revision 22948831)
|
||||
Aserbajdsjans administrative inndeling (revision 22782483)
|
||||
Verneområder i Norge (revision 22076171)
|
||||
Pelsdyroppdrett (revision 22827568)
|
||||
Kretahund (revision 22201230)
|
||||
Etne (revision 22659600)
|
||||
Koreansk chejudo (revision 22199018)
|
||||
Riesenschnauzer (revision 23103775)
|
||||
Italias regioner (revision 22182270)
|
||||
Dingo (revision 23050226)
|
||||
Firfisle (revision 21650282)
|
||||
Dominans (revision 21160764)
|
||||
CITES (revision 22637082)
|
||||
Helligdager i Norge (revision 22095322)
|
||||
Bunad (revision 23086915)
|
||||
Barnekreftforeningen (revision 19888945)
|
||||
Guttorm Hansen (revision 22098933)
|
||||
Albania (revision 22939774)
|
||||
Medier i Norge (revision 21776331)
|
||||
Finsk (revision 22908244)
|
||||
Anders Lysgaard (revision 22858529)
|
||||
Bakverk (revision 15226081)
|
||||
Ć (revision 15785421)
|
||||
Vatikanstaten (revision 22782366)
|
||||
Steinalderen i Norge (revision 23106147)
|
||||
Johnny Depp (revision 22764203)
|
||||
Sverre Steen (revision 22112509)
|
||||
Fjellrev (revision 22812483)
|
||||
Bayersk viltsporhund (revision 22805751)
|
||||
Ń (revision 15222385)
|
||||
Utdannelse i Norge (revision 22814897)
|
||||
Espen Berntsen (revision 21025561)
|
||||
Nederland (revision 23024484)
|
||||
Liste over hundegrupper (revision 18570830)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2022-11-30 20:29:27.551046
|
||||
|
||||
62 characters appeared 1228749 times.
|
||||
|
||||
Most Frequent characters:
|
||||
[ 0] Char e: 15.049208585317261 %
|
||||
[ 1] Char r: 8.84924423132796 %
|
||||
[ 2] Char n: 8.422550089562636 %
|
||||
[ 3] Char t: 7.726394894319344 %
|
||||
[ 4] Char s: 6.64798099530498 %
|
||||
[ 5] Char a: 6.28020856985438 %
|
||||
[ 6] Char i: 5.99455218274847 %
|
||||
[ 7] Char l: 5.422262805503809 %
|
||||
[ 8] Char o: 5.386942329149403 %
|
||||
[ 9] Char d: 4.534774799409806 %
|
||||
[10] Char g: 3.86091870674971 %
|
||||
[11] Char k: 3.6487516978650643 %
|
||||
[12] Char m: 3.216197937902696 %
|
||||
[13] Char v: 2.4669806445417253 %
|
||||
[14] Char f: 2.0122091655822305 %
|
||||
[15] Char u: 1.8136332155712844 %
|
||||
[16] Char p: 1.6869189720602011 %
|
||||
[17] Char b: 1.4243755233981878 %
|
||||
[18] Char h: 1.3665117937023752 %
|
||||
[19] Char å: 1.1134902246105591 %
|
||||
[20] Char y: 0.8473658981614633 %
|
||||
[21] Char ø: 0.792431977564173 %
|
||||
[22] Char j: 0.7630525029928814 %
|
||||
[23] Char c: 0.2926553755079353 %
|
||||
[24] Char æ: 0.20012223814627725 %
|
||||
[25] Char w: 0.05932863424507365 %
|
||||
[26] Char z: 0.028565638710591017 %
|
||||
[27] Char x: 0.023194322029967063 %
|
||||
[28] Char é: 0.017171936660782636 %
|
||||
[29] Char q: 0.009521879570197005 %
|
||||
|
||||
The first 30 characters have an accumulated ratio of 0.9995751776807141.
|
||||
|
||||
967 sequences found.
|
||||
|
||||
First 442 (typical positive ratio): 0.9950425176429516
|
||||
Next 157 (599-442): 0.0039580060347621515
|
||||
Rest: 0.0009994763222862524
|
||||
|
||||
- Processing end: 2022-11-30 20:29:27.623923
|
||||
@ -48,7 +48,7 @@ charsets = ['IBM865', 'ISO-8859-15', 'ISO-8859-1', 'WINDOWS-1252']
|
||||
## Optional Properties ##
|
||||
|
||||
# Alphabet characters.
|
||||
alphabet = 'æøåéìîàêÆØÅ'
|
||||
alphabet = 'æøå'
|
||||
# Some pages that should contain most norwegian-norwegian norwegian
|
||||
start_pages = ['Norsk', 'Saft', 'Hund']
|
||||
wikipedia_code = code
|
||||
|
||||
@ -36,12 +36,13 @@
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "../nsSBCharSetProber.h"
|
||||
#include "../nsLanguageDetector.h"
|
||||
|
||||
/********* Language model for: Norwegian *********/
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2022-01-28 21:58:11.143599
|
||||
* On: 2022-11-30 20:29:27.551827
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
@ -67,17 +68,17 @@ static const unsigned char Ibm865_CharToOrderMap[] =
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
43, 32, 28, 50, 31, 45, 19, 43, 53, 42, 41, 57, 61, 58, 31, 19, /* 8X */
|
||||
28, 24, 24, 37, 30, 54, 63, 59, 64, 30, 32, 21,SYM, 21,SYM,SYM, /* 9X */
|
||||
36, 33, 35, 40, 44, 44,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
37, 36, 28, 45, 31, 43, 19, 37, 53, 39, 44, 59, 56, 54, 31, 19, /* 8X */
|
||||
28, 24, 24, 41, 30, 48, 62, 55, 63, 30, 36, 21,SYM, 21,SYM,SYM, /* 9X */
|
||||
33, 34, 35, 40, 49, 49,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* CX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* DX */
|
||||
48, 46, 65, 66, 60, 60, 67, 62, 68, 69, 70, 71, 72, 73, 52,SYM, /* EX */
|
||||
52, 60, 64, 65, 61, 61, 66, 47, 67, 68, 69, 70, 71, 72, 42,SYM, /* EX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
@ -88,18 +89,18 @@ static const unsigned char Iso_8859_15_CharToOrderMap[] =
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 47,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM, 49, 74,SYM,SYM, 49,SYM,SYM,SYM, 51, 51, 75,SYM, /* BX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 76, 32, 39, 38, 46, /* DX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 77, 32, 39, 38, 78, /* FX */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 58,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM, 73, 74,SYM,SYM, 75,SYM,SYM,SYM, 50, 50, 76,SYM, /* BX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 77, 36, 51, 38, 60, /* DX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 78, 36, 51, 38, 79, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
@ -109,18 +110,18 @@ static const unsigned char Iso_8859_1_CharToOrderMap[] =
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 8X */
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 79,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 80, 32, 39, 38, 46, /* DX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 81, 32, 39, 38, 82, /* FX */
|
||||
SYM,SYM,SYM,SYM,SYM, 80,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 81, 36, 51, 38, 60, /* DX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 82, 36, 51, 38, 83, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
@ -130,155 +131,75 @@ static const unsigned char Windows_1252_CharToOrderMap[] =
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 4X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 11, 18, 6, 22, 10, 7, 12, 2, 8, /* 6X */
|
||||
15, 29, 1, 4, 3, 16, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM, 83,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 47,SYM, 51,ILL, 49, 84, /* 9X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 4X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 5, 17, 23, 9, 0, 14, 10, 18, 6, 22, 11, 7, 12, 2, 8, /* 6X */
|
||||
16, 29, 1, 4, 3, 15, 13, 25, 27, 20, 26,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM, 84,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 85,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, 58,SYM, 50,ILL, 86, 87, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 85,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* CX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 86, 32, 39, 38, 46, /* DX */
|
||||
45, 36, 50, 55, 31, 19, 24, 43, 41, 28, 53, 42, 58, 33, 61, 57, /* EX */
|
||||
34, 44, 54, 35, 37, 56, 30,SYM, 21, 59, 40, 87, 32, 39, 38, 88, /* FX */
|
||||
SYM,SYM,SYM,SYM,SYM, 88,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* CX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 89, 36, 51, 38, 60, /* DX */
|
||||
43, 33, 45, 57, 31, 19, 24, 37, 44, 28, 53, 39, 54, 34, 56, 59, /* EX */
|
||||
32, 49, 48, 35, 41, 46, 30,SYM, 21, 55, 40, 90, 36, 51, 38, 91, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const int Unicode_Char_size = 60;
|
||||
static const unsigned int Unicode_CharOrder[] =
|
||||
{
|
||||
65, 5, 66, 17, 67, 23, 68, 9, 69, 0, 70, 14, 71, 10, 72, 18,
|
||||
73, 6, 74, 22, 75, 11, 76, 7, 77, 12, 78, 2, 79, 8, 80, 16,
|
||||
81, 29, 82, 1, 83, 4, 84, 3, 85, 15, 86, 13, 87, 25, 88, 27,
|
||||
89, 20, 90, 26, 97, 5, 98, 17, 99, 23, 100, 9, 101, 0,102, 14,
|
||||
103, 10, 104, 18, 105, 6, 106, 22, 107, 11, 108, 7, 109, 12,110, 2,
|
||||
111, 8, 112, 16, 113, 29, 114, 1, 115, 4, 116, 3, 117, 15,118, 13,
|
||||
119, 25, 120, 27, 121, 20, 122, 26, 197, 19, 198, 24, 201, 28,216, 21,
|
||||
229, 19, 230, 24, 233, 28, 248, 21,
|
||||
};
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 991
|
||||
* First 512 sequences: 0.9975864274305254
|
||||
* Next 512 sequences (512-1024): 0.002413572569474574
|
||||
* Rest: 3.5128150388530344e-17
|
||||
* Total considered sequences: 967 / 900
|
||||
* - Positive sequences: first 442 (0.9950425176429516)
|
||||
* - Probable sequences: next 157 (599-442) (0.0039580060347621515)
|
||||
* - Neutral sequences: last 301 (0.0009994763222862524)
|
||||
* - Negative sequences: -67 (off-ratio)
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
static const PRUint8 NorwegianLangModel[] =
|
||||
{
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,0,2,0,
|
||||
0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,
|
||||
2,2,2,2,2,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,
|
||||
2,2,2,0,0,2,0,0,2,2,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,3,2,0,
|
||||
2,2,2,0,2,0,0,0,2,0,0,2,0,0,2,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,2,2,2,
|
||||
2,2,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,0,3,3,3,0,2,0,
|
||||
0,0,2,2,0,0,0,0,0,2,0,2,0,2,0,2,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,2,2,3,2,2,2,0,
|
||||
0,0,0,2,2,0,0,0,0,0,2,2,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,2,2,2,
|
||||
2,2,2,0,2,2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,0,2,3,0,3,2,3,0,2,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,0,2,0,2,
|
||||
0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,2,
|
||||
0,0,0,0,2,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,2,3,2,2,2,0,0,2,2,2,
|
||||
2,2,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,0,2,0,2,2,2,
|
||||
2,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,3,2,0,0,2,0,0,
|
||||
2,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,2,2,3,2,2,3,0,3,2,2,3,3,3,3,3,3,0,0,0,2,0,2,
|
||||
0,2,0,0,2,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,0,2,0,0,
|
||||
2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,2,2,2,3,2,2,3,2,2,2,0,
|
||||
0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,2,3,3,3,3,3,2,2,2,2,0,2,2,3,3,2,3,3,3,3,2,3,2,2,0,2,0,2,
|
||||
2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,2,2,2,3,3,2,2,3,2,2,3,3,3,3,2,3,2,2,0,2,0,2,
|
||||
2,2,2,0,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,2,2,3,2,3,3,3,2,3,3,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,2,0,2,3,2,2,2,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,0,2,3,2,3,3,3,3,3,3,3,0,3,2,0,3,2,2,2,0,2,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,3,0,0,2,2,3,2,0,3,0,0,0,0,0,2,
|
||||
2,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,2,3,3,3,3,3,3,2,3,0,3,2,0,2,3,2,3,0,3,0,0,3,2,0,2,0,2,2,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,3,3,3,3,0,2,2,0,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,2,3,3,3,2,3,2,2,0,2,0,2,2,2,2,3,0,2,2,2,2,0,2,0,0,0,0,0,
|
||||
2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,2,2,2,2,3,3,2,3,2,2,2,2,0,2,0,2,2,2,0,3,0,0,2,0,2,2,0,0,0,0,
|
||||
0,2,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,2,2,2,3,3,2,2,0,0,0,0,2,2,2,2,2,2,0,2,0,0,0,0,0,0,2,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,3,2,2,2,0,2,2,2,0,2,2,2,2,0,0,2,2,0,0,0,0,2,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,0,2,2,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,2,2,0,0,2,0,2,2,2,2,2,2,2,0,0,2,0,2,0,0,2,0,0,0,0,0,0,2,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,2,2,0,0,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,2,0,0,2,0,2,0,2,0,0,2,0,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,0,2,2,0,2,0,0,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,0,0,2,2,2,0,0,0,0,2,0,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,2,2,0,0,2,0,0,2,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,2,2,0,0,2,0,0,2,2,2,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,2,2,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,2,0,2,0,0,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,2,3,2,3,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,2,1,2,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,1,2,0,2,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,0,1,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,1,2,3,0,2,2,2,0,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,3,1,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,1,0,1,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,2,3,0,2,2,2,0,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,3,3,2,2,1,1,0,2,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,0,1,1,0,1,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,3,3,3,2,3,1,1,1,0,0,1,0,
|
||||
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,2,0,0,0,0,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,3,3,1,2,2,1,3,0,0,0,0,0,
|
||||
3,3,2,3,2,3,3,3,3,1,2,1,1,0,3,3,0,1,2,3,3,3,3,2,2,0,1,1,2,1,
|
||||
3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,2,3,3,2,1,1,2,1,2,0,1,2,2,1,1,
|
||||
3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,3,3,2,3,3,2,3,1,1,1,0,1,0,1,0,
|
||||
3,3,3,1,3,3,3,3,3,2,0,1,1,0,2,3,2,3,1,3,3,3,3,1,3,1,0,0,1,0,
|
||||
3,3,3,3,2,3,3,2,3,1,1,2,2,3,1,3,1,2,2,3,3,3,3,1,2,2,0,1,0,1,
|
||||
3,3,3,3,3,1,2,3,1,3,3,3,2,3,2,0,3,2,2,0,0,1,0,0,0,0,0,0,0,0,
|
||||
3,3,3,3,3,3,2,3,3,3,3,3,3,3,2,2,3,3,2,0,1,1,1,2,0,1,1,1,0,0,
|
||||
3,3,3,3,3,1,2,3,1,3,3,3,3,3,2,0,3,2,1,0,3,1,1,0,0,0,0,0,0,0,
|
||||
3,1,1,1,1,3,3,1,3,1,0,1,2,0,2,3,1,0,1,2,1,3,1,0,3,0,0,0,0,0,
|
||||
3,2,2,3,2,3,3,3,3,2,1,3,2,0,0,3,0,2,3,0,3,0,0,2,1,1,1,0,0,1,
|
||||
0,3,2,2,2,0,1,2,0,1,1,1,1,1,2,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,1,2,2,2,3,3,1,3,1,0,1,1,0,1,1,0,1,2,0,1,0,0,0,0,2,1,0,0,0,
|
||||
3,0,2,1,1,2,2,1,3,0,1,0,1,0,1,2,0,1,1,0,1,0,1,0,0,1,2,0,0,0,
|
||||
2,0,1,1,1,2,2,1,2,0,0,1,1,0,1,1,1,0,1,0,1,0,0,1,0,0,0,1,0,0,
|
||||
2,2,3,2,2,1,1,1,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
0,1,0,0,0,2,1,0,1,0,0,0,0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
|
||||
};
|
||||
|
||||
|
||||
@ -286,38 +207,52 @@ const SequenceModel Ibm865NorwegianModel =
|
||||
{
|
||||
Ibm865_CharToOrderMap,
|
||||
NorwegianLangModel,
|
||||
62,
|
||||
(float)0.9975864274305254,
|
||||
30,
|
||||
(float)0.9990005236777137,
|
||||
PR_TRUE,
|
||||
"IBM865"
|
||||
"IBM865",
|
||||
"no"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15NorwegianModel =
|
||||
{
|
||||
Iso_8859_15_CharToOrderMap,
|
||||
NorwegianLangModel,
|
||||
62,
|
||||
(float)0.9975864274305254,
|
||||
30,
|
||||
(float)0.9990005236777137,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"no"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1NorwegianModel =
|
||||
{
|
||||
Iso_8859_1_CharToOrderMap,
|
||||
NorwegianLangModel,
|
||||
62,
|
||||
(float)0.9975864274305254,
|
||||
30,
|
||||
(float)0.9990005236777137,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"no"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252NorwegianModel =
|
||||
{
|
||||
Windows_1252_CharToOrderMap,
|
||||
NorwegianLangModel,
|
||||
62,
|
||||
(float)0.9975864274305254,
|
||||
30,
|
||||
(float)0.9990005236777137,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"no"
|
||||
};
|
||||
|
||||
const LanguageModel NorwegianModel =
|
||||
{
|
||||
"no",
|
||||
Unicode_CharOrder,
|
||||
60,
|
||||
NorwegianLangModel,
|
||||
30,
|
||||
(float)0.9995751776807141,
|
||||
};
|
||||
|
||||
@ -131,6 +131,7 @@ extern const LanguageModel ItalianModel;
|
||||
extern const LanguageModel LatvianModel;
|
||||
extern const LanguageModel LithuanianModel;
|
||||
extern const LanguageModel MalteseModel;
|
||||
extern const LanguageModel NorwegianModel;
|
||||
extern const LanguageModel PolishModel;
|
||||
extern const LanguageModel PortugueseModel;
|
||||
extern const LanguageModel RomanianModel;
|
||||
|
||||
@ -111,6 +111,7 @@ nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&LatvianModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&LithuanianModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&MalteseModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&NorwegianModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&PolishModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&PortugueseModel);
|
||||
langDetectors[i][j++] = new nsLanguageDetector(&RomanianModel);
|
||||
|
||||
@ -49,7 +49,7 @@
|
||||
#include "nsEUCTWProber.h"
|
||||
|
||||
#define NUM_OF_PROBERS 8
|
||||
#define NUM_OF_LANGUAGES 30
|
||||
#define NUM_OF_LANGUAGES 31
|
||||
|
||||
class nsMBCSGroupProber: public nsCharSetProber {
|
||||
public:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user