mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 08:46:40 +08:00
LangModels: add VISCII encoding support and retrain Vietnamese model.
This commit is contained in:
parent
600cf76a76
commit
98b5e52252
@ -1,7 +1,7 @@
|
||||
= Logs of language model for Vietnamese (vi) =
|
||||
|
||||
- Generated by BuildLangModel.py
|
||||
- Started: 2016-02-13 02:13:44.503931
|
||||
- Started: 2016-02-13 03:37:17.480303
|
||||
- Maximum depth: 3
|
||||
- Max number of pages: 40
|
||||
|
||||
@ -49,51 +49,73 @@ D (revision 21447691)
|
||||
|
||||
== End of Parsed pages ==
|
||||
|
||||
- Wikipedia parsing ended at: 2016-02-13 02:16:03.731928
|
||||
- Wikipedia parsing ended at: 2016-02-13 03:42:06.560479
|
||||
|
||||
49 characters appeared 190798 times.
|
||||
101 characters appeared 222814 times.
|
||||
|
||||
First 33 characters:
|
||||
[ 0] Char n: 13.15212947724819 %
|
||||
[ 1] Char h: 10.371702009455026 %
|
||||
[ 2] Char t: 8.20134382959989 %
|
||||
[ 3] Char c: 7.433516074591977 %
|
||||
[ 4] Char i: 7.238545477415906 %
|
||||
[ 5] Char g: 6.529418547364228 %
|
||||
[ 6] Char a: 4.203922472981897 %
|
||||
[ 7] Char u: 3.328127129215191 %
|
||||
[ 8] Char m: 3.0540152412499086 %
|
||||
[ 9] Char o: 3.037767691485236 %
|
||||
[10] Char đ: 2.5948909317707733 %
|
||||
[11] Char r: 2.4643864191448546 %
|
||||
[12] Char à: 2.3878657008983324 %
|
||||
[13] Char v: 2.269939936477322 %
|
||||
[14] Char l: 2.2327278063711358 %
|
||||
[15] Char á: 2.0482394993658217 %
|
||||
[16] Char p: 1.9214037882996675 %
|
||||
[17] Char b: 1.7998092223188922 %
|
||||
[18] Char ư: 1.6813593433893437 %
|
||||
[19] Char s: 1.6069350831769726 %
|
||||
[20] Char y: 1.4952986928584158 %
|
||||
[21] Char e: 1.4544177611924654 %
|
||||
[22] Char d: 1.3139550729043281 %
|
||||
[23] Char k: 1.2489648738456378 %
|
||||
[24] Char â: 1.1278944223734 %
|
||||
[25] Char ê: 0.977997672931582 %
|
||||
[26] Char ô: 0.8260044654556128 %
|
||||
[27] Char ó: 0.7091269300516777 %
|
||||
[28] Char q: 0.60011111227581 %
|
||||
[29] Char ơ: 0.4192916068302603 %
|
||||
[30] Char í: 0.4166710342875712 %
|
||||
[31] Char ă: 0.37998301868992335 %
|
||||
[32] Char x: 0.34329500309227556 %
|
||||
First 55 characters:
|
||||
[ 0] Char n: 11.262308472537633 %
|
||||
[ 1] Char h: 8.881398834902654 %
|
||||
[ 2] Char t: 7.022898022565907 %
|
||||
[ 3] Char c: 6.365398942615815 %
|
||||
[ 4] Char i: 6.198443544840091 %
|
||||
[ 5] Char g: 5.591210606155808 %
|
||||
[ 6] Char a: 3.5998635633308496 %
|
||||
[ 7] Char u: 2.8499106878382867 %
|
||||
[ 8] Char m: 2.615185760320267 %
|
||||
[ 9] Char o: 2.6012728105056238 %
|
||||
[10] Char đ: 2.222032726848403 %
|
||||
[11] Char r: 2.1102803234985235 %
|
||||
[12] Char à: 2.0447548179198796 %
|
||||
[13] Char v: 1.9437737305555307 %
|
||||
[14] Char l: 1.9119085874316697 %
|
||||
[15] Char á: 1.7539292863105551 %
|
||||
[16] Char p: 1.6453185167897888 %
|
||||
[17] Char b: 1.541195795596327 %
|
||||
[18] Char ư: 1.4397659033992478 %
|
||||
[19] Char s: 1.3760356171515256 %
|
||||
[20] Char y: 1.280440187779942 %
|
||||
[21] Char e: 1.2454334108269678 %
|
||||
[22] Char d: 1.1251537156552103 %
|
||||
[23] Char ế: 1.071745940560288 %
|
||||
[24] Char k: 1.0695019163966357 %
|
||||
[25] Char â: 0.9658280000359044 %
|
||||
[26] Char ữ: 0.9604423420431392 %
|
||||
[27] Char ê: 0.8374698178749989 %
|
||||
[28] Char ệ: 0.7459136319979893 %
|
||||
[29] Char ô: 0.7073164163831717 %
|
||||
[30] Char ạ: 0.6727584442629277 %
|
||||
[31] Char ộ: 0.6705144200992756 %
|
||||
[32] Char ố: 0.6476253736300233 %
|
||||
[33] Char ó: 0.6072329386842837 %
|
||||
[34] Char ả: 0.5484395055965963 %
|
||||
[35] Char ủ: 0.5475418959311353 %
|
||||
[36] Char q: 0.5138815334763525 %
|
||||
[37] Char ợ: 0.48560682901433483 %
|
||||
[38] Char ờ: 0.4851580241816044 %
|
||||
[39] Char ể: 0.4748355130288043 %
|
||||
[40] Char ớ: 0.4676546357051173 %
|
||||
[41] Char ấ: 0.418286104104769 %
|
||||
[42] Char ị: 0.40212913012647317 %
|
||||
[43] Char ầ: 0.3904602044754818 %
|
||||
[44] Char ọ: 0.3801376933226817 %
|
||||
[45] Char ề: 0.3787912788244904 %
|
||||
[46] Char ơ: 0.3590438661843511 %
|
||||
[47] Char í: 0.35679984202069887 %
|
||||
[48] Char ụ: 0.35276059852612496 %
|
||||
[49] Char ậ: 0.3469261357006292 %
|
||||
[50] Char ì: 0.32762752789322036 %
|
||||
[51] Char ă: 0.3253835037295682 %
|
||||
[52] Char ứ: 0.29665999443482005 %
|
||||
[53] Char ồ: 0.29665999443482005 %
|
||||
[54] Char x: 0.2939671654384374 %
|
||||
|
||||
The first 33 characters have an accumulated ratio of 0.9887105734860954.
|
||||
The first 55 characters have an accumulated ratio of 0.9603301408349568.
|
||||
|
||||
852 sequences found.
|
||||
1494 sequences found.
|
||||
|
||||
First 512 (typical positive ratio): 0.990048941203513
|
||||
Next 512 (512-1024): 1.0482290170756506e-05
|
||||
Rest: -1.5612511283791264e-17
|
||||
First 512 (typical positive ratio): 0.9321889118082535
|
||||
Next 512 (512-1024): 0.009604423420431392
|
||||
Rest: 0.0068905733918831966
|
||||
|
||||
- Processing end: 2016-02-13 02:16:03.877897
|
||||
- Processing end: 2016-02-13 03:42:07.174723
|
||||
|
||||
@ -54,7 +54,7 @@ use_ascii = False
|
||||
# Quoted-Readable (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] though usage of either
|
||||
# variable-width scheme has declined dramatically following the adoption of
|
||||
# Unicode on the World Wide Web.
|
||||
charsets = ['WINDOWS-1258'] # TODO: add 'VISCII'
|
||||
charsets = ['WINDOWS-1258', 'VISCII']
|
||||
|
||||
## Optional Properties ##
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@
|
||||
|
||||
/**
|
||||
* Generated by BuildLangModel.py
|
||||
* On: 2016-02-13 02:16:03.733608
|
||||
* On: 2016-02-13 03:42:06.561440
|
||||
**/
|
||||
|
||||
/* Character Mapping Table:
|
||||
@ -67,64 +67,162 @@ static const unsigned char Windows_1258_CharToOrderMap[] =
|
||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 6, 17, 3, 22, 21, 35, 5, 1, 4, 38, 23, 14, 8, 0, 9, /* 4X */
|
||||
16, 28, 11, 19, 2, 7, 13, 37, 32, 20, 39,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 6, 17, 3, 22, 21, 35, 5, 1, 4, 38, 23, 14, 8, 0, 9, /* 6X */
|
||||
16, 28, 11, 19, 2, 7, 13, 37, 32, 20, 39,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 47,ILL,ILL,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 47,ILL,ILL, 50, /* 9X */
|
||||
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */
|
||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||
SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
12, 15, 24, 31, 45, 52, 46, 41, 40, 34, 25, 53,SYM, 30, 54, 55, /* CX */
|
||||
10, 56,SYM, 27, 26, 29, 42,SYM, 43, 33, 36, 57, 44, 18,SYM, 48, /* DX */
|
||||
12, 15, 24, 31, 45, 58, 46, 41, 40, 34, 25, 59,SYM, 30, 60, 61, /* EX */
|
||||
10, 62,SYM, 27, 26, 29, 42,SYM, 43, 33, 36, 63, 44, 18, 64, 65, /* FX */
|
||||
SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||
12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */
|
||||
10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */
|
||||
12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */
|
||||
10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
static const unsigned char Viscii_CharToOrderMap[] =
|
||||
{
|
||||
CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||
CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */
|
||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||
30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */
|
||||
53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */
|
||||
86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */
|
||||
53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */
|
||||
12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */
|
||||
10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */
|
||||
12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */
|
||||
10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */
|
||||
};
|
||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||
|
||||
|
||||
/* Model Table:
|
||||
* Total sequences: 852
|
||||
* First 512 sequences: 0.990048941203513
|
||||
* Next 512 sequences (512-1024): 0.00995105879648696
|
||||
* Rest: -1.5612511283791264e-17
|
||||
* Total sequences: 1494
|
||||
* First 512 sequences: 0.9321889118082535
|
||||
* Next 512 sequences (512-1024): 0.06092051479986333
|
||||
* Rest: 0.0068905733918831966
|
||||
* Negative sequences: TODO
|
||||
*/
|
||||
static const PRUint8 VietnameseLangModel[] =
|
||||
{
|
||||
3,3,3,3,3,3,3,3,3,3,0,3,3,2,2,2,2,2,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,
|
||||
3,2,3,0,3,0,3,3,3,3,0,3,3,0,2,3,0,0,3,2,3,3,2,2,3,3,3,3,0,3,3,3,0,
|
||||
3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,3,0,3,3,3,3,2,0,3,3,3,3,0,2,3,3,0,
|
||||
3,3,3,3,3,0,3,3,2,3,0,3,3,2,3,3,2,2,3,2,3,3,2,3,3,0,3,3,2,3,0,3,2,
|
||||
3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,2,3,3,3,0,3,2,2,3,0,0,2,3,
|
||||
3,3,3,2,3,2,3,3,2,3,0,3,3,0,3,2,0,2,3,2,2,3,2,2,2,0,3,3,0,0,0,2,0,
|
||||
3,3,3,3,3,3,2,3,3,3,0,3,0,3,3,0,3,3,0,3,3,3,3,3,0,0,0,0,2,0,2,0,3,
|
||||
3,2,3,3,3,3,3,2,3,2,0,3,2,2,3,3,3,3,0,3,3,3,3,2,3,3,3,0,0,2,2,2,2,
|
||||
3,2,0,3,3,0,3,3,3,3,0,0,3,0,3,3,3,3,3,3,2,3,2,0,2,0,3,3,0,0,0,2,2,
|
||||
3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,0,0,2,0,0,0,3,2,
|
||||
0,0,0,0,3,0,3,3,0,3,0,2,3,0,0,3,0,2,3,0,0,2,0,0,3,3,3,3,0,3,2,3,0,
|
||||
3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,0,2,3,3,0,
|
||||
3,0,0,0,3,0,0,3,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,3,0,3,3,0,3,0,2,3,0,2,2,0,0,3,2,2,3,2,0,3,2,3,0,0,0,3,3,0,
|
||||
0,3,3,2,3,3,3,3,3,3,0,0,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,2,3,3,2,
|
||||
3,0,3,3,3,2,0,3,3,3,0,2,0,0,2,0,3,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,3,0,3,0,3,3,2,3,0,3,0,0,3,0,3,2,0,2,3,3,2,0,0,2,0,2,2,0,2,0,0,
|
||||
3,2,0,3,3,2,3,3,0,3,0,3,3,0,3,3,0,3,2,2,3,3,2,2,0,3,0,3,0,0,3,0,0,
|
||||
3,0,2,2,2,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,
|
||||
2,3,3,3,3,2,3,3,3,3,0,2,3,0,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,2,0,
|
||||
3,0,2,2,2,0,3,2,2,3,0,3,0,0,2,2,2,2,0,3,0,3,2,2,0,3,0,0,0,0,0,0,0,
|
||||
3,2,3,3,3,3,3,3,3,3,0,3,0,3,3,2,3,3,0,3,3,3,3,3,0,0,0,0,2,0,0,0,3,
|
||||
2,2,2,2,3,2,3,3,0,3,0,3,3,2,2,2,0,0,3,2,3,3,3,2,3,2,0,2,0,2,0,2,0,
|
||||
2,3,2,0,3,2,3,3,0,3,0,2,0,2,3,0,0,2,0,2,3,3,0,2,0,3,2,0,0,0,3,0,2,
|
||||
3,0,2,2,0,0,0,3,3,0,0,0,0,0,2,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,2,0,0,0,3,3,0,2,2,0,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
3,0,3,2,3,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,
|
||||
3,0,2,3,3,0,3,0,3,0,0,2,0,2,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,3,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,3,0,0,3,2,2,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,2,0,0,0,3,0,0,2,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
||||
2,2,2,2,3,0,3,3,0,2,0,0,0,2,0,3,2,2,3,0,2,3,0,0,3,3,3,2,0,2,2,2,3,
|
||||
3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2,
|
||||
3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2,
|
||||
2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3,
|
||||
3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,
|
||||
2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0,
|
||||
3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0,
|
||||
2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0,
|
||||
0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1,
|
||||
3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3,
|
||||
3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2,
|
||||
2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3,
|
||||
0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0,
|
||||
3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||
3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0,
|
||||
2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1,
|
||||
2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0,
|
||||
0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1,
|
||||
3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0,
|
||||
0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,
|
||||
0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0,
|
||||
2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0,
|
||||
3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2,
|
||||
3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0,
|
||||
3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1,
|
||||
2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0,
|
||||
0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1,
|
||||
3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1,
|
||||
3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0,
|
||||
2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0,
|
||||
2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0,
|
||||
3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||
1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0,
|
||||
0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0,
|
||||
2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0,
|
||||
3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,
|
||||
3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0,
|
||||
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||
1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1,
|
||||
1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0,
|
||||
3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0,
|
||||
2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1,
|
||||
3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||
1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0,
|
||||
2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2,
|
||||
};
|
||||
|
||||
|
||||
@ -132,8 +230,18 @@ const SequenceModel Windows_1258VietnameseModel =
|
||||
{
|
||||
Windows_1258_CharToOrderMap,
|
||||
VietnameseLangModel,
|
||||
33,
|
||||
(float)0.990048941203513,
|
||||
55,
|
||||
(float)0.9321889118082535,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1258"
|
||||
};
|
||||
|
||||
const SequenceModel VisciiVietnameseModel =
|
||||
{
|
||||
Viscii_CharToOrderMap,
|
||||
VietnameseLangModel,
|
||||
55,
|
||||
(float)0.9321889118082535,
|
||||
PR_FALSE,
|
||||
"VISCII"
|
||||
};
|
||||
@ -104,7 +104,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
||||
mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
||||
|
||||
mProbers[30] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||
mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
|
||||
mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 31
|
||||
#define NUM_OF_SBCS_PROBERS 32
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -162,6 +162,7 @@ extern const SequenceModel Iso_8859_3EsperantoModel;
|
||||
extern const SequenceModel Iso_8859_3TurkishModel;
|
||||
extern const SequenceModel Iso_8859_9TurkishModel;
|
||||
|
||||
extern const SequenceModel VisciiVietnameseModel;
|
||||
extern const SequenceModel Windows_1258VietnameseModel;
|
||||
|
||||
#endif /* nsSingleByteCharSetProber_h__ */
|
||||
|
||||
4
test/vi/viscii.txt
Normal file
4
test/vi/viscii.txt
Normal file
@ -0,0 +1,4 @@
|
||||
Chæ Qu¯c ngæ là h® chæ viªt th¯ng nh¤t chính thÑc hi®n nay cüa tiªng Vi®t, sØ
|
||||
døng ký tñ La Tinh, dña trên các bäng chæ cái cüa nhóm ngôn ngæ Rôman,[1] ð£c
|
||||
bi®t là bäng chæ cái B° Ðào Nha,[2] v¾i các d¤u phø chü yªu t× bäng chæ cái Hy
|
||||
LÕp.
|
||||
Loading…
x
Reference in New Issue
Block a user