mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
LangModels: add VISCII encoding support and retrain Vietnamese model.
This commit is contained in:
parent
600cf76a76
commit
98b5e52252
@ -1,7 +1,7 @@
|
|||||||
= Logs of language model for Vietnamese (vi) =
|
= Logs of language model for Vietnamese (vi) =
|
||||||
|
|
||||||
- Generated by BuildLangModel.py
|
- Generated by BuildLangModel.py
|
||||||
- Started: 2016-02-13 02:13:44.503931
|
- Started: 2016-02-13 03:37:17.480303
|
||||||
- Maximum depth: 3
|
- Maximum depth: 3
|
||||||
- Max number of pages: 40
|
- Max number of pages: 40
|
||||||
|
|
||||||
@ -49,51 +49,73 @@ D (revision 21447691)
|
|||||||
|
|
||||||
== End of Parsed pages ==
|
== End of Parsed pages ==
|
||||||
|
|
||||||
- Wikipedia parsing ended at: 2016-02-13 02:16:03.731928
|
- Wikipedia parsing ended at: 2016-02-13 03:42:06.560479
|
||||||
|
|
||||||
49 characters appeared 190798 times.
|
101 characters appeared 222814 times.
|
||||||
|
|
||||||
First 33 characters:
|
First 55 characters:
|
||||||
[ 0] Char n: 13.15212947724819 %
|
[ 0] Char n: 11.262308472537633 %
|
||||||
[ 1] Char h: 10.371702009455026 %
|
[ 1] Char h: 8.881398834902654 %
|
||||||
[ 2] Char t: 8.20134382959989 %
|
[ 2] Char t: 7.022898022565907 %
|
||||||
[ 3] Char c: 7.433516074591977 %
|
[ 3] Char c: 6.365398942615815 %
|
||||||
[ 4] Char i: 7.238545477415906 %
|
[ 4] Char i: 6.198443544840091 %
|
||||||
[ 5] Char g: 6.529418547364228 %
|
[ 5] Char g: 5.591210606155808 %
|
||||||
[ 6] Char a: 4.203922472981897 %
|
[ 6] Char a: 3.5998635633308496 %
|
||||||
[ 7] Char u: 3.328127129215191 %
|
[ 7] Char u: 2.8499106878382867 %
|
||||||
[ 8] Char m: 3.0540152412499086 %
|
[ 8] Char m: 2.615185760320267 %
|
||||||
[ 9] Char o: 3.037767691485236 %
|
[ 9] Char o: 2.6012728105056238 %
|
||||||
[10] Char đ: 2.5948909317707733 %
|
[10] Char đ: 2.222032726848403 %
|
||||||
[11] Char r: 2.4643864191448546 %
|
[11] Char r: 2.1102803234985235 %
|
||||||
[12] Char à: 2.3878657008983324 %
|
[12] Char à: 2.0447548179198796 %
|
||||||
[13] Char v: 2.269939936477322 %
|
[13] Char v: 1.9437737305555307 %
|
||||||
[14] Char l: 2.2327278063711358 %
|
[14] Char l: 1.9119085874316697 %
|
||||||
[15] Char á: 2.0482394993658217 %
|
[15] Char á: 1.7539292863105551 %
|
||||||
[16] Char p: 1.9214037882996675 %
|
[16] Char p: 1.6453185167897888 %
|
||||||
[17] Char b: 1.7998092223188922 %
|
[17] Char b: 1.541195795596327 %
|
||||||
[18] Char ư: 1.6813593433893437 %
|
[18] Char ư: 1.4397659033992478 %
|
||||||
[19] Char s: 1.6069350831769726 %
|
[19] Char s: 1.3760356171515256 %
|
||||||
[20] Char y: 1.4952986928584158 %
|
[20] Char y: 1.280440187779942 %
|
||||||
[21] Char e: 1.4544177611924654 %
|
[21] Char e: 1.2454334108269678 %
|
||||||
[22] Char d: 1.3139550729043281 %
|
[22] Char d: 1.1251537156552103 %
|
||||||
[23] Char k: 1.2489648738456378 %
|
[23] Char ế: 1.071745940560288 %
|
||||||
[24] Char â: 1.1278944223734 %
|
[24] Char k: 1.0695019163966357 %
|
||||||
[25] Char ê: 0.977997672931582 %
|
[25] Char â: 0.9658280000359044 %
|
||||||
[26] Char ô: 0.8260044654556128 %
|
[26] Char ữ: 0.9604423420431392 %
|
||||||
[27] Char ó: 0.7091269300516777 %
|
[27] Char ê: 0.8374698178749989 %
|
||||||
[28] Char q: 0.60011111227581 %
|
[28] Char ệ: 0.7459136319979893 %
|
||||||
[29] Char ơ: 0.4192916068302603 %
|
[29] Char ô: 0.7073164163831717 %
|
||||||
[30] Char í: 0.4166710342875712 %
|
[30] Char ạ: 0.6727584442629277 %
|
||||||
[31] Char ă: 0.37998301868992335 %
|
[31] Char ộ: 0.6705144200992756 %
|
||||||
[32] Char x: 0.34329500309227556 %
|
[32] Char ố: 0.6476253736300233 %
|
||||||
|
[33] Char ó: 0.6072329386842837 %
|
||||||
|
[34] Char ả: 0.5484395055965963 %
|
||||||
|
[35] Char ủ: 0.5475418959311353 %
|
||||||
|
[36] Char q: 0.5138815334763525 %
|
||||||
|
[37] Char ợ: 0.48560682901433483 %
|
||||||
|
[38] Char ờ: 0.4851580241816044 %
|
||||||
|
[39] Char ể: 0.4748355130288043 %
|
||||||
|
[40] Char ớ: 0.4676546357051173 %
|
||||||
|
[41] Char ấ: 0.418286104104769 %
|
||||||
|
[42] Char ị: 0.40212913012647317 %
|
||||||
|
[43] Char ầ: 0.3904602044754818 %
|
||||||
|
[44] Char ọ: 0.3801376933226817 %
|
||||||
|
[45] Char ề: 0.3787912788244904 %
|
||||||
|
[46] Char ơ: 0.3590438661843511 %
|
||||||
|
[47] Char í: 0.35679984202069887 %
|
||||||
|
[48] Char ụ: 0.35276059852612496 %
|
||||||
|
[49] Char ậ: 0.3469261357006292 %
|
||||||
|
[50] Char ì: 0.32762752789322036 %
|
||||||
|
[51] Char ă: 0.3253835037295682 %
|
||||||
|
[52] Char ứ: 0.29665999443482005 %
|
||||||
|
[53] Char ồ: 0.29665999443482005 %
|
||||||
|
[54] Char x: 0.2939671654384374 %
|
||||||
|
|
||||||
The first 33 characters have an accumulated ratio of 0.9887105734860954.
|
The first 55 characters have an accumulated ratio of 0.9603301408349568.
|
||||||
|
|
||||||
852 sequences found.
|
1494 sequences found.
|
||||||
|
|
||||||
First 512 (typical positive ratio): 0.990048941203513
|
First 512 (typical positive ratio): 0.9321889118082535
|
||||||
Next 512 (512-1024): 1.0482290170756506e-05
|
Next 512 (512-1024): 0.009604423420431392
|
||||||
Rest: -1.5612511283791264e-17
|
Rest: 0.0068905733918831966
|
||||||
|
|
||||||
- Processing end: 2016-02-13 02:16:03.877897
|
- Processing end: 2016-02-13 03:42:07.174723
|
||||||
|
|||||||
@ -54,7 +54,7 @@ use_ascii = False
|
|||||||
# Quoted-Readable (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] though usage of either
|
# Quoted-Readable (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] though usage of either
|
||||||
# variable-width scheme has declined dramatically following the adoption of
|
# variable-width scheme has declined dramatically following the adoption of
|
||||||
# Unicode on the World Wide Web.
|
# Unicode on the World Wide Web.
|
||||||
charsets = ['WINDOWS-1258'] # TODO: add 'VISCII'
|
charsets = ['WINDOWS-1258', 'VISCII']
|
||||||
|
|
||||||
## Optional Properties ##
|
## Optional Properties ##
|
||||||
|
|
||||||
|
|||||||
@ -41,7 +41,7 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Generated by BuildLangModel.py
|
* Generated by BuildLangModel.py
|
||||||
* On: 2016-02-13 02:16:03.733608
|
* On: 2016-02-13 03:42:06.561440
|
||||||
**/
|
**/
|
||||||
|
|
||||||
/* Character Mapping Table:
|
/* Character Mapping Table:
|
||||||
@ -67,64 +67,162 @@ static const unsigned char Windows_1258_CharToOrderMap[] =
|
|||||||
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */
|
||||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
SYM, 6, 17, 3, 22, 21, 35, 5, 1, 4, 38, 23, 14, 8, 0, 9, /* 4X */
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||||
16, 28, 11, 19, 2, 7, 13, 37, 32, 20, 39,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
SYM, 6, 17, 3, 22, 21, 35, 5, 1, 4, 38, 23, 14, 8, 0, 9, /* 6X */
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||||
16, 28, 11, 19, 2, 7, 13, 37, 32, 20, 39,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
SYM,ILL,SYM, 49,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 47,ILL,ILL,ILL, /* 8X */
|
SYM,ILL,SYM,101,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,ILL, /* 8X */
|
||||||
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM, 47,ILL,ILL, 50, /* 9X */
|
ILL,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,ILL,SYM,100,ILL,ILL,102, /* 9X */
|
||||||
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* AX */
|
||||||
SYM,SYM,SYM,SYM,SYM, 51,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
SYM,SYM,SYM,SYM,SYM,103,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* BX */
|
||||||
12, 15, 24, 31, 45, 52, 46, 41, 40, 34, 25, 53,SYM, 30, 54, 55, /* CX */
|
12, 15, 25, 51, 97,104, 98, 91, 90, 62, 27,105,SYM, 47,106,107, /* CX */
|
||||||
10, 56,SYM, 27, 26, 29, 42,SYM, 43, 33, 36, 57, 44, 18,SYM, 48, /* DX */
|
10,108,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,109, 96, 18,SYM, 99, /* DX */
|
||||||
12, 15, 24, 31, 45, 58, 46, 41, 40, 34, 25, 59,SYM, 30, 60, 61, /* EX */
|
12, 15, 25, 51, 97,110, 98, 91, 90, 62, 27,111,SYM, 47,112,113, /* EX */
|
||||||
10, 62,SYM, 27, 26, 29, 42,SYM, 43, 33, 36, 63, 44, 18, 64, 65, /* FX */
|
10,114,SYM, 33, 29, 46, 93,SYM, 94, 58, 67,115, 96, 18,116,117, /* FX */
|
||||||
|
};
|
||||||
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
static const unsigned char Viscii_CharToOrderMap[] =
|
||||||
|
{
|
||||||
|
CTR,CTR, 88,CTR,CTR, 95, 77,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */
|
||||||
|
CTR,CTR,CTR,CTR, 80,CTR,CTR,CTR,CTR, 79,CTR,CTR,CTR,CTR, 92,CTR, /* 1X */
|
||||||
|
SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM,SYM, /* 2X */
|
||||||
|
NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,NUM,SYM,SYM,SYM,SYM,SYM,SYM, /* 3X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 4X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,SYM, /* 5X */
|
||||||
|
SYM, 6, 17, 3, 22, 21, 66, 5, 1, 4, 75, 24, 14, 8, 0, 9, /* 6X */
|
||||||
|
16, 36, 11, 19, 2, 7, 13, 69, 54, 20, 82,SYM,SYM,SYM,SYM,CTR, /* 7X */
|
||||||
|
30, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* 8X */
|
||||||
|
53, 60, 84, 31, 37, 40, 38, 59, 42, 81, 44, 73, 35, 72, 48, 76, /* 9X */
|
||||||
|
86, 57, 71, 65, 41, 43, 78, 49, 83, 89, 23, 45, 39, 74, 28, 32, /* AX */
|
||||||
|
53, 60, 84, 87, 46, 31, 38, 59, 42, 56, 52, 55, 70, 46, 40, 18, /* BX */
|
||||||
|
12, 15, 25, 61, 34, 51, 88, 95, 90, 62, 27, 85, 50, 47, 64, 76, /* CX */
|
||||||
|
10, 52, 63, 33, 29, 30, 80, 55, 70, 58, 67, 79, 92, 68, 87, 18, /* DX */
|
||||||
|
12, 15, 25, 61, 34, 51, 26, 77, 90, 62, 27, 85, 50, 47, 64, 73, /* EX */
|
||||||
|
10, 56, 63, 33, 29, 86, 81, 44, 48, 58, 67, 72, 35, 68, 37, 26, /* FX */
|
||||||
};
|
};
|
||||||
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
/*X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
|
||||||
|
|
||||||
|
|
||||||
/* Model Table:
|
/* Model Table:
|
||||||
* Total sequences: 852
|
* Total sequences: 1494
|
||||||
* First 512 sequences: 0.990048941203513
|
* First 512 sequences: 0.9321889118082535
|
||||||
* Next 512 sequences (512-1024): 0.00995105879648696
|
* Next 512 sequences (512-1024): 0.06092051479986333
|
||||||
* Rest: -1.5612511283791264e-17
|
* Rest: 0.0068905733918831966
|
||||||
* Negative sequences: TODO
|
* Negative sequences: TODO
|
||||||
*/
|
*/
|
||||||
static const PRUint8 VietnameseLangModel[] =
|
static const PRUint8 VietnameseLangModel[] =
|
||||||
{
|
{
|
||||||
3,3,3,3,3,3,3,3,3,3,0,3,3,2,2,2,2,2,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,
|
3,3,3,3,3,3,3,2,2,3,0,2,3,1,1,1,1,2,3,3,2,3,3,3,2,1,2,
|
||||||
3,2,3,0,3,0,3,3,3,3,0,3,3,0,2,3,0,0,3,2,3,3,2,2,3,3,3,3,0,3,3,3,0,
|
3,0,3,2,2,2,3,1,0,1,1,2,0,0,1,0,1,0,2,2,1,0,0,0,3,0,0,2,
|
||||||
3,3,3,3,3,2,3,3,2,3,2,3,3,3,3,3,3,0,3,3,3,3,2,0,3,3,3,3,0,2,3,3,0,
|
2,1,2,0,3,0,3,3,2,3,0,2,3,0,2,3,0,0,3,1,3,3,1,3,1,3,3,
|
||||||
3,3,3,3,3,0,3,3,2,3,0,3,3,2,3,3,2,2,3,2,3,3,2,3,3,0,3,3,2,3,0,3,2,
|
3,3,3,3,3,3,3,3,3,0,3,3,3,2,3,3,3,3,2,3,3,3,3,3,2,3,2,0,
|
||||||
3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,0,3,2,3,3,3,0,3,2,2,3,0,0,2,3,
|
2,3,2,2,3,1,3,3,1,3,1,3,3,2,2,3,2,0,3,2,2,3,1,3,0,3,0,
|
||||||
3,3,3,2,3,2,3,3,2,3,0,3,3,0,3,2,0,2,3,2,2,3,2,2,2,0,3,3,0,0,0,2,0,
|
3,1,3,3,3,3,2,3,2,0,0,2,1,2,2,2,2,0,0,1,3,2,3,2,2,2,2,0,
|
||||||
3,3,3,3,3,3,2,3,3,3,0,3,0,3,3,0,3,3,0,3,3,3,3,3,0,0,0,0,2,0,2,0,3,
|
2,3,2,2,3,0,3,3,2,3,0,2,2,1,2,3,1,1,2,2,2,3,1,0,2,2,0,
|
||||||
3,2,3,3,3,3,3,2,3,2,0,3,2,2,3,3,3,3,0,3,3,3,3,2,3,3,3,0,0,2,2,2,2,
|
0,0,3,2,3,2,3,3,3,1,1,2,0,0,2,0,3,0,0,2,0,2,2,0,2,3,1,1,
|
||||||
3,2,0,3,3,0,3,3,3,3,0,0,3,0,3,3,3,3,3,3,2,3,2,0,2,0,3,3,0,0,0,2,2,
|
3,1,3,3,3,3,3,2,3,3,1,3,2,2,3,3,2,2,0,3,1,3,3,3,2,0,3,
|
||||||
3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,0,3,3,3,3,3,0,0,0,2,0,0,0,3,2,
|
3,3,1,0,0,3,1,3,0,2,0,2,3,3,2,0,0,2,3,0,0,0,1,0,1,0,0,2,
|
||||||
0,0,0,0,3,0,3,3,0,3,0,2,3,0,0,3,0,2,3,0,0,2,0,0,3,3,3,3,0,3,2,3,0,
|
2,3,2,2,3,1,3,3,1,3,0,3,3,0,2,2,0,1,3,2,2,3,1,1,1,2,3,
|
||||||
3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,2,3,3,3,3,3,3,3,2,3,3,0,0,2,3,3,0,
|
0,0,3,3,1,2,2,0,1,0,2,2,0,0,1,1,3,3,0,0,0,1,1,2,1,0,3,0,
|
||||||
3,0,0,0,3,0,0,3,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
3,2,3,3,3,2,2,3,3,3,0,3,0,2,3,0,2,3,0,3,3,2,3,0,2,0,0,
|
||||||
3,0,0,0,3,0,3,3,0,3,0,2,3,0,2,2,0,0,3,2,2,3,2,0,3,2,3,0,0,0,3,3,0,
|
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,
|
||||||
0,3,3,2,3,3,3,3,3,3,0,0,3,2,3,3,3,3,3,3,3,3,3,2,3,3,3,2,0,2,3,3,2,
|
3,1,3,2,3,2,3,1,3,2,0,3,1,2,3,2,2,2,0,3,3,3,2,2,2,3,0,
|
||||||
3,0,3,3,3,2,0,3,3,3,0,2,0,0,2,0,3,0,0,2,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
2,1,3,1,3,3,0,2,0,0,0,1,0,1,3,0,3,0,0,2,2,0,3,0,2,0,3,1,
|
||||||
0,3,3,0,3,0,3,3,2,3,0,3,0,0,3,0,3,2,0,2,3,3,2,0,0,2,0,2,2,0,2,0,0,
|
2,1,0,2,3,0,3,3,2,3,0,0,3,0,2,3,2,2,3,2,2,3,2,0,0,1,0,
|
||||||
3,2,0,3,3,2,3,3,0,3,0,3,3,0,3,3,0,3,2,2,3,3,2,2,0,3,0,3,0,0,3,0,0,
|
0,2,3,3,3,2,2,1,0,0,0,2,0,3,3,0,1,2,2,0,0,3,2,2,1,2,1,1,
|
||||||
3,0,2,2,2,0,3,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,
|
3,2,3,2,3,2,3,3,3,2,0,3,3,2,3,3,2,3,0,3,2,2,3,0,2,0,0,
|
||||||
2,3,3,3,3,2,3,3,3,3,0,2,3,0,3,3,3,3,3,3,3,3,3,3,3,0,3,3,2,3,0,2,0,
|
0,0,0,3,0,0,1,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,
|
||||||
3,0,2,2,2,0,3,2,2,3,0,3,0,0,2,2,2,2,0,3,0,3,2,2,0,3,0,0,0,0,0,0,0,
|
0,0,0,0,3,0,3,2,0,3,0,1,3,0,0,3,0,1,3,0,0,1,0,3,0,3,0,
|
||||||
3,2,3,3,3,3,3,3,3,3,0,3,0,3,3,2,3,3,0,3,3,3,3,3,0,0,0,0,2,0,0,0,3,
|
2,3,3,3,3,3,3,3,2,0,1,3,3,1,3,3,3,3,3,2,2,0,1,2,2,3,3,0,
|
||||||
2,2,2,2,3,2,3,3,0,3,0,3,3,2,2,2,0,0,3,2,3,3,3,2,3,2,0,2,0,2,0,2,0,
|
3,2,3,2,3,2,3,3,2,3,0,3,2,2,3,2,1,2,3,3,3,3,3,0,2,1,2,
|
||||||
2,3,2,0,3,2,3,3,0,3,0,2,0,2,3,0,0,2,0,2,3,3,0,2,0,3,2,0,0,0,3,0,2,
|
3,1,2,2,3,2,0,2,0,0,2,2,1,0,3,3,2,3,0,1,2,2,2,3,3,1,2,0,
|
||||||
3,0,2,2,0,0,0,3,3,0,0,0,0,0,2,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,
|
3,0,0,0,3,0,0,2,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,
|
||||||
3,0,3,2,0,0,0,3,3,0,2,2,0,0,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
3,0,3,2,3,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,
|
2,0,0,0,3,0,3,3,0,2,0,1,3,0,1,1,0,0,2,1,1,3,1,1,0,2,1,
|
||||||
3,0,2,3,3,0,3,0,3,0,0,2,0,2,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
|
2,1,2,1,0,1,0,0,0,0,2,1,0,3,2,3,3,1,3,0,3,2,3,3,3,0,0,0,
|
||||||
0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
0,2,2,1,3,2,3,3,2,3,0,0,3,2,3,2,2,2,3,2,2,3,2,1,1,2,1,
|
||||||
3,0,2,2,3,0,0,2,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
3,2,2,3,3,2,1,0,0,0,3,2,0,3,2,3,2,1,0,1,2,2,3,0,2,0,0,1,
|
||||||
3,0,3,3,0,0,3,2,2,0,0,0,0,0,2,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
3,0,3,3,3,1,0,2,3,3,0,1,0,0,1,0,3,0,0,1,3,0,0,0,0,0,0,
|
||||||
3,0,2,2,2,0,0,0,3,0,0,2,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
2,2,2,2,3,0,3,3,0,2,0,0,0,2,0,3,2,2,3,0,2,3,0,0,3,3,3,2,0,2,2,2,3,
|
0,3,2,0,3,0,3,2,1,3,0,3,0,0,2,0,2,1,0,2,2,3,1,0,0,0,0,
|
||||||
|
2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
2,1,0,2,3,1,3,3,0,3,0,3,3,0,3,3,0,3,1,2,2,3,1,1,1,0,0,
|
||||||
|
2,1,0,2,3,3,2,3,0,0,0,1,0,2,2,3,2,0,1,0,2,1,2,3,0,2,3,0,
|
||||||
|
3,0,1,1,2,0,3,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,3,3,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
|
||||||
|
1,3,3,3,3,1,3,3,2,3,0,1,2,0,2,3,2,2,2,3,2,3,2,0,2,2,0,
|
||||||
|
0,0,2,1,0,3,2,2,0,1,1,1,1,1,1,0,0,0,0,2,0,1,0,0,1,2,1,0,
|
||||||
|
2,0,1,2,1,0,2,2,1,2,0,2,0,0,1,1,2,1,0,2,0,2,1,3,1,0,0,
|
||||||
|
3,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,2,3,2,2,2,3,2,3,3,0,3,0,2,3,1,2,2,0,3,2,3,3,0,2,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
|
||||||
|
1,1,1,2,3,1,3,3,0,3,0,3,3,1,2,1,0,0,3,2,2,3,2,0,1,3,1,
|
||||||
|
1,0,0,3,1,1,1,0,0,0,0,1,0,0,3,3,2,1,0,1,0,3,2,1,1,2,1,0,
|
||||||
|
3,0,3,2,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,3,1,0,3,1,3,2,0,2,0,2,0,1,2,0,0,1,0,2,2,2,0,3,1,0,0,
|
||||||
|
2,0,1,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,3,0,0,2,0,0,0,1,
|
||||||
|
3,0,1,1,0,0,0,3,3,0,0,0,0,0,1,0,1,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,1,0,0,0,3,3,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,2,3,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,
|
||||||
|
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,2,3,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,3,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,2,3,0,3,0,2,0,0,1,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,3,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
2,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,3,0,0,2,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,0,0,0,0,3,3,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,0,0,3,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,3,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,1,3,0,0,2,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,0,0,2,1,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,3,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,3,1,0,0,0,2,2,0,0,0,0,0,0,0,3,0,0,0,2,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,1,1,1,0,0,0,3,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,2,3,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
3,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
||||||
|
1,1,2,1,2,0,3,3,0,1,0,0,0,2,0,3,1,2,2,0,1,3,0,2,0,2,0,
|
||||||
|
2,0,2,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,1,2,0,0,1,1,2,0,2,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -132,8 +230,18 @@ const SequenceModel Windows_1258VietnameseModel =
|
|||||||
{
|
{
|
||||||
Windows_1258_CharToOrderMap,
|
Windows_1258_CharToOrderMap,
|
||||||
VietnameseLangModel,
|
VietnameseLangModel,
|
||||||
33,
|
55,
|
||||||
(float)0.990048941203513,
|
(float)0.9321889118082535,
|
||||||
PR_FALSE,
|
PR_FALSE,
|
||||||
"WINDOWS-1258"
|
"WINDOWS-1258"
|
||||||
|
};
|
||||||
|
|
||||||
|
const SequenceModel VisciiVietnameseModel =
|
||||||
|
{
|
||||||
|
Viscii_CharToOrderMap,
|
||||||
|
VietnameseLangModel,
|
||||||
|
55,
|
||||||
|
(float)0.9321889118082535,
|
||||||
|
PR_FALSE,
|
||||||
|
"VISCII"
|
||||||
};
|
};
|
||||||
@ -104,7 +104,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
|||||||
mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
mProbers[28] = new nsSingleByteCharSetProber(&Iso_8859_6ArabicModel);
|
||||||
mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
mProbers[29] = new nsSingleByteCharSetProber(&Windows_1256ArabicModel);
|
||||||
|
|
||||||
mProbers[30] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
mProbers[30] = new nsSingleByteCharSetProber(&VisciiVietnameseModel);
|
||||||
|
mProbers[31] = new nsSingleByteCharSetProber(&Windows_1258VietnameseModel);
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -40,7 +40,7 @@
|
|||||||
#define nsSBCSGroupProber_h__
|
#define nsSBCSGroupProber_h__
|
||||||
|
|
||||||
|
|
||||||
#define NUM_OF_SBCS_PROBERS 31
|
#define NUM_OF_SBCS_PROBERS 32
|
||||||
|
|
||||||
class nsCharSetProber;
|
class nsCharSetProber;
|
||||||
class nsSBCSGroupProber: public nsCharSetProber {
|
class nsSBCSGroupProber: public nsCharSetProber {
|
||||||
|
|||||||
@ -162,6 +162,7 @@ extern const SequenceModel Iso_8859_3EsperantoModel;
|
|||||||
extern const SequenceModel Iso_8859_3TurkishModel;
|
extern const SequenceModel Iso_8859_3TurkishModel;
|
||||||
extern const SequenceModel Iso_8859_9TurkishModel;
|
extern const SequenceModel Iso_8859_9TurkishModel;
|
||||||
|
|
||||||
|
extern const SequenceModel VisciiVietnameseModel;
|
||||||
extern const SequenceModel Windows_1258VietnameseModel;
|
extern const SequenceModel Windows_1258VietnameseModel;
|
||||||
|
|
||||||
#endif /* nsSingleByteCharSetProber_h__ */
|
#endif /* nsSingleByteCharSetProber_h__ */
|
||||||
|
|||||||
4
test/vi/viscii.txt
Normal file
4
test/vi/viscii.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
Chæ Qu¯c ngæ là h® chæ viªt th¯ng nh¤t chính thÑc hi®n nay cüa tiªng Vi®t, sØ
|
||||||
|
døng ký tñ La Tinh, dña trên các bäng chæ cái cüa nhóm ngôn ngæ Rôman,[1] ð£c
|
||||||
|
bi®t là bäng chæ cái B° Ðào Nha,[2] v¾i các d¤u phø chü yªu t× bäng chæ cái Hy
|
||||||
|
LÕp.
|
||||||
Loading…
x
Reference in New Issue
Block a user