mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-08 01:36:41 +08:00
src: new API to get the detected language.
This doesn't work for all probers yet, in particular not for the most generic probers (such as UTF-8) or WINDOWS-1252. These will return NULL. It's still a good first step. Right now, it returns the 2-character language code from ISO 639-1. A project using this API could easily get the English language name from the XML/JSON files provided by the iso-codes project. That project also makes it easy to localize the language name into other languages through gettext (this is what we do in GIMP, for instance). I am not adding any dependency, though, and leave it to downstream projects to implement this. I was also wondering whether we want to support region information for cases where it would make sense. I especially wondered about it for Chinese encodings, as some of them seem quite specific to a region (according to Wikipedia at least). For the time being, though, these just return "zh". We'll see later whether it makes sense to be more accurate (maybe depending on reports?).
This commit is contained in:
parent
94736d1565
commit
4e967c9e88
@ -251,7 +251,8 @@ const SequenceModel Iso_8859_6ArabicModel =
|
||||
64,
|
||||
(float)0.9696025116913417,
|
||||
PR_FALSE,
|
||||
"ISO-8859-6"
|
||||
"ISO-8859-6",
|
||||
"ar"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1256ArabicModel =
|
||||
@ -261,5 +262,6 @@ const SequenceModel Windows_1256ArabicModel =
|
||||
64,
|
||||
(float)0.9696025116913417,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1256"
|
||||
"WINDOWS-1256",
|
||||
"ar"
|
||||
};
|
||||
|
||||
@ -233,7 +233,8 @@ const SequenceModel Latin5BulgarianModel =
|
||||
64,
|
||||
(float)0.969392,
|
||||
PR_FALSE,
|
||||
"ISO-8859-5"
|
||||
"ISO-8859-5",
|
||||
"bg"
|
||||
};
|
||||
|
||||
const SequenceModel Win1251BulgarianModel =
|
||||
@ -243,5 +244,6 @@ const SequenceModel Win1251BulgarianModel =
|
||||
64,
|
||||
(float)0.969392,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1251"
|
||||
"WINDOWS-1251",
|
||||
"bg"
|
||||
};
|
||||
|
||||
@ -238,7 +238,8 @@ const SequenceModel Windows_1250CroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"hr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_2CroatianModel =
|
||||
@ -248,7 +249,8 @@ const SequenceModel Iso_8859_2CroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"hr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_16CroatianModel =
|
||||
@ -258,7 +260,8 @@ const SequenceModel Iso_8859_16CroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"ISO-8859-16"
|
||||
"ISO-8859-16",
|
||||
"hr"
|
||||
};
|
||||
|
||||
const SequenceModel Mac_CentraleuropeCroatianModel =
|
||||
@ -268,7 +271,8 @@ const SequenceModel Mac_CentraleuropeCroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"MAC-CENTRALEUROPE"
|
||||
"MAC-CENTRALEUROPE",
|
||||
"hr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13CroatianModel =
|
||||
@ -278,7 +282,8 @@ const SequenceModel Iso_8859_13CroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"hr"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm852CroatianModel =
|
||||
@ -288,5 +293,6 @@ const SequenceModel Ibm852CroatianModel =
|
||||
31,
|
||||
(float)0.9989731099787131,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"hr"
|
||||
};
|
||||
|
||||
@ -247,7 +247,8 @@ const SequenceModel Windows_1250CzechModel =
|
||||
41,
|
||||
(float)0.9786035192432675,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"cs"
|
||||
};
|
||||
|
||||
const SequenceModel Mac_CentraleuropeCzechModel =
|
||||
@ -257,7 +258,8 @@ const SequenceModel Mac_CentraleuropeCzechModel =
|
||||
41,
|
||||
(float)0.9786035192432675,
|
||||
PR_TRUE,
|
||||
"MAC-CENTRALEUROPE"
|
||||
"MAC-CENTRALEUROPE",
|
||||
"cs"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm852CzechModel =
|
||||
@ -267,7 +269,8 @@ const SequenceModel Ibm852CzechModel =
|
||||
41,
|
||||
(float)0.9786035192432675,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"cs"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_2CzechModel =
|
||||
@ -277,5 +280,6 @@ const SequenceModel Iso_8859_2CzechModel =
|
||||
41,
|
||||
(float)0.9786035192432675,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"cs"
|
||||
};
|
||||
|
||||
@ -174,7 +174,8 @@ const SequenceModel Iso_8859_15DanishModel =
|
||||
30,
|
||||
(float)0.9968082796759031,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"da"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1DanishModel =
|
||||
@ -184,7 +185,8 @@ const SequenceModel Iso_8859_1DanishModel =
|
||||
30,
|
||||
(float)0.9968082796759031,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"da"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252DanishModel =
|
||||
@ -194,5 +196,6 @@ const SequenceModel Windows_1252DanishModel =
|
||||
30,
|
||||
(float)0.9968082796759031,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"da"
|
||||
};
|
||||
|
||||
@ -137,5 +137,6 @@ const SequenceModel Iso_8859_3EsperantoModel =
|
||||
35,
|
||||
(float)0.9942980632768038,
|
||||
PR_FALSE,
|
||||
"ISO-8859-3"
|
||||
"ISO-8859-3",
|
||||
"eo"
|
||||
};
|
||||
|
||||
@ -219,7 +219,8 @@ const SequenceModel Iso_8859_4EstonianModel =
|
||||
33,
|
||||
(float)0.9972721312183132,
|
||||
PR_TRUE,
|
||||
"ISO-8859-4"
|
||||
"ISO-8859-4",
|
||||
"et"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252EstonianModel =
|
||||
@ -229,7 +230,8 @@ const SequenceModel Windows_1252EstonianModel =
|
||||
33,
|
||||
(float)0.9972721312183132,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"et"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15EstonianModel =
|
||||
@ -239,7 +241,8 @@ const SequenceModel Iso_8859_15EstonianModel =
|
||||
33,
|
||||
(float)0.9972721312183132,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"et"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13EstonianModel =
|
||||
@ -249,7 +252,8 @@ const SequenceModel Iso_8859_13EstonianModel =
|
||||
33,
|
||||
(float)0.9972721312183132,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"et"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1257EstonianModel =
|
||||
@ -259,5 +263,6 @@ const SequenceModel Windows_1257EstonianModel =
|
||||
33,
|
||||
(float)0.9972721312183132,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1257"
|
||||
"WINDOWS-1257",
|
||||
"et"
|
||||
};
|
||||
|
||||
@ -237,7 +237,8 @@ const SequenceModel Iso_8859_15FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"fi"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252FinnishModel =
|
||||
@ -247,7 +248,8 @@ const SequenceModel Windows_1252FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"fi"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_4FinnishModel =
|
||||
@ -257,7 +259,8 @@ const SequenceModel Iso_8859_4FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"ISO-8859-4"
|
||||
"ISO-8859-4",
|
||||
"fi"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13FinnishModel =
|
||||
@ -267,7 +270,8 @@ const SequenceModel Iso_8859_13FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"fi"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9FinnishModel =
|
||||
@ -277,7 +281,8 @@ const SequenceModel Iso_8859_9FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"fi"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1FinnishModel =
|
||||
@ -287,5 +292,6 @@ const SequenceModel Iso_8859_1FinnishModel =
|
||||
30,
|
||||
(float)0.9985378147555799,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"fi"
|
||||
};
|
||||
|
||||
@ -182,7 +182,8 @@ const SequenceModel Windows_1252FrenchModel =
|
||||
38,
|
||||
(float)0.997057879992383,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"fr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1FrenchModel =
|
||||
@ -192,7 +193,8 @@ const SequenceModel Iso_8859_1FrenchModel =
|
||||
38,
|
||||
(float)0.997057879992383,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"fr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15FrenchModel =
|
||||
@ -202,5 +204,6 @@ const SequenceModel Iso_8859_15FrenchModel =
|
||||
38,
|
||||
(float)0.997057879992383,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"fr"
|
||||
};
|
||||
|
||||
@ -154,7 +154,8 @@ const SequenceModel Windows_1252GermanModel =
|
||||
31,
|
||||
(float)0.9934041448127945,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"de"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1GermanModel =
|
||||
@ -164,5 +165,6 @@ const SequenceModel Iso_8859_1GermanModel =
|
||||
31,
|
||||
(float)0.9934041448127945,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"de"
|
||||
};
|
||||
|
||||
@ -215,7 +215,8 @@ const SequenceModel Windows_1253GreekModel =
|
||||
46,
|
||||
(float)0.958419074626211,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1253"
|
||||
"WINDOWS-1253",
|
||||
"el"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_7GreekModel =
|
||||
@ -225,5 +226,6 @@ const SequenceModel Iso_8859_7GreekModel =
|
||||
46,
|
||||
(float)0.958419074626211,
|
||||
PR_FALSE,
|
||||
"ISO-8859-7"
|
||||
"ISO-8859-7",
|
||||
"el"
|
||||
};
|
||||
|
||||
@ -215,6 +215,6 @@ const SequenceModel Win1255Model =
|
||||
64,
|
||||
(float)0.984004,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1255"
|
||||
"WINDOWS-1255",
|
||||
"he"
|
||||
};
|
||||
|
||||
|
||||
@ -155,7 +155,8 @@ const SequenceModel Iso_8859_2HungarianModel =
|
||||
32,
|
||||
(float)0.9748272224933486,
|
||||
PR_FALSE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"hu"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1250HungarianModel =
|
||||
@ -165,5 +166,6 @@ const SequenceModel Windows_1250HungarianModel =
|
||||
32,
|
||||
(float)0.9748272224933486,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"hu"
|
||||
};
|
||||
|
||||
@ -196,7 +196,8 @@ const SequenceModel Iso_8859_1IrishModel =
|
||||
31,
|
||||
(float)0.9974076651249096,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"ga"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252IrishModel =
|
||||
@ -206,7 +207,8 @@ const SequenceModel Windows_1252IrishModel =
|
||||
31,
|
||||
(float)0.9974076651249096,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"ga"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15IrishModel =
|
||||
@ -216,7 +218,8 @@ const SequenceModel Iso_8859_15IrishModel =
|
||||
31,
|
||||
(float)0.9974076651249096,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"ga"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9IrishModel =
|
||||
@ -226,5 +229,6 @@ const SequenceModel Iso_8859_9IrishModel =
|
||||
31,
|
||||
(float)0.9974076651249096,
|
||||
PR_TRUE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"ga"
|
||||
};
|
||||
|
||||
@ -220,7 +220,8 @@ const SequenceModel Iso_8859_3ItalianModel =
|
||||
34,
|
||||
(float)0.9989484485502651,
|
||||
PR_TRUE,
|
||||
"ISO-8859-3"
|
||||
"ISO-8859-3",
|
||||
"it"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15ItalianModel =
|
||||
@ -230,7 +231,8 @@ const SequenceModel Iso_8859_15ItalianModel =
|
||||
34,
|
||||
(float)0.9989484485502651,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"it"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9ItalianModel =
|
||||
@ -240,7 +242,8 @@ const SequenceModel Iso_8859_9ItalianModel =
|
||||
34,
|
||||
(float)0.9989484485502651,
|
||||
PR_TRUE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"it"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1ItalianModel =
|
||||
@ -250,7 +253,8 @@ const SequenceModel Iso_8859_1ItalianModel =
|
||||
34,
|
||||
(float)0.9989484485502651,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"it"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252ItalianModel =
|
||||
@ -260,5 +264,6 @@ const SequenceModel Windows_1252ItalianModel =
|
||||
34,
|
||||
(float)0.9989484485502651,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"it"
|
||||
};
|
||||
|
||||
@ -183,7 +183,8 @@ const SequenceModel Iso_8859_4LatvianModel =
|
||||
39,
|
||||
(float)0.9904102202220861,
|
||||
PR_TRUE,
|
||||
"ISO-8859-4"
|
||||
"ISO-8859-4",
|
||||
"lv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_10LatvianModel =
|
||||
@ -193,7 +194,8 @@ const SequenceModel Iso_8859_10LatvianModel =
|
||||
39,
|
||||
(float)0.9904102202220861,
|
||||
PR_TRUE,
|
||||
"ISO-8859-10"
|
||||
"ISO-8859-10",
|
||||
"lv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13LatvianModel =
|
||||
@ -203,5 +205,6 @@ const SequenceModel Iso_8859_13LatvianModel =
|
||||
39,
|
||||
(float)0.9904102202220861,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"lv"
|
||||
};
|
||||
|
||||
@ -182,7 +182,8 @@ const SequenceModel Iso_8859_10LithuanianModel =
|
||||
38,
|
||||
(float)0.9928710196247589,
|
||||
PR_TRUE,
|
||||
"ISO-8859-10"
|
||||
"ISO-8859-10",
|
||||
"lt"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_4LithuanianModel =
|
||||
@ -192,7 +193,8 @@ const SequenceModel Iso_8859_4LithuanianModel =
|
||||
38,
|
||||
(float)0.9928710196247589,
|
||||
PR_TRUE,
|
||||
"ISO-8859-4"
|
||||
"ISO-8859-4",
|
||||
"lt"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13LithuanianModel =
|
||||
@ -202,5 +204,6 @@ const SequenceModel Iso_8859_13LithuanianModel =
|
||||
38,
|
||||
(float)0.9928710196247589,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"lt"
|
||||
};
|
||||
|
||||
@ -133,5 +133,6 @@ const SequenceModel Iso_8859_3MalteseModel =
|
||||
31,
|
||||
(float)0.9959115850692665,
|
||||
PR_TRUE,
|
||||
"ISO-8859-3"
|
||||
"ISO-8859-3",
|
||||
"mt"
|
||||
};
|
||||
|
||||
@ -244,7 +244,8 @@ const SequenceModel Ibm852PolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"pl"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_16PolishModel =
|
||||
@ -254,7 +255,8 @@ const SequenceModel Iso_8859_16PolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"ISO-8859-16"
|
||||
"ISO-8859-16",
|
||||
"pl"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_2PolishModel =
|
||||
@ -264,7 +266,8 @@ const SequenceModel Iso_8859_2PolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"pl"
|
||||
};
|
||||
|
||||
const SequenceModel Mac_CentraleuropePolishModel =
|
||||
@ -274,7 +277,8 @@ const SequenceModel Mac_CentraleuropePolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"MAC-CENTRALEUROPE"
|
||||
"MAC-CENTRALEUROPE",
|
||||
"pl"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_13PolishModel =
|
||||
@ -284,7 +288,8 @@ const SequenceModel Iso_8859_13PolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"ISO-8859-13"
|
||||
"ISO-8859-13",
|
||||
"pl"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1250PolishModel =
|
||||
@ -294,5 +299,6 @@ const SequenceModel Windows_1250PolishModel =
|
||||
37,
|
||||
(float)0.9894531815946438,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"pl"
|
||||
};
|
||||
|
||||
@ -203,7 +203,8 @@ const SequenceModel Iso_8859_1PortugueseModel =
|
||||
38,
|
||||
(float)0.9953179582313172,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"pt"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9PortugueseModel =
|
||||
@ -213,7 +214,8 @@ const SequenceModel Iso_8859_9PortugueseModel =
|
||||
38,
|
||||
(float)0.9953179582313172,
|
||||
PR_TRUE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"pt"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15PortugueseModel =
|
||||
@ -223,7 +225,8 @@ const SequenceModel Iso_8859_15PortugueseModel =
|
||||
38,
|
||||
(float)0.9953179582313172,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"pt"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252PortugueseModel =
|
||||
@ -233,5 +236,6 @@ const SequenceModel Windows_1252PortugueseModel =
|
||||
38,
|
||||
(float)0.9953179582313172,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"pt"
|
||||
};
|
||||
|
||||
@ -198,7 +198,8 @@ const SequenceModel Iso_8859_16RomanianModel =
|
||||
33,
|
||||
(float)0.997762564143313,
|
||||
PR_TRUE,
|
||||
"ISO-8859-16"
|
||||
"ISO-8859-16",
|
||||
"ro"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_2RomanianModel =
|
||||
@ -208,7 +209,8 @@ const SequenceModel Iso_8859_2RomanianModel =
|
||||
33,
|
||||
(float)0.997762564143313,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"ro"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1250RomanianModel =
|
||||
@ -218,7 +220,8 @@ const SequenceModel Windows_1250RomanianModel =
|
||||
33,
|
||||
(float)0.997762564143313,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"ro"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm852RomanianModel =
|
||||
@ -228,5 +231,6 @@ const SequenceModel Ibm852RomanianModel =
|
||||
33,
|
||||
(float)0.997762564143313,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"ro"
|
||||
};
|
||||
|
||||
@ -307,7 +307,8 @@ const SequenceModel Koi8rRussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"KOI8-R"
|
||||
"KOI8-R",
|
||||
"ru"
|
||||
};
|
||||
|
||||
const SequenceModel Win1251RussianModel =
|
||||
@ -317,7 +318,8 @@ const SequenceModel Win1251RussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1251"
|
||||
"WINDOWS-1251",
|
||||
"ru"
|
||||
};
|
||||
|
||||
const SequenceModel Latin5RussianModel =
|
||||
@ -327,7 +329,8 @@ const SequenceModel Latin5RussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"ISO-8859-5"
|
||||
"ISO-8859-5",
|
||||
"ru"
|
||||
};
|
||||
|
||||
const SequenceModel MacCyrillicRussianModel =
|
||||
@ -337,7 +340,8 @@ const SequenceModel MacCyrillicRussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"MAC-CYRILLIC"
|
||||
"MAC-CYRILLIC",
|
||||
"ru"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm866RussianModel =
|
||||
@ -347,7 +351,8 @@ const SequenceModel Ibm866RussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"IBM866"
|
||||
"IBM866",
|
||||
"ru"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm855RussianModel =
|
||||
@ -357,5 +362,6 @@ const SequenceModel Ibm855RussianModel =
|
||||
64,
|
||||
(float)0.976601,
|
||||
PR_FALSE,
|
||||
"IBM855"
|
||||
"IBM855",
|
||||
"ru"
|
||||
};
|
||||
|
||||
@ -255,7 +255,8 @@ const SequenceModel Ibm852SlovakModel =
|
||||
45,
|
||||
(float)0.9733303573968434,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"sk"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_2SlovakModel =
|
||||
@ -265,7 +266,8 @@ const SequenceModel Iso_8859_2SlovakModel =
|
||||
45,
|
||||
(float)0.9733303573968434,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"sk"
|
||||
};
|
||||
|
||||
const SequenceModel Mac_CentraleuropeSlovakModel =
|
||||
@ -275,7 +277,8 @@ const SequenceModel Mac_CentraleuropeSlovakModel =
|
||||
45,
|
||||
(float)0.9733303573968434,
|
||||
PR_TRUE,
|
||||
"MAC-CENTRALEUROPE"
|
||||
"MAC-CENTRALEUROPE",
|
||||
"sk"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1250SlovakModel =
|
||||
@ -285,5 +288,6 @@ const SequenceModel Windows_1250SlovakModel =
|
||||
45,
|
||||
(float)0.9733303573968434,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"sk"
|
||||
};
|
||||
|
||||
@ -215,7 +215,8 @@ const SequenceModel Iso_8859_2SloveneModel =
|
||||
29,
|
||||
(float)0.9983524317161332,
|
||||
PR_TRUE,
|
||||
"ISO-8859-2"
|
||||
"ISO-8859-2",
|
||||
"sl"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_16SloveneModel =
|
||||
@ -225,7 +226,8 @@ const SequenceModel Iso_8859_16SloveneModel =
|
||||
29,
|
||||
(float)0.9983524317161332,
|
||||
PR_TRUE,
|
||||
"ISO-8859-16"
|
||||
"ISO-8859-16",
|
||||
"sl"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1250SloveneModel =
|
||||
@ -235,7 +237,8 @@ const SequenceModel Windows_1250SloveneModel =
|
||||
29,
|
||||
(float)0.9983524317161332,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1250"
|
||||
"WINDOWS-1250",
|
||||
"sl"
|
||||
};
|
||||
|
||||
const SequenceModel Mac_CentraleuropeSloveneModel =
|
||||
@ -245,7 +248,8 @@ const SequenceModel Mac_CentraleuropeSloveneModel =
|
||||
29,
|
||||
(float)0.9983524317161332,
|
||||
PR_TRUE,
|
||||
"MAC-CENTRALEUROPE"
|
||||
"MAC-CENTRALEUROPE",
|
||||
"sl"
|
||||
};
|
||||
|
||||
const SequenceModel Ibm852SloveneModel =
|
||||
@ -255,5 +259,6 @@ const SequenceModel Ibm852SloveneModel =
|
||||
29,
|
||||
(float)0.9983524317161332,
|
||||
PR_TRUE,
|
||||
"IBM852"
|
||||
"IBM852",
|
||||
"sl"
|
||||
};
|
||||
|
||||
@ -177,7 +177,8 @@ const SequenceModel Iso_8859_1SpanishModel =
|
||||
33,
|
||||
(float)0.9970385677528184,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"es"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15SpanishModel =
|
||||
@ -187,7 +188,8 @@ const SequenceModel Iso_8859_15SpanishModel =
|
||||
33,
|
||||
(float)0.9970385677528184,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"es"
|
||||
};
|
||||
|
||||
const SequenceModel Windows_1252SpanishModel =
|
||||
@ -197,5 +199,6 @@ const SequenceModel Windows_1252SpanishModel =
|
||||
33,
|
||||
(float)0.9970385677528184,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"es"
|
||||
};
|
||||
|
||||
@ -217,7 +217,8 @@ const SequenceModel Windows_1252SwedishModel =
|
||||
31,
|
||||
(float)0.997323508584682,
|
||||
PR_TRUE,
|
||||
"WINDOWS-1252"
|
||||
"WINDOWS-1252",
|
||||
"sv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9SwedishModel =
|
||||
@ -227,7 +228,8 @@ const SequenceModel Iso_8859_9SwedishModel =
|
||||
31,
|
||||
(float)0.997323508584682,
|
||||
PR_TRUE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"sv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_1SwedishModel =
|
||||
@ -237,7 +239,8 @@ const SequenceModel Iso_8859_1SwedishModel =
|
||||
31,
|
||||
(float)0.997323508584682,
|
||||
PR_TRUE,
|
||||
"ISO-8859-1"
|
||||
"ISO-8859-1",
|
||||
"sv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_4SwedishModel =
|
||||
@ -247,7 +250,8 @@ const SequenceModel Iso_8859_4SwedishModel =
|
||||
31,
|
||||
(float)0.997323508584682,
|
||||
PR_TRUE,
|
||||
"ISO-8859-4"
|
||||
"ISO-8859-4",
|
||||
"sv"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_15SwedishModel =
|
||||
@ -257,5 +261,6 @@ const SequenceModel Iso_8859_15SwedishModel =
|
||||
31,
|
||||
(float)0.997323508584682,
|
||||
PR_TRUE,
|
||||
"ISO-8859-15"
|
||||
"ISO-8859-15",
|
||||
"sv"
|
||||
};
|
||||
|
||||
@ -251,7 +251,8 @@ const SequenceModel Tis_620ThaiModel =
|
||||
64,
|
||||
(float)0.8815720594354438,
|
||||
PR_FALSE,
|
||||
"TIS-620"
|
||||
"TIS-620",
|
||||
"th"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_11ThaiModel =
|
||||
@ -261,5 +262,6 @@ const SequenceModel Iso_8859_11ThaiModel =
|
||||
64,
|
||||
(float)0.8815720594354438,
|
||||
PR_FALSE,
|
||||
"ISO-8859-11"
|
||||
"ISO-8859-11",
|
||||
"th"
|
||||
};
|
||||
|
||||
@ -159,7 +159,8 @@ const SequenceModel Iso_8859_3TurkishModel =
|
||||
36,
|
||||
(float)0.991865243864388,
|
||||
PR_FALSE,
|
||||
"ISO-8859-3"
|
||||
"ISO-8859-3",
|
||||
"tr"
|
||||
};
|
||||
|
||||
const SequenceModel Iso_8859_9TurkishModel =
|
||||
@ -169,5 +170,6 @@ const SequenceModel Iso_8859_9TurkishModel =
|
||||
36,
|
||||
(float)0.991865243864388,
|
||||
PR_FALSE,
|
||||
"ISO-8859-9"
|
||||
"ISO-8859-9",
|
||||
"tr"
|
||||
};
|
||||
|
||||
@ -233,7 +233,8 @@ const SequenceModel Windows_1258VietnameseModel =
|
||||
55,
|
||||
(float)0.9321889118082535,
|
||||
PR_FALSE,
|
||||
"WINDOWS-1258"
|
||||
"WINDOWS-1258",
|
||||
"vi"
|
||||
};
|
||||
|
||||
const SequenceModel VisciiVietnameseModel =
|
||||
@ -243,5 +244,6 @@ const SequenceModel VisciiVietnameseModel =
|
||||
55,
|
||||
(float)0.9321889118082535,
|
||||
PR_FALSE,
|
||||
"VISCII"
|
||||
"VISCII",
|
||||
"vi"
|
||||
};
|
||||
|
||||
@ -51,6 +51,7 @@ public:
|
||||
virtual ~nsBig5Prober(void){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "BIG5";}
|
||||
const char* GetLanguage() {return "zh";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -54,6 +54,7 @@ class nsCharSetProber {
|
||||
public:
|
||||
virtual ~nsCharSetProber() {}
|
||||
virtual const char* GetCharSetName() = 0;
|
||||
virtual const char* GetLanguage() = 0;
|
||||
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen) = 0;
|
||||
virtual nsProbingState GetState(void) = 0;
|
||||
virtual void Reset(void) = 0;
|
||||
|
||||
@ -57,6 +57,7 @@ public:
|
||||
virtual ~nsEUCJPProber(void){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "EUC-JP";}
|
||||
const char* GetLanguage() {return "ja";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -57,6 +57,7 @@ public:
|
||||
* Korean documents are actually created with this character set.
|
||||
*/
|
||||
const char* GetCharSetName() {return "UHC";}
|
||||
const char* GetLanguage() {return "ko";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -51,6 +51,7 @@ public:
|
||||
virtual ~nsEUCTWProber(void){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "EUC-TW";}
|
||||
const char* GetLanguage() {return "zh";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -38,6 +38,8 @@
|
||||
#ifndef nsEscCharSetProber_h__
|
||||
#define nsEscCharSetProber_h__
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
@ -49,6 +51,7 @@ public:
|
||||
virtual ~nsEscCharSetProber(void);
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return mDetectedCharset;}
|
||||
const char* GetLanguage() {return NULL;}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void){return (float)0.99;}
|
||||
|
||||
@ -53,6 +53,7 @@ public:
|
||||
virtual ~nsGB18030Prober(void){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "GB18030";}
|
||||
const char* GetLanguage() {return "zh";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -49,7 +49,8 @@ public:
|
||||
|
||||
virtual ~nsHebrewProber(void) {}
|
||||
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
virtual const char* GetCharSetName();
|
||||
virtual const char *GetCharSetName();
|
||||
virtual const char *GetLanguage(void) { return "he"; }
|
||||
virtual void Reset(void);
|
||||
|
||||
virtual nsProbingState GetState(void);
|
||||
|
||||
@ -39,6 +39,8 @@
|
||||
#ifndef nsLatin1Prober_h__
|
||||
#define nsLatin1Prober_h__
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "nsCharSetProber.h"
|
||||
|
||||
#define FREQ_CAT_NUM 4
|
||||
@ -49,6 +51,7 @@ public:
|
||||
virtual ~nsLatin1Prober(void){}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "WINDOWS-1252";}
|
||||
const char* GetLanguage() {return NULL;}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -97,6 +97,18 @@ const char* nsMBCSGroupProber::GetCharSetName()
|
||||
return mProbers[mBestGuess]->GetCharSetName();
|
||||
}
|
||||
|
||||
const char* nsMBCSGroupProber::GetLanguage(void)
|
||||
{
|
||||
if (mBestGuess == -1)
|
||||
{
|
||||
GetConfidence();
|
||||
}
|
||||
if (mBestGuess == -1)
|
||||
return NULL;
|
||||
else
|
||||
return mProbers[mBestGuess]->GetLanguage();
|
||||
}
|
||||
|
||||
void nsMBCSGroupProber::Reset(void)
|
||||
{
|
||||
mActiveNum = 0;
|
||||
|
||||
@ -55,6 +55,7 @@ public:
|
||||
virtual ~nsMBCSGroupProber();
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName();
|
||||
const char* GetLanguage();
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -217,6 +217,17 @@ const char* nsSBCSGroupProber::GetCharSetName()
|
||||
return mProbers[mBestGuess]->GetCharSetName();
|
||||
}
|
||||
|
||||
const char* nsSBCSGroupProber::GetLanguage()
|
||||
{
|
||||
if (mBestGuess == -1)
|
||||
{
|
||||
GetConfidence();
|
||||
if (mBestGuess == -1)
|
||||
mBestGuess = 0;
|
||||
}
|
||||
return mProbers[mBestGuess]->GetLanguage();
|
||||
}
|
||||
|
||||
void nsSBCSGroupProber::Reset(void)
|
||||
{
|
||||
mActiveNum = 0;
|
||||
|
||||
@ -49,6 +49,7 @@ public:
|
||||
virtual ~nsSBCSGroupProber();
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName();
|
||||
const char* GetLanguage();
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -145,6 +145,13 @@ const char* nsSingleByteCharSetProber::GetCharSetName()
|
||||
return mNameProber->GetCharSetName();
|
||||
}
|
||||
|
||||
const char* nsSingleByteCharSetProber::GetLanguage()
|
||||
{
|
||||
if (!mNameProber)
|
||||
return mModel->langName;
|
||||
return mNameProber->GetLanguage();
|
||||
}
|
||||
|
||||
#ifdef DEBUG_chardet
|
||||
void nsSingleByteCharSetProber::DumpStatus()
|
||||
{
|
||||
|
||||
@ -75,6 +75,7 @@ typedef struct
|
||||
float mTypicalPositiveRatio; // = freqSeqs / totalSeqs
|
||||
PRBool keepEnglishLetter; // says if this script contains English characters (not implemented)
|
||||
const char* const charsetName;
|
||||
const char* const langName;
|
||||
} SequenceModel;
|
||||
|
||||
|
||||
@ -86,6 +87,7 @@ public:
|
||||
:mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
|
||||
|
||||
virtual const char* GetCharSetName();
|
||||
virtual const char* GetLanguage();
|
||||
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
virtual nsProbingState GetState(void) {return mState;}
|
||||
virtual void Reset(void);
|
||||
|
||||
@ -58,6 +58,7 @@ public:
|
||||
virtual ~nsSJISProber(void){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "SHIFT_JIS";}
|
||||
const char* GetLanguage() {return "ja";}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -38,6 +38,7 @@
|
||||
#ifndef nsUTF8Prober_h__
|
||||
#define nsUTF8Prober_h__
|
||||
|
||||
#include <cstddef>
|
||||
#include "nsCharSetProber.h"
|
||||
#include "nsCodingStateMachine.h"
|
||||
|
||||
@ -49,6 +50,7 @@ public:
|
||||
virtual ~nsUTF8Prober(){delete mCodingSM;}
|
||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
||||
const char* GetCharSetName() {return "UTF-8";}
|
||||
const char* GetLanguage() {return NULL;}
|
||||
nsProbingState GetState(void) {return mState;}
|
||||
void Reset(void);
|
||||
float GetConfidence(void);
|
||||
|
||||
@ -305,7 +305,7 @@ void nsUniversalDetector::DataEnd()
|
||||
* when finding them.
|
||||
*/
|
||||
mDone = PR_TRUE;
|
||||
Report(mDetectedCharset, 1.0);
|
||||
Report(mDetectedCharset, NULL, 1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -323,7 +323,9 @@ void nsUniversalDetector::DataEnd()
|
||||
|
||||
if (proberConfidence > MINIMUM_THRESHOLD)
|
||||
/* Only report what we are confident in. */
|
||||
Report(mCharSetProbers[i]->GetCharSetName(), proberConfidence);
|
||||
Report(mCharSetProbers[i]->GetCharSetName(),
|
||||
mCharSetProbers[i]->GetLanguage(),
|
||||
proberConfidence);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -69,7 +69,8 @@ public:
|
||||
virtual void DataEnd(void);
|
||||
|
||||
protected:
|
||||
virtual void Report(const char* aCharset,
|
||||
virtual void Report(const char *encoding,
|
||||
const char *language,
|
||||
float confidence) = 0;
|
||||
virtual void Reset();
|
||||
nsInputState mInputState;
|
||||
|
||||
@ -9,6 +9,7 @@ set(
|
||||
uchardet_get_candidates
|
||||
uchardet_get_encoding
|
||||
uchardet_get_confidence
|
||||
uchardet_get_language
|
||||
)
|
||||
|
||||
set (LINK_FLAGS "")
|
||||
|
||||
@ -65,6 +65,7 @@ public:
|
||||
}
|
||||
|
||||
virtual void Report(const char *encoding,
|
||||
const char *language,
|
||||
float confidence)
|
||||
{
|
||||
std::vector<UChardetCandidate>::iterator it;
|
||||
@ -72,7 +73,8 @@ public:
|
||||
|
||||
for (it = candidates.begin(); it != candidates.end(); it++)
|
||||
{
|
||||
if (strcmp(it->encoding, encoding) == 0)
|
||||
if (strcmp(it->encoding, encoding) == 0 &&
|
||||
it->language && language && strcmp(it->language, language) == 0)
|
||||
{
|
||||
/* Already reported. Bail out or update the confidence
|
||||
* when needed.
|
||||
@ -91,6 +93,7 @@ public:
|
||||
|
||||
candidate = UChardetCandidate();
|
||||
candidate.encoding = strdup(encoding);
|
||||
candidate.language = language ? strdup(language) : NULL;
|
||||
candidate.confidence = confidence;
|
||||
|
||||
for (it = candidates.begin(); it != candidates.end(); it++)
|
||||
@ -107,7 +110,11 @@ public:
|
||||
|
||||
nsUniversalDetector::Reset();
|
||||
for (it = candidates.begin(); it != candidates.end(); it++)
|
||||
{
|
||||
free(it->encoding);
|
||||
if (it->language)
|
||||
free(it->language);
|
||||
}
|
||||
candidates.clear();
|
||||
}
|
||||
|
||||
@ -125,6 +132,12 @@ public:
|
||||
{
|
||||
return (candidates.size() > i) ? candidates[i].confidence : 0.0;
|
||||
}
|
||||
|
||||
const char* GetLanguage(size_t i) const
|
||||
{
|
||||
return (candidates.size() > i) ? candidates[i].language : NULL;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
uchardet_t uchardet_new(void)
|
||||
@ -178,3 +191,9 @@ const char * uchardet_get_encoding (uchardet_t ud,
|
||||
{
|
||||
return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(candidate);
|
||||
}
|
||||
|
||||
/*
 * C entry point: forwards to HandleUniversalDetector::GetLanguage() on the
 * detector object behind the opaque handle `ud`, for the given candidate
 * index, and returns its result unchanged.
 */
const char * uchardet_get_language (uchardet_t ud,
                                    size_t     candidate)
{
    HandleUniversalDetector *detector =
        reinterpret_cast<HandleUniversalDetector*>(ud);

    return detector->GetLanguage(candidate);
}
|
||||
|
||||
@ -120,6 +120,8 @@ UCHARDET_INTERFACE float uchardet_get_confidence (uchardet_t ud,
|
||||
size_t candidate);
|
||||
UCHARDET_INTERFACE const char * uchardet_get_encoding (uchardet_t ud,
|
||||
size_t candidate);
|
||||
UCHARDET_INTERFACE const char * uchardet_get_language (uchardet_t ud,
|
||||
size_t candidate);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user