Re-enable Hungarian language models.

Now that we have at least one model for ISO-8859-1, there is less risk
of every ISO-8859-1 text being misdetected as ISO-8859-2.
This commit is contained in:
Jehan 2015-12-02 22:24:36 +01:00
parent f4f9fc3f28
commit 683255278d
4 changed files with 7 additions and 7 deletions

View File

@ -47,6 +47,9 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
* ISO-8859-15
* English
* ASCII
* Hungarian:
* ISO-8859-2
* WINDOWS-1250
* Others
* WINDOWS-1252

View File

@ -81,10 +81,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
// disable latin2 before latin1 is available, otherwise all latin1
// will be detected as latin2 because of their similarity.
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
Reset();
}

View File

@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__
#define NUM_OF_SBCS_PROBERS 16
#define NUM_OF_SBCS_PROBERS 18
class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber {

View File

@ -34,8 +34,7 @@ foreach(dir ${dirs})
# enough). We will have to take a closer look and fix these, but
# there is no need to break the whole `make test` right now,
# which may make actual regressions harder to notice.
if ("${lang}:${charset}" STREQUAL "hu:iso-8859-2" OR
"${lang}:${charset}" STREQUAL "el:windows-1253" OR
if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
"${lang}:${charset}" STREQUAL "he:iso-8859-8")