Re-enable Hungarian language models.

Now that we have at least one model for ISO-8859-1, there is less
risk of ISO-8859-1 text being misdetected as ISO-8859-2.
This commit is contained in:
Jehan 2015-12-02 22:24:36 +01:00
parent f4f9fc3f28
commit 683255278d
4 changed files with 7 additions and 7 deletions

View File

@ -47,6 +47,9 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
* ISO-8859-15 * ISO-8859-15
* English * English
* ASCII * ASCII
* Hungarian:
* ISO-8859-2
* WINDOWS-1250
* Others * Others
* WINDOWS-1252 * WINDOWS-1252

View File

@ -81,10 +81,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel); mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel); mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
// disable latin2 before latin1 is available, otherwise all latin1 mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
// will be detected as latin2 because of their similarity. mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
Reset(); Reset();
} }

View File

@ -40,7 +40,7 @@
#define nsSBCSGroupProber_h__ #define nsSBCSGroupProber_h__
#define NUM_OF_SBCS_PROBERS 16 #define NUM_OF_SBCS_PROBERS 18
class nsCharSetProber; class nsCharSetProber;
class nsSBCSGroupProber: public nsCharSetProber { class nsSBCSGroupProber: public nsCharSetProber {

View File

@ -34,8 +34,7 @@ foreach(dir ${dirs})
# enough). We will have to take a closer look and fix these, but # enough). We will have to take a closer look and fix these, but
# there is no need to break the whole `make test` right now, # there is no need to break the whole `make test` right now,
# which may make actual regressions harder to notice. # which may make actual regressions harder to notice.
if ("${lang}:${charset}" STREQUAL "hu:iso-8859-2" OR if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
"${lang}:${charset}" STREQUAL "el:windows-1253" OR
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR "${lang}:${charset}" STREQUAL "ja:utf-16le" OR
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR "${lang}:${charset}" STREQUAL "ja:utf-16be" OR
"${lang}:${charset}" STREQUAL "he:iso-8859-8") "${lang}:${charset}" STREQUAL "he:iso-8859-8")