mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
Re-enable Hungarian language models.
Now that we have at least one model for ISO-8859-1, the risk of detecting all ISO-8859-1 texts as ISO-8859-2 is lessened.
This commit is contained in:
parent
f4f9fc3f28
commit
683255278d
@ -47,6 +47,9 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
|
||||
* ISO-8859-15
|
||||
* English
|
||||
* ASCII
|
||||
* Hungarian:
|
||||
* ISO-8859-2
|
||||
* WINDOWS-1250
|
||||
* Others
|
||||
* WINDOWS-1252
|
||||
|
||||
|
||||
@ -81,10 +81,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
||||
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
||||
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||
|
||||
// disable latin2 before latin1 is available, otherwise all latin1
|
||||
// will be detected as latin2 because of their similarity.
|
||||
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
|
||||
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
|
||||
mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
|
||||
mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
#define nsSBCSGroupProber_h__
|
||||
|
||||
|
||||
#define NUM_OF_SBCS_PROBERS 16
|
||||
#define NUM_OF_SBCS_PROBERS 18
|
||||
|
||||
class nsCharSetProber;
|
||||
class nsSBCSGroupProber: public nsCharSetProber {
|
||||
|
||||
@ -34,8 +34,7 @@ foreach(dir ${dirs})
|
||||
# enough). We will have to take a closer look and fix these, but
|
||||
# there is no need to break the whole `make test` right now,
|
||||
# which may make actual regressions harder to notice.
|
||||
if ("${lang}:${charset}" STREQUAL "hu:iso-8859-2" OR
|
||||
"${lang}:${charset}" STREQUAL "el:windows-1253" OR
|
||||
if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
|
||||
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
|
||||
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
|
||||
"${lang}:${charset}" STREQUAL "he:iso-8859-8")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user