mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
Re-enable Hungarian language models.
Now that we have at least one model for ISO-8859-1, the risk of detecting all ISO-8859-1 texts as ISO-8859-2 is lessened.
This commit is contained in:
parent
f4f9fc3f28
commit
683255278d
@ -47,6 +47,9 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
|
|||||||
* ISO-8859-15
|
* ISO-8859-15
|
||||||
* English
|
* English
|
||||||
* ASCII
|
* ASCII
|
||||||
|
* Hungarian:
|
||||||
|
* ISO-8859-2
|
||||||
|
* WINDOWS-1250
|
||||||
* Others
|
* Others
|
||||||
* WINDOWS-1252
|
* WINDOWS-1252
|
||||||
|
|
||||||
|
|||||||
@ -81,10 +81,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
|
|||||||
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
|
||||||
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);
|
||||||
|
|
||||||
// disable latin2 before latin1 is available, otherwise all latin1
|
mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
|
||||||
// will be detected as latin2 because of their similarity.
|
mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
|
||||||
//mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
|
|
||||||
//mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
|
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -40,7 +40,7 @@
|
|||||||
#define nsSBCSGroupProber_h__
|
#define nsSBCSGroupProber_h__
|
||||||
|
|
||||||
|
|
||||||
#define NUM_OF_SBCS_PROBERS 16
|
#define NUM_OF_SBCS_PROBERS 18
|
||||||
|
|
||||||
class nsCharSetProber;
|
class nsCharSetProber;
|
||||||
class nsSBCSGroupProber: public nsCharSetProber {
|
class nsSBCSGroupProber: public nsCharSetProber {
|
||||||
|
|||||||
@ -34,8 +34,7 @@ foreach(dir ${dirs})
|
|||||||
# enough). We will have to take a closer look and fix these, but
|
# enough). We will have to take a closer look and fix these, but
|
||||||
# there is no need to break the whole `make test` right now,
|
# there is no need to break the whole `make test` right now,
|
||||||
# which may make actual regressions harder to notice.
|
# which may make actual regressions harder to notice.
|
||||||
if ("${lang}:${charset}" STREQUAL "hu:iso-8859-2" OR
|
if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
|
||||||
"${lang}:${charset}" STREQUAL "el:windows-1253" OR
|
|
||||||
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
|
"${lang}:${charset}" STREQUAL "ja:utf-16le" OR
|
||||||
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
|
"${lang}:${charset}" STREQUAL "ja:utf-16be" OR
|
||||||
"${lang}:${charset}" STREQUAL "he:iso-8859-8")
|
"${lang}:${charset}" STREQUAL "he:iso-8859-8")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user