Re-enable Hungarian language models.

Now that we have at least one model for ISO-8859-1, the risk of detecting all ISO-8859-1 texts as ISO-8859-2 is lessened.
2026-01-01 03:12:24 +08:00 · 2015-12-02 22:24:36 +01:00 · 2015-12-02 22:24:36 +01:00 · 683255278d
commit 683255278d
parent f4f9fc3f28
4 changed files with 7 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -47,6 +47,9 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
    * ISO-8859-15
  * English
    * ASCII
+  * Hungarian
+    * ISO-8859-2
+    * WINDOWS-1250
  * Others
    * WINDOWS-1252

--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -81,10 +81,8 @@ nsSBCSGroupProber::nsSBCSGroupProber()
  mProbers[14] = new nsSingleByteCharSetProber(&Iso_8859_1FrenchModel);
  mProbers[15] = new nsSingleByteCharSetProber(&Iso_8859_15FrenchModel);

-  // disable latin2 before latin1 is available, otherwise all latin1 
-  // will be detected as latin2 because of their similarity.
-  //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
-  //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
+  mProbers[16] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
+  mProbers[17] = new nsSingleByteCharSetProber(&Win1250HungarianModel);

  Reset();
 }
--- a/src/nsSBCSGroupProber.h
+++ b/src/nsSBCSGroupProber.h
@@ -40,7 +40,7 @@
 #define nsSBCSGroupProber_h__


-#define NUM_OF_SBCS_PROBERS    16
+#define NUM_OF_SBCS_PROBERS    18

 class nsCharSetProber;
 class nsSBCSGroupProber: public nsCharSetProber {
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -34,8 +34,7 @@ foreach(dir ${dirs})
        # enough). We will have to take a closer look and fix these, but
        # there is no need to break the whole `make test` right now,
        # which may make actual regressions harder to notice.
-        if ("${lang}:${charset}" STREQUAL "hu:iso-8859-2"   OR
-            "${lang}:${charset}" STREQUAL "el:windows-1253" OR
+        if ("${lang}:${charset}" STREQUAL "el:windows-1253" OR
            "${lang}:${charset}" STREQUAL "ja:utf-16le"     OR
            "${lang}:${charset}" STREQUAL "ja:utf-16be"     OR
            "${lang}:${charset}" STREQUAL "he:iso-8859-8")