From 84284eccf4d51854d3108e5683d1a4294586e1a6 Mon Sep 17 00:00:00 2001
From: BYVoid <byvoid.kcp@gmail.com>
Date: Mon, 11 Jul 2011 14:42:50 +0800
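Subject: [PATCH] Update code from upstream.

Notable changes include:

- Character-order maps, language models and state-machine models are
  now const, and file-local tables are static.
- Charset names are spelled as string literals; the includes of
  uchardetDefine.h and the CHARDET_ENCODING_* macros are dropped from
  the touched files.
- CharDistributionAnalysis and JapaneseContextAnalysis take an
  aIsPreferredLanguage flag in Reset(); when set, the minimum data
  threshold is 0 instead of MINIMUM_DATA_THRESHOLD. The Big5, EUC-JP,
  EUC-KR and EUC-TW probers gain a matching constructor argument.
- nsEscCharSetProber takes a language filter and only creates the
  state machines for the selected languages.
- The eError early exit is removed from the Big5, EUC-JP, EUC-KR,
  EUC-TW and escape-sequence probers.
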
---
 debug.sh                    |   0
 release.sh                  |   0
 src/CMakeLists.txt          |  16 ++--
 src/CharDistribution.cpp    |   8 +-
 src/CharDistribution.h      |  13 ++-
 src/JpCntx.cpp              |  11 ++-
 src/JpCntx.h                |  14 +--
 src/LangBulgarianModel.cpp  |  15 ++--
 src/LangCyrillicModel.cpp   |  39 ++++-----
 src/LangGreekModel.cpp      |  15 ++--
 src/LangHebrewModel.cpp     |   9 +-
 src/LangHungarianModel.cpp  |  15 ++--
 src/LangThaiModel.cpp       |   9 +-
 src/nsBig5Prober.cpp        |   7 +-
 src/nsBig5Prober.h          |  10 ++-
 src/nsCharSetProber.h       |   2 +-
 src/nsCodingStateMachine.h  |  30 +++----
 src/nsEUCJPProber.cpp       |   9 +-
 src/nsEUCJPProber.h         |  10 ++-
 src/nsEUCKRProber.cpp       |   7 +-
 src/nsEUCKRProber.h         |  11 ++-
 src/nsEUCTWProber.cpp       |   7 +-
 src/nsEUCTWProber.h         |  10 ++-
 src/nsEscCharsetProber.cpp  |  46 +++++-----
 src/nsEscCharsetProber.h    |   2 +-
 src/nsEscSM.cpp             |  36 ++++----
 src/nsGB2312Prober.cpp      |   7 +-
 src/nsGB2312Prober.h        |  10 ++-
 src/nsHebrewProber.cpp      |   5 +-
 src/nsLatin1Prober.cpp      |   4 +-
 src/nsLatin1Prober.h        |   3 +-
 src/nsMBCSGroupProber.cpp   | 119 ++++++++++++++-----------
 src/nsMBCSGroupProber.h     |   7 +-
 src/nsMBCSSM.cpp            | 170 ++++++------------------------------
 src/nsPkgInt.h              |   2 +-
 src/nsSBCSGroupProber.cpp   |  13 +--
 src/nsSBCSGroupProber.h     |   2 +-
 src/nsSBCharSetProber.h     |  39 +++++----
 src/nsSJISProber.cpp        |   9 +-
 src/nsSJISProber.h          |  10 ++-
 src/nsUTF8Prober.cpp        |   5 --
 src/nsUTF8Prober.h          |   3 +-
 src/nsUniversalDetector.cpp |  89 ++++++++++---------
 src/nsUniversalDetector.h   |  16 +++-
 src/nscore.h                |   2 +
 src/tools/uchardet.cpp      |  36 ++------
 src/uchardet.cpp            |  34 ++++----
 win32.sh                    |   7 ++
 48 files changed, 411 insertions(+), 532 deletions(-)
 mode change 100755 => 100644 debug.sh
 mode change 100755 => 100644 release.sh
 create mode 100644 win32.sh

diff --git a/debug.sh b/debug.sh
old mode 100755
new mode 100644
diff --git a/release.sh b/release.sh
old mode 100755
new mode 100644
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e30bb9d..0b26a3b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,26 +11,26 @@ set(
 	LangBulgarianModel.cpp
 	LangCyrillicModel.cpp
 	LangGreekModel.cpp
-	LangHebrewModel.cpp
 	LangHungarianModel.cpp
+	LangHebrewModel.cpp
 	LangThaiModel.cpp
-	nsBig5Prober.cpp
+	nsHebrewProber.cpp
 	nsCharSetProber.cpp
-	nsEscCharsetProber.cpp
-	nsEscSM.cpp
+	nsBig5Prober.cpp
 	nsEUCJPProber.cpp
 	nsEUCKRProber.cpp
 	nsEUCTWProber.cpp
+	nsEscCharsetProber.cpp
+	nsEscSM.cpp
 	nsGB2312Prober.cpp
-	nsHebrewProber.cpp
-	nsLatin1Prober.cpp
 	nsMBCSGroupProber.cpp
 	nsMBCSSM.cpp
-	nsSBCharSetProber.cpp
 	nsSBCSGroupProber.cpp
+	nsSBCharSetProber.cpp
 	nsSJISProber.cpp
-	nsUniversalDetector.cpp
 	nsUTF8Prober.cpp
+	nsLatin1Prober.cpp
+	nsUniversalDetector.cpp
 	uchardet.cpp
 )
 
diff --git a/src/CharDistribution.cpp b/src/CharDistribution.cpp
index 41a8fdc..488d9bc 100644
--- a/src/CharDistribution.cpp
+++ b/src/CharDistribution.cpp
@@ -46,15 +46,13 @@
 #define SURE_YES 0.99f
 #define SURE_NO  0.01f
 
-#define MINIMUM_DATA_THRESHOLD  4
-
 //return confidence base on received data
-float CharDistributionAnalysis::GetConfidence()
+float CharDistributionAnalysis::GetConfidence(void)
 { 
   //if we didn't receive any character in our consideration range, or the
-  //number of frequent characters is below the minimum threshold, return
+  // number of frequent characters is below the minimum threshold, return
   // negative answer
-  if (mTotalChars <= 0 || mFreqChars <= MINIMUM_DATA_THRESHOLD)
+  if (mTotalChars <= 0 || mFreqChars <= mDataThreshold)
     return SURE_NO;
 
   if (mTotalChars != mFreqChars) {
diff --git a/src/CharDistribution.h b/src/CharDistribution.h
index 789a659..453c2de 100644
--- a/src/CharDistribution.h
+++ b/src/CharDistribution.h
@@ -42,11 +42,12 @@
 
 #define ENOUGH_DATA_THRESHOLD 1024
  
+#define MINIMUM_DATA_THRESHOLD  4
+
 class CharDistributionAnalysis
 {
 public:
-  CharDistributionAnalysis() {Reset();}
-  virtual ~CharDistributionAnalysis(){};
+  CharDistributionAnalysis() {Reset(PR_FALSE);}
 
   //feed a block of data and do distribution analysis
   void HandleData(const char* aBuf, PRUint32 aLen) {}
@@ -72,14 +73,15 @@ public:
   }
 
   //return confidence base on existing data
-  float GetConfidence();
+  float GetConfidence(void);
 
   //Reset analyser, clear any state 
-  void      Reset(void) 
+  void      Reset(PRBool aIsPreferredLanguage) 
   {
     mDone = PR_FALSE;
     mTotalChars = 0;
     mFreqChars = 0;
+    mDataThreshold = aIsPreferredLanguage ? 0 : MINIMUM_DATA_THRESHOLD;
   }
 
   //This function is for future extension. Caller can use this function to control
@@ -105,6 +107,9 @@ protected:
   //Total character encounted.
   PRUint32 mTotalChars;
 
+  //Threshold on mFreqChars: GetConfidence() returns SURE_NO unless it is exceeded
+  PRUint32 mDataThreshold;
+
   //Mapping table to get frequency order from char order (get from GetOrder())
   const PRInt16  *mCharToFreqOrder;
 
diff --git a/src/JpCntx.cpp b/src/JpCntx.cpp
index 812c3bf..7da0413 100644
--- a/src/JpCntx.cpp
+++ b/src/JpCntx.cpp
@@ -39,7 +39,7 @@
 #include "JpCntx.h"
 
 //This is hiragana 2-char sequence table, the number in each cell represents its frequency category
-char jp2CharContext[83][83] = 
+const PRUint8 jp2CharContext[83][83] = 
 { 
 { 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,},
 { 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,},
@@ -170,7 +170,7 @@ void JapaneseContextAnalysis::HandleData(const char* aBuf, PRUint32 aLen)
   return;
 }
 
-void JapaneseContextAnalysis::Reset(void)
+void JapaneseContextAnalysis::Reset(PRBool aIsPreferredLanguage)
 {
   mTotalRel = 0;
   for (PRUint32 i = 0; i < NUM_OF_CATEGORY; i++)
@@ -178,13 +178,14 @@ void JapaneseContextAnalysis::Reset(void)
   mNeedToSkipCharNum = 0;
   mLastCharOrder = -1;
   mDone = PR_FALSE;
+  mDataThreshold = aIsPreferredLanguage ? 0 : MINIMUM_DATA_THRESHOLD;
 }
 #define DONT_KNOW (float)-1
 
-float  JapaneseContextAnalysis::GetConfidence()
+float  JapaneseContextAnalysis::GetConfidence(void)
 {
   //This is just one way to calculate confidence. It works well for me.
-  if (mTotalRel > MINIMUM_DATA_THRESHOLD)
+  if (mTotalRel > mDataThreshold)
     return ((float)(mTotalRel - mRelSample[0]))/mTotalRel;
   else 
     return (float)DONT_KNOW;
@@ -227,5 +228,3 @@ PRInt32 EUCJPContextAnalysis::GetOrder(const char* str, PRUint32 *charLen)
      return (unsigned char)*(str+1) - (unsigned char)0xa1;
   return -1;
 }
-
-
diff --git a/src/JpCntx.h b/src/JpCntx.h
index 8231a12..fe8fcb8 100644
--- a/src/JpCntx.h
+++ b/src/JpCntx.h
@@ -46,13 +46,12 @@
 #define MAX_REL_THRESHOLD     1000
 
 //hiragana frequency category table
-extern char jp2CharContext[83][83];
+extern const PRUint8 jp2CharContext[83][83];
 
 class JapaneseContextAnalysis
 {
 public:
-  JapaneseContextAnalysis() {Reset();}
-  virtual ~JapaneseContextAnalysis(){};
+  JapaneseContextAnalysis() {Reset(PR_FALSE);}
 
   void HandleData(const char* aBuf, PRUint32 aLen);
 
@@ -75,8 +74,8 @@ public:
     mLastCharOrder = order;
   }
 
-  float GetConfidence();
-  void      Reset(void);
+  float GetConfidence(void);
+  void      Reset(PRBool aIsPreferredLanguage);
   void      SetOpion(){}
   PRBool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}
 
@@ -84,11 +83,14 @@ protected:
   virtual PRInt32 GetOrder(const char* str, PRUint32 *charLen) = 0;
   virtual PRInt32 GetOrder(const char* str) = 0;
 
-  //category counters, each interger counts sequence in its category
+  //category counters, each integer counts sequences in its category
   PRUint32 mRelSample[NUM_OF_CATEGORY];
 
   //total sequence received
   PRUint32 mTotalRel;
+
+  //Threshold on mTotalRel: GetConfidence() returns DONT_KNOW unless it is exceeded
+  PRUint32 mDataThreshold;
   
   //The order of previous char
   PRInt32  mLastCharOrder;
diff --git a/src/LangBulgarianModel.cpp b/src/LangBulgarianModel.cpp
index 9babfda..0f73282 100644
--- a/src/LangBulgarianModel.cpp
+++ b/src/LangBulgarianModel.cpp
@@ -35,7 +35,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 /****************************************************************
 255: Control characters that usually does not exist in any text
@@ -49,7 +48,7 @@
 //this talbe is modified base on win1251BulgarianCharToOrderMap, so 
 //only number <64 is sure valid
 
-unsigned char Latin5_BulgarianCharToOrderMap[] =
+static const unsigned char Latin5_BulgarianCharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -69,7 +68,7 @@ unsigned char Latin5_BulgarianCharToOrderMap[] =
  62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253,  //f0
 };
 
-unsigned char win1251BulgarianCharToOrderMap[] =
+static const unsigned char win1251BulgarianCharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -95,7 +94,7 @@ unsigned char win1251BulgarianCharToOrderMap[] =
 //first 1024 sequences:3.0618%
 //rest  sequences:     0.2992%
 //negative sequences:  0.0020% 
-char BulgarianLangModel[] = 
+static const PRUint8 BulgarianLangModel[] = 
 {
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
@@ -227,20 +226,20 @@ char BulgarianLangModel[] =
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
 };
 
-SequenceModel Latin5BulgarianModel = 
+const SequenceModel Latin5BulgarianModel = 
 {
   Latin5_BulgarianCharToOrderMap,
   BulgarianLangModel,
   (float)0.969392,
   PR_FALSE,
-  CHARDET_ENCODING_ISO_8859_5
+  "ISO-8859-5"
 };
 
-SequenceModel Win1251BulgarianModel = 
+const SequenceModel Win1251BulgarianModel = 
 {
   win1251BulgarianCharToOrderMap,
   BulgarianLangModel,
   (float)0.969392,
   PR_FALSE,
-  CHARDET_ENCODING_WINDOWS_1251
+  "windows-1251"
 };
diff --git a/src/LangCyrillicModel.cpp b/src/LangCyrillicModel.cpp
index deceba3..d8e73e8 100644
--- a/src/LangCyrillicModel.cpp
+++ b/src/LangCyrillicModel.cpp
@@ -35,14 +35,13 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 
 
 
 //KOI8-R language model
 //Character Mapping Table:
-unsigned char KOI8R_CharToOrderMap[] =
+static const unsigned char KOI8R_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -62,7 +61,7 @@ unsigned char KOI8R_CharToOrderMap[] =
  35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70,  //f0
 };
 
-unsigned char win1251_CharToOrderMap[] =
+static const unsigned char win1251_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -82,7 +81,7 @@ unsigned char win1251_CharToOrderMap[] =
   9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16,
 };
 
-unsigned char latin5_CharToOrderMap[] =
+static const unsigned char latin5_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -102,7 +101,7 @@ unsigned char latin5_CharToOrderMap[] =
 239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255,
 };
 
-unsigned char macCyrillic_CharToOrderMap[] =
+static const unsigned char macCyrillic_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -122,7 +121,7 @@ unsigned char macCyrillic_CharToOrderMap[] =
   9,  7,  6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255,
 };
 
-unsigned char IBM855_CharToOrderMap[] =
+static const unsigned char IBM855_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -142,7 +141,7 @@ unsigned char IBM855_CharToOrderMap[] =
 250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255,
 };
 
-unsigned char IBM866_CharToOrderMap[] =
+static const unsigned char IBM866_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -168,7 +167,7 @@ unsigned char IBM866_CharToOrderMap[] =
 //first 1024 sequences: 2.3389%
 //rest  sequences:      0.1237%
 //negative sequences:   0.0009% 
-char RussianLangModel[] = 
+static const PRUint8 RussianLangModel[] = 
 {
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
@@ -301,56 +300,56 @@ char RussianLangModel[] =
 };
 
 
-SequenceModel Koi8rModel = 
+const SequenceModel Koi8rModel = 
 {
   KOI8R_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_KOI8_R
+  "KOI8-R"
 };
 
-SequenceModel Win1251Model = 
+const SequenceModel Win1251Model = 
 {
   win1251_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_WINDOWS_1251
+  "windows-1251"
 };
 
-SequenceModel Latin5Model = 
+const SequenceModel Latin5Model = 
 {
   latin5_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_ISO_8859_5
+  "ISO-8859-5"
 };
 
-SequenceModel MacCyrillicModel = 
+const SequenceModel MacCyrillicModel = 
 {
   macCyrillic_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_MACCYRILLIC
+  "x-mac-cyrillic"
 };
 
-SequenceModel Ibm866Model = 
+const SequenceModel Ibm866Model = 
 {
   IBM866_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_IBM866
+  "IBM866"
 };
 
-SequenceModel Ibm855Model = 
+const SequenceModel Ibm855Model = 
 {
   IBM855_CharToOrderMap,
   RussianLangModel,
   (float)0.976601,
   PR_FALSE,
-  CHARDET_ENCODING_IBM855
+  "IBM855"
 };
diff --git a/src/LangGreekModel.cpp b/src/LangGreekModel.cpp
index 60cea7e..30c65dc 100644
--- a/src/LangGreekModel.cpp
+++ b/src/LangGreekModel.cpp
@@ -35,7 +35,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 /****************************************************************
 255: Control characters that usually does not exist in any text
@@ -46,7 +45,7 @@
 *****************************************************************/
 
 //Character Mapping Table:
-unsigned char Latin7_CharToOrderMap[] =
+static const unsigned char Latin7_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -68,7 +67,7 @@ unsigned char Latin7_CharToOrderMap[] =
 
 
 
-unsigned char win1253_CharToOrderMap[] =
+static const unsigned char win1253_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -94,7 +93,7 @@ unsigned char win1253_CharToOrderMap[] =
 //first 1024 sequences:1.7001%
 //rest  sequences:     0.0359%
 //negative sequences:  0.0148% 
-char GreekLangModel[] = 
+static const PRUint8 GreekLangModel[] = 
 {
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -226,20 +225,20 @@ char GreekLangModel[] =
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 };
 
-SequenceModel Latin7Model = 
+const SequenceModel Latin7Model = 
 {
   Latin7_CharToOrderMap,
   GreekLangModel,
   (float)0.982851,
   PR_FALSE,
-  CHARDET_ENCODING_ISO_8859_7
+  "ISO-8859-7"
 };
 
-SequenceModel Win1253Model = 
+const SequenceModel Win1253Model = 
 {
   win1253_CharToOrderMap,
   GreekLangModel,
   (float)0.982851,
   PR_FALSE,
-  CHARDET_ENCODING_WINDOWS_1253
+  "windows-1253"
 };
diff --git a/src/LangHebrewModel.cpp b/src/LangHebrewModel.cpp
index 8c18641..a4e10ad 100644
--- a/src/LangHebrewModel.cpp
+++ b/src/LangHebrewModel.cpp
@@ -37,7 +37,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 
 
@@ -51,7 +50,7 @@
 
 //Windows-1255 language model
 //Character Mapping Table:
-unsigned char win1255_CharToOrderMap[] =
+static const unsigned char win1255_CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -77,7 +76,7 @@ unsigned char win1255_CharToOrderMap[] =
 //first 1024 sequences: 1.5981%
 //rest  sequences:      0.087%
 //negative sequences:   0.0015% 
-char HebrewLangModel[] = 
+static const PRUint8 HebrewLangModel[] = 
 {
 0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
 3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
@@ -209,12 +208,12 @@ char HebrewLangModel[] =
 0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
 };
 
-SequenceModel Win1255Model = 
+const SequenceModel Win1255Model = 
 {
   win1255_CharToOrderMap,
   HebrewLangModel,
   (float)0.984004,
   PR_FALSE,
-  CHARDET_ENCODING_WINDOWS_1255
+  "windows-1255"
 };
 
diff --git a/src/LangHungarianModel.cpp b/src/LangHungarianModel.cpp
index 66e2d5d..3af2f58 100644
--- a/src/LangHungarianModel.cpp
+++ b/src/LangHungarianModel.cpp
@@ -35,7 +35,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 /****************************************************************
 255: Control characters that usually does not exist in any text
@@ -46,7 +45,7 @@
 *****************************************************************/
 
 //Character Mapping Table:
-unsigned char Latin2_HungarianCharToOrderMap[] =
+static const unsigned char Latin2_HungarianCharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -66,7 +65,7 @@ unsigned char Latin2_HungarianCharToOrderMap[] =
 245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
 };
 
-unsigned char win1250HungarianCharToOrderMap[] =
+static const unsigned char win1250HungarianCharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -92,7 +91,7 @@ unsigned char win1250HungarianCharToOrderMap[] =
 //first 1024 sequences:5.2623%
 //rest  sequences:     0.8894%
 //negative sequences:  0.0009% 
-char HungarianLangModel[] = 
+static const PRUint8 HungarianLangModel[] = 
 {
 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
 3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
@@ -224,20 +223,20 @@ char HungarianLangModel[] =
 0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
 };
 
-SequenceModel Latin2HungarianModel = 
+const SequenceModel Latin2HungarianModel = 
 {
   Latin2_HungarianCharToOrderMap,
   HungarianLangModel,
   (float)0.947368,
   PR_TRUE,
-  CHARDET_ENCODING_ISO_8859_2
+  "ISO-8859-2"
 };
 
-SequenceModel Win1250HungarianModel = 
+const SequenceModel Win1250HungarianModel = 
 {
   win1250HungarianCharToOrderMap,
   HungarianLangModel,
   (float)0.947368,
   PR_TRUE,
-  CHARDET_ENCODING_WINDOWS_1250
+  "windows-1250"
 };
diff --git a/src/LangThaiModel.cpp b/src/LangThaiModel.cpp
index 7d376cc..8145ffa 100644
--- a/src/LangThaiModel.cpp
+++ b/src/LangThaiModel.cpp
@@ -35,7 +35,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsSBCharSetProber.h"
 
 
@@ -50,7 +49,7 @@
 //The following result for thai was collected from a limited sample (1M). 
 
 //Character Mapping Table:
-unsigned char TIS620CharToOrderMap[] =
+static const unsigned char TIS620CharToOrderMap[] =
 {
 255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  //00
 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  //10
@@ -79,7 +78,7 @@ unsigned char TIS620CharToOrderMap[] =
 //first 1024 sequences:7.3177%
 //rest  sequences:     1.0230%
 //negative sequences:  0.0436% 
-char ThaiLangModel[] = 
+static const PRUint8 ThaiLangModel[] = 
 {
 0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
 0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
@@ -212,11 +211,11 @@ char ThaiLangModel[] =
 };
 
 
-SequenceModel TIS620ThaiModel = 
+const SequenceModel TIS620ThaiModel = 
 {
   TIS620CharToOrderMap,
   ThaiLangModel,
   (float)0.926386,
   PR_FALSE,
-  CHARDET_ENCODING_TIS_620
+  "TIS-620"
 };
diff --git a/src/nsBig5Prober.cpp b/src/nsBig5Prober.cpp
index 55d63c6..7a85abb 100644
--- a/src/nsBig5Prober.cpp
+++ b/src/nsBig5Prober.cpp
@@ -41,7 +41,7 @@ void  nsBig5Prober::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mDistributionAnalyser.Reset();
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
 }
 
 nsProbingState nsBig5Prober::HandleData(const char* aBuf, PRUint32 aLen)
@@ -51,11 +51,6 @@ nsProbingState nsBig5Prober::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsBig5Prober.h b/src/nsBig5Prober.h
index 5a4ce37..5ae3576 100644
--- a/src/nsBig5Prober.h
+++ b/src/nsBig5Prober.h
@@ -38,18 +38,19 @@
 #ifndef nsBig5Prober_h__
 #define nsBig5Prober_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "CharDistribution.h"
 
 class nsBig5Prober: public nsCharSetProber {
 public:
-  nsBig5Prober(void){mCodingSM = new nsCodingStateMachine(&Big5SMModel);
-                      Reset();}
+  nsBig5Prober(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage) 
+  {mCodingSM = new nsCodingStateMachine(&Big5SMModel); 
+    Reset();}
   virtual ~nsBig5Prober(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_BIG5;}
+  const char* GetCharSetName() {return "Big5";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -64,6 +65,7 @@ protected:
   //Big5ContextAnalysis mContextAnalyser;
   Big5DistributionAnalysis mDistributionAnalyser;
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 
 };
 
diff --git a/src/nsCharSetProber.h b/src/nsCharSetProber.h
index 0c3ae95..c078ccf 100644
--- a/src/nsCharSetProber.h
+++ b/src/nsCharSetProber.h
@@ -61,7 +61,7 @@ public:
   virtual void      SetOpion() = 0;
 
 #ifdef DEBUG_chardet
-  virtual void  DumpStatus() {}
+  virtual void  DumpStatus() {};
 #endif
 
   // Helper functions used in the Latin1 and Group probers.
diff --git a/src/nsCodingStateMachine.h b/src/nsCodingStateMachine.h
index b9c2f64..819f9ab 100644
--- a/src/nsCodingStateMachine.h
+++ b/src/nsCodingStateMachine.h
@@ -59,10 +59,7 @@ typedef struct
 
 class nsCodingStateMachine {
 public:
-  nsCodingStateMachine(SMModel* sm){
-          mCurrentState = eStart;
-          mModel = sm;
-        }
+  nsCodingStateMachine(const SMModel* sm) : mModel(sm) { mCurrentState = eStart; }
   nsSMState NextState(char c){
     //for each byte we get its class , if it is first byte, we also get byte length
     PRUint32 byteCls = GETCLASS(c);
@@ -86,23 +83,22 @@ protected:
   PRUint32 mCurrentCharLen;
   PRUint32 mCurrentBytePos;
 
-  SMModel *mModel;
+  const SMModel *mModel;
 };
 
-extern SMModel UTF8SMModel;
-extern SMModel Big5SMModel;
-extern SMModel EUCJPSMModel;
-extern SMModel EUCKRSMModel;
-extern SMModel EUCTWSMModel;
-extern SMModel GB18030SMModel;
-extern SMModel SJISSMModel;
-extern SMModel UCS2BESMModel;
+extern const SMModel UTF8SMModel;
+extern const SMModel Big5SMModel;
+extern const SMModel EUCJPSMModel;
+extern const SMModel EUCKRSMModel;
+extern const SMModel EUCTWSMModel;
+extern const SMModel GB18030SMModel;
+extern const SMModel SJISSMModel;
 
 
-extern SMModel HZSMModel;
-extern SMModel ISO2022CNSMModel;
-extern SMModel ISO2022JPSMModel;
-extern SMModel ISO2022KRSMModel;
+extern const SMModel HZSMModel;
+extern const SMModel ISO2022CNSMModel;
+extern const SMModel ISO2022JPSMModel;
+extern const SMModel ISO2022KRSMModel;
 
 #endif /* nsCodingStateMachine_h__ */
 
diff --git a/src/nsEUCJPProber.cpp b/src/nsEUCJPProber.cpp
index fb0d296..54861b3 100644
--- a/src/nsEUCJPProber.cpp
+++ b/src/nsEUCJPProber.cpp
@@ -46,8 +46,8 @@ void  nsEUCJPProber::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mContextAnalyser.Reset();
-  mDistributionAnalyser.Reset();
+  mContextAnalyser.Reset(mIsPreferredLanguage);
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
 }
 
 nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
@@ -57,11 +57,6 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsEUCJPProber.h b/src/nsEUCJPProber.h
index e4efa5a..a7a2f51 100644
--- a/src/nsEUCJPProber.h
+++ b/src/nsEUCJPProber.h
@@ -43,7 +43,6 @@
 #ifndef nsEUCJPProber_h__
 #define nsEUCJPProber_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "JpCntx.h"
@@ -51,11 +50,13 @@
 
 class nsEUCJPProber: public nsCharSetProber {
 public:
-  nsEUCJPProber(void){mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
-                      Reset();}
+  nsEUCJPProber(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage)
+  {mCodingSM = new nsCodingStateMachine(&EUCJPSMModel);
+    Reset();}
   virtual ~nsEUCJPProber(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_EUC_JP;}
+  const char* GetCharSetName() {return "EUC-JP";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -69,6 +70,7 @@ protected:
   EUCJPDistributionAnalysis mDistributionAnalyser;
 
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 };
 
 
diff --git a/src/nsEUCKRProber.cpp b/src/nsEUCKRProber.cpp
index c91a97e..3632f1f 100644
--- a/src/nsEUCKRProber.cpp
+++ b/src/nsEUCKRProber.cpp
@@ -41,7 +41,7 @@ void  nsEUCKRProber::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mDistributionAnalyser.Reset();
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
   //mContextAnalyser.Reset();
 }
 
@@ -52,11 +52,6 @@ nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsEUCKRProber.h b/src/nsEUCKRProber.h
index 53e9f30..8e09984 100644
--- a/src/nsEUCKRProber.h
+++ b/src/nsEUCKRProber.h
@@ -38,18 +38,20 @@
 #ifndef nsEUCKRProber_h__
 #define nsEUCKRProber_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "CharDistribution.h"
 
 class nsEUCKRProber: public nsCharSetProber {
 public:
-  nsEUCKRProber(void){mCodingSM = new nsCodingStateMachine(&EUCKRSMModel);
-                      Reset();}
+  nsEUCKRProber(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage)
+  {mCodingSM = new nsCodingStateMachine(&EUCKRSMModel);
+    Reset();
+  }
   virtual ~nsEUCKRProber(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_EUC_KR;}
+  const char* GetCharSetName() {return "EUC-KR";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -64,6 +66,7 @@ protected:
   //EUCKRContextAnalysis mContextAnalyser;
   EUCKRDistributionAnalysis mDistributionAnalyser;
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 
 };
 
diff --git a/src/nsEUCTWProber.cpp b/src/nsEUCTWProber.cpp
index 8552941..a06e074 100644
--- a/src/nsEUCTWProber.cpp
+++ b/src/nsEUCTWProber.cpp
@@ -41,7 +41,7 @@ void  nsEUCTWProber::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mDistributionAnalyser.Reset();
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
   //mContextAnalyser.Reset();
 }
 
@@ -52,11 +52,6 @@ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsEUCTWProber.h b/src/nsEUCTWProber.h
index 7df1120..911d50b 100644
--- a/src/nsEUCTWProber.h
+++ b/src/nsEUCTWProber.h
@@ -38,18 +38,19 @@
 #ifndef nsEUCTWProber_h__
 #define nsEUCTWProber_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "CharDistribution.h"
 
 class nsEUCTWProber: public nsCharSetProber {
 public:
-  nsEUCTWProber(void){mCodingSM = new nsCodingStateMachine(&EUCTWSMModel);
-                      Reset();}
+  nsEUCTWProber(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage)
+  {mCodingSM = new nsCodingStateMachine(&EUCTWSMModel);
+    Reset();}
   virtual ~nsEUCTWProber(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_EUC_TW;}
+  const char* GetCharSetName() {return "x-euc-tw";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -64,6 +65,7 @@ protected:
   //EUCTWContextAnalysis mContextAnalyser;
   EUCTWDistributionAnalysis mDistributionAnalyser;
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 
 };
 
diff --git a/src/nsEscCharsetProber.cpp b/src/nsEscCharsetProber.cpp
index a816bab..464c753 100644
--- a/src/nsEscCharsetProber.cpp
+++ b/src/nsEscCharsetProber.cpp
@@ -37,13 +37,21 @@
 
 
 #include "nsEscCharsetProber.h"
+#include "nsUniversalDetector.h"
 
-nsEscCharSetProber::nsEscCharSetProber(void)
+nsEscCharSetProber::nsEscCharSetProber(PRUint32 aLanguageFilter)
 {
-  mCodingSM[0] = new nsCodingStateMachine(&HZSMModel);
-  mCodingSM[1] = new nsCodingStateMachine(&ISO2022CNSMModel);
-  mCodingSM[2] = new nsCodingStateMachine(&ISO2022JPSMModel);
-  mCodingSM[3] = new nsCodingStateMachine(&ISO2022KRSMModel);
+  for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
+    mCodingSM[i] = nsnull;
+  if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED) 
+  {
+    mCodingSM[0] = new nsCodingStateMachine(&HZSMModel);
+    mCodingSM[1] = new nsCodingStateMachine(&ISO2022CNSMModel);
+  }
+  if (aLanguageFilter & NS_FILTER_JAPANESE)
+    mCodingSM[2] = new nsCodingStateMachine(&ISO2022JPSMModel);
+  if (aLanguageFilter & NS_FILTER_KOREAN)
+    mCodingSM[3] = new nsCodingStateMachine(&ISO2022KRSMModel);
   mActiveSM = NUM_OF_ESC_CHARSETS;
   mState = eDetecting;
   mDetectedCharset = nsnull;
@@ -59,7 +67,8 @@ void nsEscCharSetProber::Reset(void)
 {
   mState = eDetecting;
   for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
-    mCodingSM[i]->Reset();
+    if (mCodingSM[i])
+      mCodingSM[i]->Reset();
   mActiveSM = NUM_OF_ESC_CHARSETS;
   mDetectedCharset = nsnull;
 }
@@ -74,30 +83,15 @@ nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
   {
     for (j = mActiveSM-1; j>= 0; j--)
     {
-      //byte is feed to all active state machine 
-      codingState = mCodingSM[j]->NextState(aBuf[i]);
-      if (codingState == eError)
+      if (mCodingSM[j])
       {
-        //got negative answer for this state machine, make it inactive
-        mActiveSM--;
-        if (mActiveSM == 0)
+        codingState = mCodingSM[j]->NextState(aBuf[i]);
+        if (codingState == eItsMe)
         {
-          mState = eNotMe;
+          mState = eFoundIt;
+          mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
           return mState;
         }
-        else if (j != (PRInt32)mActiveSM)
-        {
-          nsCodingStateMachine* t;
-          t = mCodingSM[mActiveSM];
-          mCodingSM[mActiveSM] = mCodingSM[j];
-          mCodingSM[j] = t;
-        }
-      }
-      else if (codingState == eItsMe)
-      {
-        mState = eFoundIt;
-        mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
-        return mState;
       }
     }
   }
diff --git a/src/nsEscCharsetProber.h b/src/nsEscCharsetProber.h
index c08f442..4b648e0 100644
--- a/src/nsEscCharsetProber.h
+++ b/src/nsEscCharsetProber.h
@@ -45,7 +45,7 @@
 
 class nsEscCharSetProber: public nsCharSetProber {
 public:
-  nsEscCharSetProber(void);
+  nsEscCharSetProber(PRUint32 aLanguageFilter);
   virtual ~nsEscCharSetProber(void);
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
   const char* GetCharSetName() {return mDetectedCharset;}
diff --git a/src/nsEscSM.cpp b/src/nsEscSM.cpp
index f3b4a8d..eed1b7c 100644
--- a/src/nsEscSM.cpp
+++ b/src/nsEscSM.cpp
@@ -20,7 +20,6 @@
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
- *   Kazutoshi Satoda
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -35,10 +34,9 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
-#include "uchardetDefine.h"
 #include "nsCodingStateMachine.h"
 
-static PRUint32 HZ_cls[ 256 / 8 ] = {
+static const PRUint32 HZ_cls[ 256 / 8 ] = {
 PCK4BITS(1,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
@@ -74,7 +72,7 @@ PCK4BITS(1,1,1,1,1,1,1,1)   // f8 - ff
 };
 
 
-static PRUint32 HZ_st [ 6] = {
+static const PRUint32 HZ_st [ 6] = {
 PCK4BITS(eStart,eError,     3,eStart,eStart,eStart,eError,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError),//10-17 
@@ -85,16 +83,16 @@ PCK4BITS(     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
 
 static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
 
-SMModel HZSMModel = {
+const SMModel HZSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
   HZCharLenTable,
-  CHARDET_ENCODING_HZ_GB_2312,
+  "HZ-GB-2312",
 };
 
 
-static PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
+static const PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
@@ -130,7 +128,7 @@ PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
 };
 
 
-static PRUint32 ISO2022CN_st [ 8] = {
+static const PRUint32 ISO2022CN_st [ 8] = {
 PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 
 PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError),//08-0f 
 PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17 
@@ -143,15 +141,15 @@ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
 
 static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-SMModel ISO2022CNSMModel = {
+const SMModel ISO2022CNSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
   9,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
   ISO2022CNCharLenTable,
-  CHARDET_ENCODING_ISO_2022_CN,
+  "ISO-2022-CN",
 };
 
-static PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
+static const PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,2,2),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
@@ -187,7 +185,7 @@ PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
 };
 
 
-static PRUint32 ISO2022JP_st [ 9] = {
+static const PRUint32 ISO2022JP_st [ 9] = {
 PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 
 PCK4BITS(eStart,eStart,eError,eError,eError,eError,eError,eError),//08-0f 
 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//10-17 
@@ -199,17 +197,17 @@ PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//38-3f
 PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eStart,eStart) //40-47 
 };
 
-static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
 
-SMModel ISO2022JPSMModel = {
+const SMModel ISO2022JPSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
   10,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
   ISO2022JPCharLenTable,
-  CHARDET_ENCODING_ISO_2022_JP,
+  "ISO-2022-JP",
 };
 
-static PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
+static const PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
 PCK4BITS(2,0,0,0,0,0,0,0),  // 00 - 07 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 08 - 0f 
 PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
@@ -245,7 +243,7 @@ PCK4BITS(2,2,2,2,2,2,2,2)   // f8 - ff
 };
 
 
-static PRUint32 ISO2022KR_st [ 5] = {
+static const PRUint32 ISO2022KR_st [ 5] = {
 PCK4BITS(eStart,     3,eError,eStart,eStart,eStart,eError,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eError,eError,eError,     4,eError,eError),//10-17 
@@ -255,11 +253,11 @@ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
 
 static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
 
-SMModel ISO2022KRSMModel = {
+const SMModel ISO2022KRSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
   ISO2022KRCharLenTable,
-  CHARDET_ENCODING_ISO_2022_KR,
+  "ISO-2022-KR",
 };
 
diff --git a/src/nsGB2312Prober.cpp b/src/nsGB2312Prober.cpp
index 576dcd6..b6d469c 100644
--- a/src/nsGB2312Prober.cpp
+++ b/src/nsGB2312Prober.cpp
@@ -46,7 +46,7 @@ void  nsGB18030Prober::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mDistributionAnalyser.Reset();
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
   //mContextAnalyser.Reset();
 }
 
@@ -57,11 +57,6 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsGB2312Prober.h b/src/nsGB2312Prober.h
index 18edae5..4bdac3b 100644
--- a/src/nsGB2312Prober.h
+++ b/src/nsGB2312Prober.h
@@ -38,7 +38,6 @@
 #ifndef nsGB2312Prober_h__
 #define nsGB2312Prober_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "CharDistribution.h"
@@ -47,11 +46,13 @@
 
 class nsGB18030Prober: public nsCharSetProber {
 public:
-  nsGB18030Prober(void){mCodingSM = new nsCodingStateMachine(&GB18030SMModel);
-                      Reset();}
+  nsGB18030Prober(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage)
+  {mCodingSM = new nsCodingStateMachine(&GB18030SMModel);
+    Reset();}
   virtual ~nsGB18030Prober(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_GB18030;}
+  const char* GetCharSetName() {return "gb18030";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -66,6 +67,7 @@ protected:
   //GB2312ContextAnalysis mContextAnalyser;
   GB2312DistributionAnalysis mDistributionAnalyser;
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 
 };
 
diff --git a/src/nsHebrewProber.cpp b/src/nsHebrewProber.cpp
index 2168de3..b148ce3 100644
--- a/src/nsHebrewProber.cpp
+++ b/src/nsHebrewProber.cpp
@@ -35,7 +35,6 @@
  *
  * ***** END LICENSE BLOCK ***** */
 
-#include "uchardetDefine.h"
 #include "nsHebrewProber.h"
 #include <stdio.h>
 
@@ -59,8 +58,8 @@
 // If the difference is below this, don't rely at all on the model score distance.
 #define MIN_MODEL_DISTANCE (0.01)
 
-#define VISUAL_HEBREW_NAME (CHARDET_ENCODING_ISO_8859_8)
-#define LOGICAL_HEBREW_NAME (CHARDET_ENCODING_WINDOWS_1255)
+#define VISUAL_HEBREW_NAME ("ISO-8859-8")
+#define LOGICAL_HEBREW_NAME ("windows-1255")
 
 PRBool nsHebrewProber::isFinal(char c)
 {
diff --git a/src/nsLatin1Prober.cpp b/src/nsLatin1Prober.cpp
index 05d6823..7694ef7 100644
--- a/src/nsLatin1Prober.cpp
+++ b/src/nsLatin1Prober.cpp
@@ -50,7 +50,7 @@
 #define ASO    7        // accent small other
 #define CLASS_NUM   8    // total classes
 
-static unsigned char Latin1_CharToClass[] = 
+static const unsigned char Latin1_CharToClass[] = 
 {
   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 00 - 07
   OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   // 08 - 0F
@@ -92,7 +92,7 @@ static unsigned char Latin1_CharToClass[] =
    2 : normal 
    3 : very likely
 */
-static unsigned char Latin1ClassModel[] = 
+static const unsigned char Latin1ClassModel[] = 
 {
 /*      UDF OTH ASC ASS ACV ACO ASV ASO  */
 /*UDF*/  0,  0,  0,  0,  0,  0,  0,  0,
diff --git a/src/nsLatin1Prober.h b/src/nsLatin1Prober.h
index 3953d8e..5145e96 100644
--- a/src/nsLatin1Prober.h
+++ b/src/nsLatin1Prober.h
@@ -39,7 +39,6 @@
 #ifndef nsLatin1Prober_h__
 #define nsLatin1Prober_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 
 #define FREQ_CAT_NUM    4
@@ -49,7 +48,7 @@ public:
   nsLatin1Prober(void){Reset();}
   virtual ~nsLatin1Prober(void){}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_WINDOWS_1252;}
+  const char* GetCharSetName() {return "windows-1252";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index 35a97be..f161165 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -21,6 +21,7 @@
  *
  * Contributor(s):
  *          Shy Shalom <shooshX@gmail.com>
+ *			Proofpoint, Inc.
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -36,12 +37,12 @@
  *
  * ***** END LICENSE BLOCK ***** */
 #include <stdio.h>
-#include "prmem.h"
 
 #include "nsMBCSGroupProber.h"
+#include "nsUniversalDetector.h"
 
-#ifdef DEBUG_chardet
-char *ProberName[] = 
+#if defined(DEBUG_chardet) || defined(DEBUG_jgmyers)
+const char *ProberName[] = 
 {
   "UTF8",
   "SJIS",
@@ -54,15 +55,26 @@ char *ProberName[] =
 
 #endif
 
-nsMBCSGroupProber::nsMBCSGroupProber()
+nsMBCSGroupProber::nsMBCSGroupProber(PRUint32 aLanguageFilter)
 {
+  for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+    mProbers[i] = nsnull;
+
   mProbers[0] = new nsUTF8Prober();
-  mProbers[1] = new nsSJISProber();
-  mProbers[2] = new nsEUCJPProber();
-  mProbers[3] = new nsGB18030Prober();
-  mProbers[4] = new nsEUCKRProber();
-  mProbers[5] = new nsBig5Prober();
-  mProbers[6] = new nsEUCTWProber();
+  if (aLanguageFilter & NS_FILTER_JAPANESE) 
+  {
+    mProbers[1] = new nsSJISProber(aLanguageFilter == NS_FILTER_JAPANESE);
+    mProbers[2] = new nsEUCJPProber(aLanguageFilter == NS_FILTER_JAPANESE);
+  }
+  if (aLanguageFilter & NS_FILTER_CHINESE_SIMPLIFIED)
+    mProbers[3] = new nsGB18030Prober(aLanguageFilter == NS_FILTER_CHINESE_SIMPLIFIED);
+  if (aLanguageFilter & NS_FILTER_KOREAN)
+    mProbers[4] = new nsEUCKRProber(aLanguageFilter == NS_FILTER_KOREAN);
+  if (aLanguageFilter & NS_FILTER_CHINESE_TRADITIONAL) 
+  {
+    mProbers[5] = new nsBig5Prober(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
+    mProbers[6] = new nsEUCTWProber(aLanguageFilter == NS_FILTER_CHINESE_TRADITIONAL);
+  }
   Reset();
 }
 
@@ -101,62 +113,59 @@ void  nsMBCSGroupProber::Reset(void)
   }
   mBestGuess = -1;
   mState = eDetecting;
+  mKeepNext = 0;
 }
 
 nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen)
 {
   nsProbingState st;
-  PRUint32 i;
+  PRUint32 start = 0;
+  PRUint32 keepNext = mKeepNext;
 
   //do filtering to reduce load to probers
-  char *highbyteBuf;
-  char *hptr;
-  PRBool keepNext = PR_TRUE;   //assume previous is not ascii, it will do no harm except add some noise
-  hptr = highbyteBuf = (char*)PR_Malloc(aLen);
-  if (!hptr)
-      return mState;
-  for (i = 0; i < aLen; i++)
+  for (PRUint32 pos = 0; pos < aLen; ++pos)
   {
-    if (aBuf[i] & 0x80)
+    if (aBuf[pos] & 0x80)
     {
-      *hptr++ = aBuf[i];
-      keepNext = PR_TRUE;
+      if (!keepNext)
+        start = pos;
+      keepNext = 2;
     }
-    else
+    else if (keepNext)
     {
-      //if previous is highbyte, keep this even it is a ASCII
-      if (keepNext)
+      if (--keepNext == 0)
       {
-          *hptr++ = aBuf[i];
-          keepNext = PR_FALSE;
+        for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+        {
+          if (!mIsActive[i])
+            continue;
+          st = mProbers[i]->HandleData(aBuf + start, pos + 1 - start);
+          if (st == eFoundIt)
+          {
+            mBestGuess = i;
+            mState = eFoundIt;
+            return mState;
+          }
+        }
       }
     }
   }
 
-  for (i = 0; i < NUM_OF_PROBERS; i++)
-  {
-     if (!mIsActive[i])
-       continue;
-     st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf);
-     if (st == eFoundIt)
-     {
-       mBestGuess = i;
-       mState = eFoundIt;
-       break;
-     }
-     else if (st == eNotMe)
-     {
-       mIsActive[i] = PR_FALSE;
-       mActiveNum--;
-       if (mActiveNum <= 0)
-       {
-         mState = eNotMe;
-         break;
-       }
-     }
+  if (keepNext) {
+    for (PRUint32 i = 0; i < NUM_OF_PROBERS; i++)
+    {
+      if (!mIsActive[i])
+        continue;
+      st = mProbers[i]->HandleData(aBuf + start, aLen - start);
+      if (st == eFoundIt)
+      {
+        mBestGuess = i;
+        mState = eFoundIt;
+        return mState;
+      }
+    }
   }
-
-  PR_FREEIF(highbyteBuf);
+  mKeepNext = keepNext;
 
   return mState;
 }
@@ -207,3 +216,15 @@ void nsMBCSGroupProber::DumpStatus()
   }
 }
 #endif
+
+#ifdef DEBUG_jgmyers
+void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset)
+{
+  for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) {
+    states[offset].name = ProberName[i];
+    states[offset].isActive = mIsActive[i];
+    states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
+    ++offset;
+  }
+}
+#endif /* DEBUG_jgmyers */
diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h
index 6d2730a..c4e9964 100644
--- a/src/nsMBCSGroupProber.h
+++ b/src/nsMBCSGroupProber.h
@@ -20,6 +20,7 @@
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *			Proofpoint, Inc.
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -50,7 +51,7 @@
 
 class nsMBCSGroupProber: public nsCharSetProber {
 public:
-  nsMBCSGroupProber();
+  nsMBCSGroupProber(PRUint32 aLanguageFilter);
   virtual ~nsMBCSGroupProber();
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
   const char* GetCharSetName();
@@ -62,6 +63,9 @@ public:
 #ifdef DEBUG_chardet
   void  DumpStatus();
 #endif
+#ifdef DEBUG_jgmyers
+  void GetDetectorState(nsUniversalDetector::DetectorState (&states)[nsUniversalDetector::NumDetectors], PRUint32 &offset);
+#endif
 
 protected:
   nsProbingState mState;
@@ -69,6 +73,7 @@ protected:
   PRBool          mIsActive[NUM_OF_PROBERS];
   PRInt32 mBestGuess;
   PRUint32 mActiveNum;
+  PRUint32 mKeepNext;
 };
 
 #endif /* nsMBCSGroupProber_h__ */
diff --git a/src/nsMBCSSM.cpp b/src/nsMBCSSM.cpp
index 0231ad6..584e931 100644
--- a/src/nsMBCSSM.cpp
+++ b/src/nsMBCSSM.cpp
@@ -34,7 +34,6 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
-#include "uchardetDefine.h"
 #include "nsCodingStateMachine.h"
 
 /*
@@ -45,7 +44,7 @@ Modification from frank tang's original work:
 
 // BIG5 
 
-static PRUint32 BIG5_cls [ 256 / 8 ] = {
+static const PRUint32 BIG5_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07    //allow 0x00 as legal value
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -82,7 +81,7 @@ PCK4BITS(3,3,3,3,3,3,3,0)   // f8 - ff
 };
 
 
-static PRUint32 BIG5_st [ 3] = {
+static const PRUint32 BIG5_st [ 3] = {
 PCK4BITS(eError,eStart,eStart,     3,eError,eError,eError,eError),//00-07 
 PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError),//08-0f 
 PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17 
@@ -90,15 +89,15 @@ PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,eStart,eStart) //10-17
 
 static const PRUint32 Big5CharLenTable[] = {0, 1, 1, 2, 0};
 
-SMModel Big5SMModel = {
+SMModel const Big5SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
     5,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
   Big5CharLenTable,
-  CHARDET_ENCODING_BIG5,
+  "Big5",
 };
 
-static PRUint32 EUCJP_cls [ 256 / 8 ] = {
+static const PRUint32 EUCJP_cls [ 256 / 8 ] = {
 //PCK4BITS(5,4,4,4,4,4,4,4),  // 00 - 07 
 PCK4BITS(4,4,4,4,4,4,4,4),  // 00 - 07 
 PCK4BITS(4,4,4,4,4,4,5,5),  // 08 - 0f 
@@ -135,7 +134,7 @@ PCK4BITS(0,0,0,0,0,0,0,5)   // f8 - ff
 };
 
 
-static PRUint32 EUCJP_st [ 5] = {
+static const PRUint32 EUCJP_st [ 5] = {
 PCK4BITS(     3,     4,     3,     5,eStart,eError,eError,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eStart,eError,eStart,eError,eError,eError),//10-17 
@@ -145,15 +144,15 @@ PCK4BITS(     3,eError,eError,eError,eStart,eStart,eStart,eStart) //20-27
 
 static const PRUint32 EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
 
-SMModel EUCJPSMModel = {
+const SMModel EUCJPSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
   EUCJPCharLenTable,
-  CHARDET_ENCODING_EUC_JP,
+  "EUC-JP",
 };
 
-static PRUint32 EUCKR_cls [ 256 / 8 ] = {
+static const PRUint32 EUCKR_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -190,22 +189,22 @@ PCK4BITS(2,2,2,2,2,2,2,0)   // f8 - ff
 };
 
 
-static PRUint32 EUCKR_st [ 2] = {
+static const PRUint32 EUCKR_st [ 2] = {
 PCK4BITS(eError,eStart,     3,eError,eError,eError,eError,eError),//00-07 
 PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f 
 };
 
 static const PRUint32 EUCKRCharLenTable[] = {0, 1, 2, 0};
 
-SMModel EUCKRSMModel = {
+const SMModel EUCKRSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
   4,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
   EUCKRCharLenTable,
-  CHARDET_ENCODING_EUC_KR,
+  "EUC-KR",
 };
 
-static PRUint32 EUCTW_cls [ 256 / 8 ] = {
+static const PRUint32 EUCTW_cls [ 256 / 8 ] = {
 //PCK4BITS(0,2,2,2,2,2,2,2),  // 00 - 07 
 PCK4BITS(2,2,2,2,2,2,2,2),  // 00 - 07 
 PCK4BITS(2,2,2,2,2,2,0,0),  // 08 - 0f 
@@ -242,7 +241,7 @@ PCK4BITS(3,3,3,3,3,3,3,0)   // f8 - ff
 };
 
 
-static PRUint32 EUCTW_st [ 6] = {
+static const PRUint32 EUCTW_st [ 6] = {
 PCK4BITS(eError,eError,eStart,     3,     3,     3,     4,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eStart,eError),//10-17 
@@ -253,12 +252,12 @@ PCK4BITS(eStart,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
 
 static const PRUint32 EUCTWCharLenTable[] = {0, 0, 1, 2, 2, 2, 3};
 
-SMModel EUCTWSMModel = {
+const SMModel EUCTWSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_cls },
    7,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCTW_st },
   EUCTWCharLenTable,
-  CHARDET_ENCODING_EUC_TW,
+  "x-euc-tw",
 };
 
 /* obsolete GB2312 by gb18030
@@ -317,7 +316,7 @@ SMModel GB2312SMModel = {
 
 // the following state machine data was created by perl script in 
 // intl/chardet/tools. It should be the same as in PSM detector.
-static PRUint32 GB18030_cls [ 256 / 8 ] = {
+static const PRUint32 GB18030_cls [ 256 / 8 ] = {
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 10 - 17 
@@ -353,7 +352,7 @@ PCK4BITS(6,6,6,6,6,6,6,0)   // f8 - ff
 };
 
 
-static PRUint32 GB18030_st [ 6] = {
+static const PRUint32 GB18030_st [ 6] = {
 PCK4BITS(eError,eStart,eStart,eStart,eStart,eStart,     3,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eError,eError,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart),//10-17 
@@ -369,17 +368,17 @@ PCK4BITS(eError,eError,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
 // 2 here. 
 static const PRUint32 GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
 
-SMModel GB18030SMModel = {
+const SMModel GB18030SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
    7,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
   GB18030CharLenTable,
-  CHARDET_ENCODING_GB18030,
+  "GB18030",
 };
 
 // sjis
 
-static PRUint32 SJIS_cls [ 256 / 8 ] = {
+static const PRUint32 SJIS_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -418,7 +417,7 @@ PCK4BITS(4,4,4,4,4,0,0,0)   // f8 - ff
 };
 
 
-static PRUint32 SJIS_st [ 3] = {
+static const PRUint32 SJIS_st [ 3] = {
 PCK4BITS(eError,eStart,eStart,     3,eError,eError,eError,eError),//00-07 
 PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
 PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17 
@@ -426,129 +425,16 @@ PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart,eStart,eStart) //10-17
 
 static const PRUint32 SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
 
-SMModel SJISSMModel = {
+const SMModel SJISSMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
    6,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
   SJISCharLenTable,
-  CHARDET_ENCODING_SHIFT_JIS,
+  "Shift_JIS",
 };
 
 
-static PRUint32 UCS2BE_cls [ 256 / 8 ] = {
-PCK4BITS(0,0,0,0,0,0,0,0),  // 00 - 07 
-PCK4BITS(0,0,1,0,0,2,0,0),  // 08 - 0f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
-PCK4BITS(0,0,0,3,0,0,0,0),  // 18 - 1f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27 
-PCK4BITS(0,3,3,3,3,3,0,0),  // 28 - 2f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 40 - 47 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 80 - 87 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 88 - 8f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 90 - 97 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 98 - 9f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // a0 - a7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // a8 - af 
-PCK4BITS(0,0,0,0,0,0,0,0),  // b0 - b7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // b8 - bf 
-PCK4BITS(0,0,0,0,0,0,0,0),  // c0 - c7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // c8 - cf 
-PCK4BITS(0,0,0,0,0,0,0,0),  // d0 - d7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // d8 - df 
-PCK4BITS(0,0,0,0,0,0,0,0),  // e0 - e7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // e8 - ef 
-PCK4BITS(0,0,0,0,0,0,0,0),  // f0 - f7 
-PCK4BITS(0,0,0,0,0,0,4,5)   // f8 - ff 
-};
-
-
-static PRUint32 UCS2BE_st [ 7] = {
-PCK4BITS(     5,     7,     7,eError,     4,     3,eError,eError),//00-07 
-PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
-PCK4BITS(eItsMe,eItsMe,     6,     6,     6,     6,eError,eError),//10-17 
-PCK4BITS(     6,     6,     6,     6,     6,eItsMe,     6,     6),//18-1f 
-PCK4BITS(     6,     6,     6,     6,     5,     7,     7,eError),//20-27 
-PCK4BITS(     5,     8,     6,     6,eError,     6,     6,     6),//28-2f 
-PCK4BITS(     6,     6,     6,     6,eError,eError,eStart,eStart) //30-37 
-};
-
-static const PRUint32 UCS2BECharLenTable[] = {2, 2, 2, 0, 2, 2};
-
-SMModel UCS2BESMModel = {
-  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_cls },
-   6,
-  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_st },
-  UCS2BECharLenTable,
-  CHARDET_ENCODING_UTF_16BE,
-};
-
-static PRUint32 UCS2LE_cls [ 256 / 8 ] = {
-PCK4BITS(0,0,0,0,0,0,0,0),  // 00 - 07 
-PCK4BITS(0,0,1,0,0,2,0,0),  // 08 - 0f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 10 - 17 
-PCK4BITS(0,0,0,3,0,0,0,0),  // 18 - 1f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 20 - 27 
-PCK4BITS(0,3,3,3,3,3,0,0),  // 28 - 2f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 30 - 37 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 38 - 3f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 40 - 47 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 48 - 4f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 50 - 57 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 58 - 5f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 60 - 67 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 68 - 6f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 70 - 77 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 78 - 7f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 80 - 87 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 88 - 8f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 90 - 97 
-PCK4BITS(0,0,0,0,0,0,0,0),  // 98 - 9f 
-PCK4BITS(0,0,0,0,0,0,0,0),  // a0 - a7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // a8 - af 
-PCK4BITS(0,0,0,0,0,0,0,0),  // b0 - b7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // b8 - bf 
-PCK4BITS(0,0,0,0,0,0,0,0),  // c0 - c7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // c8 - cf 
-PCK4BITS(0,0,0,0,0,0,0,0),  // d0 - d7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // d8 - df 
-PCK4BITS(0,0,0,0,0,0,0,0),  // e0 - e7 
-PCK4BITS(0,0,0,0,0,0,0,0),  // e8 - ef 
-PCK4BITS(0,0,0,0,0,0,0,0),  // f0 - f7 
-PCK4BITS(0,0,0,0,0,0,4,5)   // f8 - ff 
-};
-
-
-static PRUint32 UCS2LE_st [ 7] = {
-PCK4BITS(     6,     6,     7,     6,     4,     3,eError,eError),//00-07 
-PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f 
-PCK4BITS(eItsMe,eItsMe,     5,     5,     5,eError,eItsMe,eError),//10-17 
-PCK4BITS(     5,     5,     5,eError,     5,eError,     6,     6),//18-1f 
-PCK4BITS(     7,     6,     8,     8,     5,     5,     5,eError),//20-27 
-PCK4BITS(     5,     5,     5,eError,eError,eError,     5,     5),//28-2f 
-PCK4BITS(     5,     5,     5,eError,     5,eError,eStart,eStart) //30-37 
-};
-
-static const PRUint32 UCS2LECharLenTable[] = {2, 2, 2, 2, 2, 2};
-
-SMModel UCS2LESMModel = {
-  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_cls },
-   6,
-  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_st },
-  UCS2LECharLenTable,
-  CHARDET_ENCODING_UTF_16LE,
-};
-
-
-static PRUint32 UTF8_cls [ 256 / 8 ] = {
+static const PRUint32 UTF8_cls [ 256 / 8 ] = {
 //PCK4BITS(0,1,1,1,1,1,1,1),  // 00 - 07 
 PCK4BITS(1,1,1,1,1,1,1,1),  // 00 - 07  //allow 0x00 as a legal value
 PCK4BITS(1,1,1,1,1,1,0,0),  // 08 - 0f 
@@ -585,7 +471,7 @@ PCK4BITS(12,13,13,13,14,15,0,0)   // f8 - ff
 };
 
 
-static PRUint32 UTF8_st [ 26] = {
+static const PRUint32 UTF8_st [ 26] = {
 PCK4BITS(eError,eStart,eError,eError,eError,eError,     12,     10),//00-07 
 PCK4BITS(     9,     11,     8,     7,     6,     5,     4,     3),//08-0f 
 PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//10-17 
@@ -617,11 +503,11 @@ PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError) //c8-cf
 static const PRUint32 UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3, 
                             3, 3, 4, 4, 5, 5, 6, 6 };
 
-SMModel UTF8SMModel = {
+const SMModel UTF8SMModel = {
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
    16,
   {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
   UTF8CharLenTable,
-  CHARDET_ENCODING_UTF_8,
+  "UTF-8",
 };
 
diff --git a/src/nsPkgInt.h b/src/nsPkgInt.h
index 7617d6c..3caa912 100644
--- a/src/nsPkgInt.h
+++ b/src/nsPkgInt.h
@@ -68,7 +68,7 @@ typedef struct nsPkgInt {
   nsSftMsk  sftmsk;
   nsBitSft  bitsft;
   nsUnitMsk unitmsk;
-  PRUint32  *data;
+  const PRUint32* const data;
 } nsPkgInt;
 
 
diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp
index 65afdfe..d8fef87 100644
--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -56,21 +56,22 @@ nsSBCSGroupProber::nsSBCSGroupProber()
   mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
   mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
   mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
+  mProbers[10] = new nsSingleByteCharSetProber(&TIS620ThaiModel);
 
   nsHebrewProber *hebprober = new nsHebrewProber();
-  // Notice: Any change in these indexes - 10,11,12 must be reflected
+  // Notice: Any change in these indexes - 11,12,13 must be reflected
   // in the code below as well.
-  mProbers[10] = hebprober;
-  mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
-  mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
+  mProbers[11] = hebprober;
+  mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, PR_FALSE, hebprober); // Logical Hebrew
+  mProbers[13] = new nsSingleByteCharSetProber(&Win1255Model, PR_TRUE, hebprober); // Visual Hebrew
   // Tell the Hebrew prober about the logical and visual probers
-  if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null
+  if (mProbers[11] && mProbers[12] && mProbers[13]) // all are not null
   {
-    hebprober->SetModelProbers(mProbers[11], mProbers[12]);
+    hebprober->SetModelProbers(mProbers[12], mProbers[13]);
   }
   else // One or more is null. avoid any Hebrew probing, null them all
   {
-    for (PRUint32 i = 10; i <= 12; ++i)
+    for (PRUint32 i = 11; i <= 13; ++i)
     { 
       delete mProbers[i]; 
       mProbers[i] = 0; 
diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h
index faa57ed..cfbf7e1 100644
--- a/src/nsSBCSGroupProber.h
+++ b/src/nsSBCSGroupProber.h
@@ -40,7 +40,7 @@
 #define nsSBCSGroupProber_h__
 
 
-#define NUM_OF_SBCS_PROBERS    13
+#define NUM_OF_SBCS_PROBERS    14
 
 class nsCharSetProber;
 class nsSBCSGroupProber: public nsCharSetProber {
diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h
index 277ab07..d7180dc 100644
--- a/src/nsSBCharSetProber.h
+++ b/src/nsSBCharSetProber.h
@@ -51,19 +51,19 @@
 
 typedef struct
 {
-  unsigned char *charToOrderMap;    // [256] table use to find a char's order
-  char *precedenceMatrix;           // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
+  const unsigned char* const charToOrderMap;    // [256] table use to find a char's order
+  const PRUint8* const precedenceMatrix;  // [SAMPLE_SIZE][SAMPLE_SIZE]; table to find a 2-char sequence's frequency
   float  mTypicalPositiveRatio;     // = freqSeqs / totalSeqs 
   PRBool keepEnglishLetter;         // says if this script contains English characters (not implemented)
-  const char* charsetName;
+  const char* const charsetName;
 } SequenceModel;
 
 
 class nsSingleByteCharSetProber : public nsCharSetProber{
 public:
-  nsSingleByteCharSetProber(SequenceModel *model) 
+  nsSingleByteCharSetProber(const SequenceModel *model) 
     :mModel(model), mReversed(PR_FALSE), mNameProber(0) { Reset(); }
-  nsSingleByteCharSetProber(SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
+  nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
     :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
 
   virtual const char* GetCharSetName();
@@ -87,7 +87,7 @@ public:
 
 protected:
   nsProbingState mState;
-  const SequenceModel *mModel;
+  const SequenceModel* const mModel;
   const PRBool mReversed; // PR_TRUE if we need to reverse every pair in the model lookup
 
   //char order of last character
@@ -106,19 +106,20 @@ protected:
 };
 
 
-extern SequenceModel Koi8rModel;
-extern SequenceModel Win1251Model;
-extern SequenceModel Latin5Model;
-extern SequenceModel MacCyrillicModel;
-extern SequenceModel Ibm866Model;
-extern SequenceModel Ibm855Model;
-extern SequenceModel Latin7Model;
-extern SequenceModel Win1253Model;
-extern SequenceModel Latin5BulgarianModel;
-extern SequenceModel Win1251BulgarianModel;
-extern SequenceModel Latin2HungarianModel;
-extern SequenceModel Win1250HungarianModel;
-extern SequenceModel Win1255Model;
+extern const SequenceModel Koi8rModel;
+extern const SequenceModel Win1251Model;
+extern const SequenceModel Latin5Model;
+extern const SequenceModel MacCyrillicModel;
+extern const SequenceModel Ibm866Model;
+extern const SequenceModel Ibm855Model;
+extern const SequenceModel Latin7Model;
+extern const SequenceModel Win1253Model;
+extern const SequenceModel Latin5BulgarianModel;
+extern const SequenceModel Win1251BulgarianModel;
+extern const SequenceModel Latin2HungarianModel;
+extern const SequenceModel Win1250HungarianModel;
+extern const SequenceModel Win1255Model;
+extern const SequenceModel TIS620ThaiModel;
 
 #endif /* nsSingleByteCharSetProber_h__ */
 
diff --git a/src/nsSJISProber.cpp b/src/nsSJISProber.cpp
index 9bab506..c7842f6 100644
--- a/src/nsSJISProber.cpp
+++ b/src/nsSJISProber.cpp
@@ -46,8 +46,8 @@ void  nsSJISProber::Reset(void)
 {
   mCodingSM->Reset(); 
   mState = eDetecting;
-  mContextAnalyser.Reset();
-  mDistributionAnalyser.Reset();
+  mContextAnalyser.Reset(mIsPreferredLanguage);
+  mDistributionAnalyser.Reset(mIsPreferredLanguage);
 }
 
 nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
@@ -57,11 +57,6 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsSJISProber.h b/src/nsSJISProber.h
index 77a7085..1efb6e3 100644
--- a/src/nsSJISProber.h
+++ b/src/nsSJISProber.h
@@ -43,7 +43,6 @@
 #ifndef nsSJISProber_h__
 #define nsSJISProber_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 #include "JpCntx.h"
@@ -52,11 +51,13 @@
 
 class nsSJISProber: public nsCharSetProber {
 public:
-  nsSJISProber(void){mCodingSM = new nsCodingStateMachine(&SJISSMModel);
-                      Reset();}
+  nsSJISProber(PRBool aIsPreferredLanguage)
+    :mIsPreferredLanguage(aIsPreferredLanguage)
+  {mCodingSM = new nsCodingStateMachine(&SJISSMModel);
+    Reset();}
   virtual ~nsSJISProber(void){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_SHIFT_JIS;}
+  const char* GetCharSetName() {return "Shift_JIS";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
@@ -70,6 +71,7 @@ protected:
   SJISDistributionAnalysis mDistributionAnalyser;
 
   char mLastChar[2];
+  PRBool mIsPreferredLanguage;
 
 };
 
diff --git a/src/nsUTF8Prober.cpp b/src/nsUTF8Prober.cpp
index 6d590b4..ab8d9f7 100644
--- a/src/nsUTF8Prober.cpp
+++ b/src/nsUTF8Prober.cpp
@@ -51,11 +51,6 @@ nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen)
   for (PRUint32 i = 0; i < aLen; i++)
   {
     codingState = mCodingSM->NextState(aBuf[i]);
-    if (codingState == eError)
-    {
-      mState = eNotMe;
-      break;
-    }
     if (codingState == eItsMe)
     {
       mState = eFoundIt;
diff --git a/src/nsUTF8Prober.h b/src/nsUTF8Prober.h
index 3bc8874..21c91c4 100644
--- a/src/nsUTF8Prober.h
+++ b/src/nsUTF8Prober.h
@@ -38,7 +38,6 @@
 #ifndef nsUTF8Prober_h__
 #define nsUTF8Prober_h__
 
-#include "uchardetDefine.h"
 #include "nsCharSetProber.h"
 #include "nsCodingStateMachine.h"
 
@@ -49,7 +48,7 @@ public:
                 Reset(); }
   virtual ~nsUTF8Prober(){delete mCodingSM;}
   nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
-  const char* GetCharSetName() {return CHARDET_ENCODING_UTF_8;}
+  const char* GetCharSetName() {return "UTF-8";}
   nsProbingState GetState(void) {return mState;}
   void      Reset(void);
   float     GetConfidence(void);
diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp
index 1e16002..7af8f95 100644
--- a/src/nsUniversalDetector.cpp
+++ b/src/nsUniversalDetector.cpp
@@ -38,7 +38,6 @@
 
 #include "nscore.h"
 
-#include "uchardetDefine.h"
 #include "nsUniversalDetector.h"
 
 #include "nsMBCSGroupProber.h"
@@ -46,7 +45,7 @@
 #include "nsEscCharsetProber.h"
 #include "nsLatin1Prober.h"
 
-nsUniversalDetector::nsUniversalDetector()
+nsUniversalDetector::nsUniversalDetector(PRUint32 aLanguageFilter)
 {
   mDone = PR_FALSE;
   mBestGuess = -1;   //illegal value as signal
@@ -58,6 +57,7 @@ nsUniversalDetector::nsUniversalDetector()
   mGotData = PR_FALSE;
   mInputState = ePureAscii;
   mLastChar = '\0';
+  mLanguageFilter = aLanguageFilter;
 
   PRUint32 i;
   for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
@@ -67,10 +67,9 @@ nsUniversalDetector::nsUniversalDetector()
 nsUniversalDetector::~nsUniversalDetector() 
 {
   for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
-    if (mCharSetProbers[i])      
-      delete mCharSetProbers[i];
-  if (mEscCharSetProber)
-    delete mEscCharSetProber;
+    delete mCharSetProbers[i];
+
+  delete mEscCharSetProber;
 }
 
 void 
@@ -111,37 +110,23 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
   if (mStart)
   {
     mStart = PR_FALSE;
-    if (aLen > 3)
+    if (aLen > 2)
       switch (aBuf[0])
         {
         case '\xEF':
           if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
             // EF BB BF  UTF-8 encoded BOM
-            mDetectedCharset = CHARDET_ENCODING_UTF_8;
+            mDetectedCharset = "UTF-8";
         break;
         case '\xFE':
-          if (('\xFF' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
-            // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
-            mDetectedCharset = CHARDET_ENCODING_X_ISO_10646_UCS_4_3412;
-          else if ('\xFF' == aBuf[1])
+          if ('\xFF' == aBuf[1])
             // FE FF  UTF-16, big endian BOM
-            mDetectedCharset = CHARDET_ENCODING_UTF_16BE;
-        break;
-        case '\x00':
-          if (('\x00' == aBuf[1]) && ('\xFE' == aBuf[2]) && ('\xFF' == aBuf[3]))
-            // 00 00 FE FF  UTF-32, big-endian BOM
-            mDetectedCharset = CHARDET_ENCODING_UTF_32BE;
-          else if (('\x00' == aBuf[1]) && ('\xFF' == aBuf[2]) && ('\xFE' == aBuf[3]))
-            // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
-            mDetectedCharset = CHARDET_ENCODING_X_ISO_10646_UCS_4_2143;
+            mDetectedCharset = "UTF-16";
         break;
         case '\xFF':
-          if (('\xFE' == aBuf[1]) && ('\x00' == aBuf[2]) && ('\x00' == aBuf[3]))
-            // FF FE 00 00  UTF-32, little-endian BOM
-            mDetectedCharset = CHARDET_ENCODING_UTF_32LE;
-          else if ('\xFE' == aBuf[1])
+          if ('\xFE' == aBuf[1])
             // FF FE  UTF-16, little endian BOM
-            mDetectedCharset = CHARDET_ENCODING_UTF_16LE;
+            mDetectedCharset = "UTF-16";
         break;
       }  // switch
 
@@ -172,16 +157,24 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
 
         //start multibyte and singlebyte charset prober
         if (nsnull == mCharSetProbers[0])
-          mCharSetProbers[0] = new nsMBCSGroupProber;
-        if (nsnull == mCharSetProbers[1])
-          mCharSetProbers[1] = new nsSBCSGroupProber;
-        if (nsnull == mCharSetProbers[2])
-          mCharSetProbers[2] = new nsLatin1Prober; 
-
-        if ((nsnull == mCharSetProbers[0]) ||
-            (nsnull == mCharSetProbers[1]) ||
-            (nsnull == mCharSetProbers[2]))
+        {
+          mCharSetProbers[0] = new nsMBCSGroupProber(mLanguageFilter);
+          if (nsnull == mCharSetProbers[0])
             return NS_ERROR_OUT_OF_MEMORY;
+        }
+        if (nsnull == mCharSetProbers[1] &&
+            (mLanguageFilter & NS_FILTER_NON_CJK))
+        {
+          mCharSetProbers[1] = new nsSBCSGroupProber;
+          if (nsnull == mCharSetProbers[1])
+            return NS_ERROR_OUT_OF_MEMORY;
+        }
+        if (nsnull == mCharSetProbers[2])
+        {
+          mCharSetProbers[2] = new nsLatin1Prober; 
+          if (nsnull == mCharSetProbers[2])
+            return NS_ERROR_OUT_OF_MEMORY;
+        }
       }
     }
     else
@@ -202,7 +195,7 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
   {
   case eEscAscii:
     if (nsnull == mEscCharSetProber) {
-      mEscCharSetProber = new nsEscCharSetProber;
+      mEscCharSetProber = new nsEscCharSetProber(mLanguageFilter);
       if (nsnull == mEscCharSetProber)
         return NS_ERROR_OUT_OF_MEMORY;
     }
@@ -216,12 +209,15 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
   case eHighbyte:
     for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
     {
-      st = mCharSetProbers[i]->HandleData(aBuf, aLen);
-      if (st == eFoundIt) 
+      if (mCharSetProbers[i])
       {
-        mDone = PR_TRUE;
-        mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
-        return NS_OK;
+        st = mCharSetProbers[i]->HandleData(aBuf, aLen);
+        if (st == eFoundIt) 
+        {
+          mDone = PR_TRUE;
+          mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
+          return NS_OK;
+        }
       } 
     }
     break;
@@ -260,11 +256,14 @@ void nsUniversalDetector::DataEnd()
 
       for (PRInt32 i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
       {
-        proberConfidence = mCharSetProbers[i]->GetConfidence();
-        if (proberConfidence > maxProberConfidence)
+        if (mCharSetProbers[i])
         {
-          maxProberConfidence = proberConfidence;
-          maxProber = i;
+          proberConfidence = mCharSetProbers[i]->GetConfidence();
+          if (proberConfidence > maxProberConfidence)
+          {
+            maxProberConfidence = proberConfidence;
+            maxProber = i;
+          }
         }
       }
       //do not report anything because we are not confident of it, that's in fact a negative answer
diff --git a/src/nsUniversalDetector.h b/src/nsUniversalDetector.h
index 36f3fa0..525f722 100644
--- a/src/nsUniversalDetector.h
+++ b/src/nsUniversalDetector.h
@@ -48,9 +48,22 @@ typedef enum {
   eHighbyte  = 2
 } nsInputState;
 
+#define NS_FILTER_CHINESE_SIMPLIFIED  0x01
+#define NS_FILTER_CHINESE_TRADITIONAL 0x02
+#define NS_FILTER_JAPANESE            0x04
+#define NS_FILTER_KOREAN              0x08
+#define NS_FILTER_NON_CJK             0x10
+#define NS_FILTER_ALL                 0x1F
+#define NS_FILTER_CHINESE (NS_FILTER_CHINESE_SIMPLIFIED | \
+                           NS_FILTER_CHINESE_TRADITIONAL)
+#define NS_FILTER_CJK (NS_FILTER_CHINESE_SIMPLIFIED | \
+                       NS_FILTER_CHINESE_TRADITIONAL | \
+                       NS_FILTER_JAPANESE | \
+                       NS_FILTER_KOREAN)
+
 class nsUniversalDetector {
 public:
-   nsUniversalDetector();
+   nsUniversalDetector(PRUint32 aLanguageFilter);
    virtual ~nsUniversalDetector();
    virtual nsresult HandleData(const char* aBuf, PRUint32 aLen);
    virtual void DataEnd(void);
@@ -66,6 +79,7 @@ protected:
    char    mLastChar;
    const char *  mDetectedCharset;
    PRInt32 mBestGuess;
+   PRUint32 mLanguageFilter;
 
    nsCharSetProber  *mCharSetProbers[NUM_OF_CHARSET_PROBERS];
    nsCharSetProber  *mEscCharSetProber;
diff --git a/src/nscore.h b/src/nscore.h
index 83d7485..e0b5a72 100644
--- a/src/nscore.h
+++ b/src/nscore.h
@@ -42,6 +42,8 @@ typedef int PRInt32;
 typedef unsigned int PRUint32;
 typedef short PRInt16;
 typedef unsigned short PRUint16;
+typedef signed char PRInt8;
+typedef unsigned char PRUint8;
 
 #define PR_FALSE false
 #define PR_TRUE true
diff --git a/src/tools/uchardet.cpp b/src/tools/uchardet.cpp
index 285248c..56765bc 100644
--- a/src/tools/uchardet.cpp
+++ b/src/tools/uchardet.cpp
@@ -44,38 +44,18 @@
 #ifndef VERSION
 #define VERSION "Unknown"
 #endif
-#define BUFFER_SIZE 32768
+#define BUFFER_SIZE 65536
+
+char buffer[BUFFER_SIZE];
 
 void detect(FILE * fp)
 {
     uchardet_t handle = uchardet_new();
 
-    size_t size = BUFFER_SIZE;
-    char * buffer_in = (char *) malloc(size * sizeof(char));
-
-    while (fgets(buffer_in, size, fp) != NULL)
+    while (!feof(fp) && !ferror(fp)) // also stop on read errors: feof() stays false after a failed fread()
     {
-        size_t freesize = size;
-
-        char * buffer_in_p = buffer_in;
-        size_t line_length = strlen(buffer_in_p);
-        while (line_length + 1 == freesize && buffer_in_p[line_length - 2] != '\n')
-        {
-            buffer_in_p += size - 1;
-            freesize = size + 1;
-            size += size;
-            size_t offset = buffer_in_p - buffer_in;
-            buffer_in = (char *) realloc(buffer_in, size * sizeof(char));
-            buffer_in_p = buffer_in + offset;
-
-            if (fgets(buffer_in_p, freesize, fp) == NULL)
-                break;
-
-            line_length = strlen(buffer_in_p);
-        }
-
-        int retval = uchardet_handle_data(handle, buffer_in, strlen(buffer_in));
-
+        size_t len = fread(buffer, 1, BUFFER_SIZE, fp);
+        int retval = uchardet_handle_data(handle, buffer, len);
         if (retval != 0)
         {
             fprintf(stderr, "Handle data error.\n");
@@ -84,10 +64,10 @@ void detect(FILE * fp)
     }
     uchardet_data_end(handle);
 
-    printf("%s\n", uchardet_get_charset(handle));
+    const char * charset = uchardet_get_charset(handle);
+    printf("%s\n", charset);
 
     uchardet_delete(handle);
-    free(buffer_in);
 }
 
 void show_version()
diff --git a/src/uchardet.cpp b/src/uchardet.cpp
index bc83e0f..74ab63c 100644
--- a/src/uchardet.cpp
+++ b/src/uchardet.cpp
@@ -37,67 +37,69 @@
 #include "uchardet.h"
 #include "nscore.h"
 #include "nsUniversalDetector.h"
-#include <string.h>
+#include <string>
 
-class DllDetector : public nsUniversalDetector
+using std::string;
+
+class HandleUniversalDetector : public nsUniversalDetector
 {
 protected:
-    char charset_[256];
+    string m_charset; // last charset name passed to Report(); empty after construction and Reset()
 
 public:
-    DllDetector()
-    : nsUniversalDetector()
+    HandleUniversalDetector()
+    : nsUniversalDetector(NS_FILTER_ALL)
     {
-        *charset_=0;
+        m_charset = "";
     }
 
-    virtual ~DllDetector()
+    virtual ~HandleUniversalDetector()
     {}
 
     virtual void Report(const char* charset)
     {
-        strncpy( charset_ , charset , sizeof(charset_) );
+        m_charset = charset;
     }
 
     virtual void Reset()
     {
         nsUniversalDetector::Reset();
-        *charset_=0;
+        m_charset = "";
     }
 
     const char* GetCharset() const
     {
-        return charset_;
+        return m_charset.c_str();
     }
 };
 
 uchardet_t uchardet_new()
 {
-    return reinterpret_cast<uchardet_t> (new DllDetector());
+    return reinterpret_cast<uchardet_t> (new HandleUniversalDetector());
 }
 
 void uchardet_delete(uchardet_t ud)
 {
-    delete reinterpret_cast<DllDetector*>(ud);
+    delete reinterpret_cast<HandleUniversalDetector*>(ud);
 }
 
 int uchardet_handle_data(uchardet_t ud, const char * data, size_t len)
 {
-    nsresult ret = reinterpret_cast<DllDetector*>(ud)->HandleData(data, (PRUint32)len);
+    nsresult ret = reinterpret_cast<HandleUniversalDetector*>(ud)->HandleData(data, (PRUint32)len);
     return (ret != NS_OK);
 }
 
 void uchardet_data_end(uchardet_t ud)
 {
-    reinterpret_cast<DllDetector*>(ud)->DataEnd();
+    reinterpret_cast<HandleUniversalDetector*>(ud)->DataEnd();
 }
 
 void uchardet_reset(uchardet_t ud)
 {
-    reinterpret_cast<DllDetector*>(ud)->Reset();
+    reinterpret_cast<HandleUniversalDetector*>(ud)->Reset();
 }
 
 const char* uchardet_get_charset(uchardet_t ud)
 {
-    return reinterpret_cast<DllDetector*>(ud)->GetCharset();
+    return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset();
 }
diff --git a/win32.sh b/win32.sh
new file mode 100644
index 0000000..b757380
--- /dev/null
+++ b/win32.sh
@@ -0,0 +1,7 @@
+mkdir --parents win32 \
+&& cd win32 \
+&& cmake .. \
+	-G "MSYS Makefiles" \
+	-DCMAKE_BUILD_TYPE=Release \
+	-DCMAKE_INSTALL_PREFIX="" \
+&& make
\ No newline at end of file