diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp index 9711618..75474e0 100644 --- a/src/nsUniversalDetector.cpp +++ b/src/nsUniversalDetector.cpp @@ -242,16 +242,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) mDone = PR_TRUE; mDetectedCharset = mEscCharSetProber->GetCharSetName(); } - else if (mNbspFound) - { - mDetectedCharset = "ISO-8859-1"; - } - else - { - /* ASCII with the ESC character (or the sequence "~{") is still - * ASCII until proven otherwise. */ - mDetectedCharset = "ASCII"; - } break; case eHighbyte: for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) @@ -270,17 +260,6 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) break; default: - if (mNbspFound) - { - /* ISO-8859-1 is a good result candidate for ASCII + NBSP. - * (though it could have been any ISO-8859 encoding). */ - mDetectedCharset = "ISO-8859-1"; - } - else - { - /* Pure ASCII */ - mDetectedCharset = "ASCII"; - } break; } return NS_OK; @@ -297,6 +276,29 @@ void nsUniversalDetector::DataEnd() return; } + if (! mDetectedCharset) + { + switch (mInputState) + { + case eEscAscii: + case ePureAscii: + if (mNbspFound) + { + /* ISO-8859-1 is a good result candidate for ASCII + NBSP. + * (though it could have been any ISO-8859 encoding). */ + mDetectedCharset = "ISO-8859-1"; + } + else + { + /* ASCII with the ESC character (or the sequence "~{") is still + * ASCII until proven otherwise. */ + mDetectedCharset = "ASCII"; + } + default: + break; + } + } + if (mDetectedCharset) { mDone = PR_TRUE;