diff --git a/README.md b/README.md index 66606b9..ec7542e 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,8 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj * French * ISO-8859-1 * ISO-8859-15 + * English + * ASCII * Others * WINDOWS-1252 diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp index f95244e..5e13b81 100644 --- a/src/nsUniversalDetector.cpp +++ b/src/nsUniversalDetector.cpp @@ -229,6 +229,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) mDone = PR_TRUE; mDetectedCharset = mEscCharSetProber->GetCharSetName(); } + else + { + /* ASCII with the ESC character (or the sequence "~{") is still + * ASCII until proven otherwise. */ + mDetectedCharset = "ASCII"; + } break; case eHighbyte: for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) @@ -246,8 +252,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) } break; - default: //pure ascii - ;//do nothing here + default: + /* Pure ASCII */ + mDetectedCharset = "ASCII"; + break; } return NS_OK; } diff --git a/src/tools/uchardet.cpp b/src/tools/uchardet.cpp index 6ea5131..6f6468b 100644 --- a/src/tools/uchardet.cpp +++ b/src/tools/uchardet.cpp @@ -69,7 +69,7 @@ void detect(FILE * fp) if (*charset) printf("%s\n", charset); else - printf("ascii/unknown\n"); + printf("unknown\n"); uchardet_delete(handle); }