From 0289c2a23213aeceb7c37bbab3a4f5827e36334a Mon Sep 17 00:00:00 2001 From: Jehan Date: Sat, 28 Nov 2015 16:44:09 +0100 Subject: [PATCH] Differentiate ASCII and detection failure. The lib used to return "" for both properly detected ASCII and detection failure. And the tool would return "ascii/unknown". Make a proper distinction between the 2 cases. --- README.md | 2 ++ src/nsUniversalDetector.cpp | 12 ++++++++++-- src/tools/uchardet.cpp | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 66606b9..ec7542e 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,8 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj * French * ISO-8859-1 * ISO-8859-15 + * English + * ASCII * Others * WINDOWS-1252 diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp index f95244e..5e13b81 100644 --- a/src/nsUniversalDetector.cpp +++ b/src/nsUniversalDetector.cpp @@ -229,6 +229,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) mDone = PR_TRUE; mDetectedCharset = mEscCharSetProber->GetCharSetName(); } + else + { + /* ASCII with the ESC character (or the sequence "~{") is still + * ASCII until proven otherwise. */ + mDetectedCharset = "ASCII"; + } break; case eHighbyte: for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++) @@ -246,8 +252,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) } break; - default: //pure ascii - ;//do nothing here + default: + /* Pure ASCII */ + mDetectedCharset = "ASCII"; + break; } return NS_OK; } diff --git a/src/tools/uchardet.cpp b/src/tools/uchardet.cpp index 6ea5131..6f6468b 100644 --- a/src/tools/uchardet.cpp +++ b/src/tools/uchardet.cpp @@ -69,7 +69,7 @@ void detect(FILE * fp) if (*charset) printf("%s\n", charset); else - printf("ascii/unknown\n"); + printf("unknown\n"); uchardet_delete(handle); }