Differentiate ASCII and detection failure.

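The library used to return "" (an empty string) both for properly detected ASCII and for detection failure, and the tool would print "ascii/unknown" in either case. Make a proper distinction between the two cases: the library now reports "ASCII" explicitly, and the tool prints "unknown" only when detection fails.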
2026-04-30 19:09:25 +08:00 · 2015-11-28 16:44:09 +01:00 · 2015-11-28 16:44:09 +01:00 · 0289c2a232
commit 0289c2a232
parent 4dbc6e7ab3
3 changed files with 13 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -45,6 +45,8 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
  * French
    * ISO-8859-1
    * ISO-8859-15
+  * English
+    * ASCII
  * Others
    * WINDOWS-1252

--- a/src/nsUniversalDetector.cpp
+++ b/src/nsUniversalDetector.cpp
@@ -229,6 +229,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
      mDone = PR_TRUE;
      mDetectedCharset = mEscCharSetProber->GetCharSetName();
    }
+    else
+    {
+      /* ASCII with the ESC character (or the sequence "~{") is still
+       * ASCII until proven otherwise. */
+      mDetectedCharset = "ASCII";
+    }
    break;
  case eHighbyte:
    for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
@@ -246,8 +252,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
    }
    break;

-  default:  //pure ascii
-    ;//do nothing here
+  default:
+    /* Pure ASCII */
+    mDetectedCharset = "ASCII";
+    break;
  }
  return NS_OK;
 }
--- a/src/tools/uchardet.cpp
+++ b/src/tools/uchardet.cpp
@@ -69,7 +69,7 @@ void detect(FILE * fp)
    if (*charset)
    	printf("%s\n", charset);
 	else
-		printf("ascii/unknown\n");
+		printf("unknown\n");
 	
    uchardet_delete(handle);
 }