Differentiate ASCII and detection failure.

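The library used to return an empty string ("") both for properly detected
ASCII and for detection failure, so the tool printed "ascii/unknown" in
either case. Make a proper distinction between the two cases: the library
now reports "ASCII", and the tool prints "unknown" only when detection fails.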
This commit is contained in:
Jehan 2015-11-28 16:44:09 +01:00
parent 4dbc6e7ab3
commit 0289c2a232
3 changed files with 13 additions and 3 deletions

View File

@@ -45,6 +45,8 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
* French
* ISO-8859-1
* ISO-8859-15
* English
* ASCII
* Others
* WINDOWS-1252

View File

@@ -229,6 +229,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
mDone = PR_TRUE;
mDetectedCharset = mEscCharSetProber->GetCharSetName();
}
else
{
/* ASCII with the ESC character (or the sequence "~{") is still
* ASCII until proven otherwise. */
mDetectedCharset = "ASCII";
}
break;
case eHighbyte:
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
@@ -246,8 +252,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
}
break;
default: //pure ascii
;//do nothing here
default:
/* Pure ASCII */
mDetectedCharset = "ASCII";
break;
}
return NS_OK;
}

View File

@@ -69,7 +69,7 @@ void detect(FILE * fp)
if (*charset)
printf("%s\n", charset);
else
printf("ascii/unknown\n");
printf("unknown\n");
uchardet_delete(handle);
}