mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-08 01:36:41 +08:00
Differentiate ASCII and detection failure.
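The library used to return an empty string ("") both for properly detected ASCII and for detection failure, and the tool would print "ascii/unknown" in either case. Make a proper distinction between the two cases: the library now returns "ASCII" for ASCII input, and the tool prints "unknown" only on detection failure.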
This commit is contained in:
parent
4dbc6e7ab3
commit
0289c2a232
@@ -45,6 +45,8 @@ Techniques used by universalchardet are described at http://www.mozilla.org/proj
|
||||
* French
|
||||
* ISO-8859-1
|
||||
* ISO-8859-15
|
||||
* English
|
||||
* ASCII
|
||||
* Others
|
||||
* WINDOWS-1252
|
||||
|
||||
|
||||
@@ -229,6 +229,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
mDone = PR_TRUE;
|
||||
mDetectedCharset = mEscCharSetProber->GetCharSetName();
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ASCII with the ESC character (or the sequence "~{") is still
|
||||
* ASCII until proven otherwise. */
|
||||
mDetectedCharset = "ASCII";
|
||||
}
|
||||
break;
|
||||
case eHighbyte:
|
||||
for (i = 0; i < NUM_OF_CHARSET_PROBERS; i++)
|
||||
@@ -246,8 +252,10 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
|
||||
}
|
||||
break;
|
||||
|
||||
default: //pure ascii
|
||||
;//do nothing here
|
||||
default:
|
||||
/* Pure ASCII */
|
||||
mDetectedCharset = "ASCII";
|
||||
break;
|
||||
}
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -69,7 +69,7 @@ void detect(FILE * fp)
|
||||
if (*charset)
|
||||
printf("%s\n", charset);
|
||||
else
|
||||
printf("ascii/unknown\n");
|
||||
printf("unknown\n");
|
||||
|
||||
uchardet_delete(handle);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user