mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
Merge branch 'master' of https://github.com/lovasoa/uchardet into lovasoa-master
Let's shortcut Single Byte charset detection on invalid codepoints.
Merging and fixing the contributor's commit conflicts after the code
redesign: in particular, we added an illegal-character concept (illegal
characters were mixed with control characters in the current charmaps, yet
control characters are NOT to be considered invalid) and constants instead
of hardcoded numbers ('ILL' rather than 255).
This commit is contained in:
commit
c4fa728e7a
@ -46,6 +46,13 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32
|
|||||||
{
|
{
|
||||||
order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
|
order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
|
||||||
|
|
||||||
|
if (order == ILL)
|
||||||
|
{
|
||||||
|
/* When encountering an illegal codepoint, no need
|
||||||
|
* to continue analyzing data. */
|
||||||
|
mState = eNotMe;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (order < SYMBOL_CAT_ORDER)
|
if (order < SYMBOL_CAT_ORDER)
|
||||||
mTotalChar++;
|
mTotalChar++;
|
||||||
if (order < mModel->freqCharCount)
|
if (order < mModel->freqCharCount)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user