mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 08:46:40 +08:00
src: handle long sequences of characters.
My previous commit did not handle all cases: it fixed the buffer overflow triggered by the provided byte sequence, but I believe it was still possible to craft special input sequences that are too long for codePointBuffer. This additional commit handles those remaining cases by processing the input in manageable sub-strings.
This commit is contained in:
parent
9910941387
commit
ab1d2f1120
@ -310,18 +310,29 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
|
||||
}
|
||||
|
||||
if (codePointBuffer[i])
|
||||
st = mProbers[i]->HandleData(aBuf + start, sequenceLength,
|
||||
&(codePointBuffer[i]), &(codePointBufferIdx[i]));
|
||||
else
|
||||
st = mProbers[i]->HandleData(aBuf + start, sequenceLength, NULL, NULL);
|
||||
{
|
||||
while (sequenceLength > 0)
|
||||
{
|
||||
int subLength = (sequenceLength > codePointBufferSize[i]) ? codePointBufferSize[i] : sequenceLength;
|
||||
|
||||
if (codePointBufferIdx[i] > 0 && codePointBuffer[i])
|
||||
st = mProbers[i]->HandleData(aBuf + start, subLength,
|
||||
&(codePointBuffer[i]), &(codePointBufferIdx[i]));
|
||||
|
||||
if (codePointBufferIdx[i] > 0)
|
||||
{
|
||||
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
|
||||
langDetectors[i][j]->HandleData(codePointBuffer[i], codePointBufferIdx[i]);
|
||||
codePointBufferIdx[i] = 0;
|
||||
}
|
||||
|
||||
sequenceLength -= subLength;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
st = mProbers[i]->HandleData(aBuf + start, sequenceLength, NULL, NULL);
|
||||
}
|
||||
|
||||
if (st == eFoundIt)
|
||||
{
|
||||
float cf = mProbers[i]->GetConfidence(0);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user