mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 08:46:40 +08:00
src, test: fix the new Johab prober and add a test.
This prober comes from MR !1 on the main branch, though it was too aggressive then and could not get merged. On the improved API branch, it doesn't detect other tests as Johab anymore. This commit also fixes it to work with the new API and, finally, adds a Johab/ko unit test.
This commit is contained in:
parent
3996b9d648
commit
0d152ff430
@ -45,7 +45,9 @@ void nsJohabProber::Reset(void)
|
|||||||
//mContextAnalyser.Reset();
|
//mContextAnalyser.Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen)
|
nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen,
|
||||||
|
int** cpBuffer,
|
||||||
|
int* cpBufferIdx)
|
||||||
{
|
{
|
||||||
PRUint32 codingState;
|
PRUint32 codingState;
|
||||||
|
|
||||||
@ -74,7 +76,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
mLastChar[0] = aBuf[aLen-1];
|
mLastChar[0] = aBuf[aLen-1];
|
||||||
|
|
||||||
if (mState == eDetecting)
|
if (mState == eDetecting)
|
||||||
if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
|
if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
|
||||||
mState = eFoundIt;
|
mState = eFoundIt;
|
||||||
// else
|
// else
|
||||||
// mDistributionAnalyser.HandleData(aBuf, aLen);
|
// mDistributionAnalyser.HandleData(aBuf, aLen);
|
||||||
@ -82,7 +84,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen)
|
|||||||
return mState;
|
return mState;
|
||||||
}
|
}
|
||||||
|
|
||||||
float nsJohabProber::GetConfidence(void)
|
float nsJohabProber::GetConfidence(int candidate)
|
||||||
{
|
{
|
||||||
float distribCf = mDistributionAnalyser.GetConfidence();
|
float distribCf = mDistributionAnalyser.GetConfidence();
|
||||||
|
|
||||||
|
|||||||
@ -50,11 +50,15 @@ public:
|
|||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
virtual ~nsJohabProber(void){delete mCodingSM;}
|
virtual ~nsJohabProber(void){delete mCodingSM;}
|
||||||
nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
|
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
|
||||||
const char* GetCharSetName() {return "Johab";}
|
int** cpBuffer,
|
||||||
|
int* cpBufferIdx);
|
||||||
|
int GetCandidates() {return 1;}
|
||||||
|
const char* GetCharSetName(int) {return "Johab";}
|
||||||
|
const char* GetLanguage(int) {return "ko";}
|
||||||
nsProbingState GetState(void) {return mState;}
|
nsProbingState GetState(void) {return mState;}
|
||||||
void Reset(void);
|
void Reset(void);
|
||||||
float GetConfidence(void);
|
float GetConfidence(int candidate);
|
||||||
void SetOpion() {}
|
void SetOpion() {}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@ -304,7 +304,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
|
|||||||
|
|
||||||
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
|
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
|
||||||
{
|
{
|
||||||
float langConf = langDetectors[i][j]->GetConfidence();
|
float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0;
|
||||||
|
|
||||||
if (cf * langConf > CANDIDATE_THRESHOLD)
|
if (cf * langConf > CANDIDATE_THRESHOLD)
|
||||||
{
|
{
|
||||||
@ -352,7 +352,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
|
|||||||
|
|
||||||
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
|
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
|
||||||
{
|
{
|
||||||
float langConf = langDetectors[i][j]->GetConfidence();
|
float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0;
|
||||||
|
|
||||||
if (cf * langConf > CANDIDATE_THRESHOLD)
|
if (cf * langConf > CANDIDATE_THRESHOLD)
|
||||||
{
|
{
|
||||||
|
|||||||
1
test/ko/johab.txt
Normal file
1
test/ko/johab.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
Ðe‹i·e Ñ©¡Ÿ¡µÁ ”}¡Ÿ¡ ¡¡–<C2A1> ¡Ÿ¡Ëi·i ¥¥˜á e—e ·q¡¢…¸a<C2B8>¡[1] Ðe‹i x‘¤óµA¬á“e ”}¡Ÿ¡ 14‰Á Ñ©¡Ÿ¡ 10 ¡¡–<C2A1> 24·i Îaº…·a<C2B7>¡ ¬q“e”a. "<22>aœu i·¡ º—Š‚‰Á ”iœa" ¢…¹AŸi “a<E2809C>¥ A¹·”<C2B7>µ×·¡ ÐeŠ‚´á“e ¢‰<C2A2>¥ ·¡¶•<C2B6>aœa iŒa»¡ <20>aÈa<C388><61>•¡<E280A2>¢ 1443‘e Àw¹AÐaµa 1446‘e ¤eÍ¡Ðaµv”a. <20>{‹i¸aˆa <20>{¡Ÿ¡ e <20>aÈa<C388><61>£a<C2A3>¡ ·q¡¢…¸a·¡¡a <20>{<7B><><EFBFBD>¡ Á¡¬÷, º—¬÷, ¹·¬÷·i ¢ƒ´á ‘A¡¡ ÄeµA ¡¡´a³a‹¡ ˜<>¢…µA ·q¸é¢…¸a·<61> Ëb¬÷•¡ ˆx“e”a. Àá·qµA“e ”}¡Ÿ¡ 17µA Ñ©¡Ÿ¡ 11 ¡¡–<C2A1> 28·¡´ö·a<C2B7>a –áµA 4·i ·°‰¡ ·¡¹A“e 24 e ³e”a. ”<>Ðe£¥Š‚‰Á ¹¡¬å£¥º<C2A5>º<EFBFBD>·<EFBFBD>·¥£¥‰·ÑÁŠ‚‰Á µ…¥… ¹¡¬å¹¢ ¸aáº<C2A1>µA¬á“e ‰·¶w ¢…¸a<C2B8>¡, ·¥•¡‘A¯¡´a ¦<>É¥ ¬ñµA¬á“e ¿¡´a¿¡´a´á·<C3A1> ¥¡¹¡ ¢…¸a<C2B8>¡ À<>È‚Ðaµv”a. ¹¡¬å£¥º<C2A5>º<EFBFBD>·<EFBFBD>·¥£¥‰·ÑÁŠ‚µA¬á“e ¹¡¬å‹i(óXëh‹i)·¡œa ¦<>Ÿe”a.
|
||||||
Loading…
x
Reference in New Issue
Block a user