src, test: fix the new Johab prober and add a test.

This prober comes from MR !1 on the main branch, though it was too
aggressive then and could not get merged. On the improved API branch, it
doesn't detect other tests as Johab anymore.

Also fixing it to work with the new API.

Finally adding a Johab/ko unit test.
This commit is contained in:
Jehan 2021-03-18 00:23:13 +01:00
parent 3996b9d648
commit 0d152ff430
4 changed files with 15 additions and 8 deletions

View File

@ -45,7 +45,9 @@ void nsJohabProber::Reset(void)
//mContextAnalyser.Reset(); //mContextAnalyser.Reset();
} }
nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen) nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen,
int** cpBuffer,
int* cpBufferIdx)
{ {
PRUint32 codingState; PRUint32 codingState;
@ -74,7 +76,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen)
mLastChar[0] = aBuf[aLen-1]; mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting) if (mState == eDetecting)
if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt; mState = eFoundIt;
// else // else
// mDistributionAnalyser.HandleData(aBuf, aLen); // mDistributionAnalyser.HandleData(aBuf, aLen);
@ -82,7 +84,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen)
return mState; return mState;
} }
float nsJohabProber::GetConfidence(void) float nsJohabProber::GetConfidence(int candidate)
{ {
float distribCf = mDistributionAnalyser.GetConfidence(); float distribCf = mDistributionAnalyser.GetConfidence();

View File

@ -50,11 +50,15 @@ public:
Reset(); Reset();
} }
virtual ~nsJohabProber(void){delete mCodingSM;} virtual ~nsJohabProber(void){delete mCodingSM;}
nsProbingState HandleData(const char* aBuf, PRUint32 aLen); nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
const char* GetCharSetName() {return "Johab";} int** cpBuffer,
int* cpBufferIdx);
int GetCandidates() {return 1;}
const char* GetCharSetName(int) {return "Johab";}
const char* GetLanguage(int) {return "ko";}
nsProbingState GetState(void) {return mState;} nsProbingState GetState(void) {return mState;}
void Reset(void); void Reset(void);
float GetConfidence(void); float GetConfidence(int candidate);
void SetOpion() {} void SetOpion() {}
protected: protected:

View File

@ -304,7 +304,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++) for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
{ {
float langConf = langDetectors[i][j]->GetConfidence(); float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0;
if (cf * langConf > CANDIDATE_THRESHOLD) if (cf * langConf > CANDIDATE_THRESHOLD)
{ {
@ -352,7 +352,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++) for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++)
{ {
float langConf = langDetectors[i][j]->GetConfidence(); float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0;
if (cf * langConf > CANDIDATE_THRESHOLD) if (cf * langConf > CANDIDATE_THRESHOLD)
{ {

1
test/ko/johab.txt Normal file
View File

@ -0,0 +1 @@
Ðei·e Ñ©­¡Ÿ¡µÁ ”}­¡Ÿ¡ ¡¡<C2A1> ­¡Ÿ¡Ëi·i ¥¥˜á  e—e ·q­¡¢…¸a<C2B8>¡[1] Ðei  ¤óµA¬á“e ”}­¡Ÿ¡ 14‰Á Ñ©­¡Ÿ¡ 10 ¡¡<C2A1> 24·i Îaº…·a<C2B7>¡ ¬q“e”a. "<22>aœu i·¡ º—Š‚‰Á ”iœa" ¢…¹AŸi “a<E2809C>¥ ­A¹·”<C2B7>µ×·¡ ÐeŠ´á“e ¢‰<C2A2>¥ ·¡¶•<C2B6>aœa  iŒa»¡ <20>aÈa<C388><61>•¡<E280A2>¢ 1443e Àw¹AÐaµa 1446e ¤eÍ¡Ðaµv”a. <20>{i¸aˆa <20>{­¡Ÿ¡ e <20>aÈa<C388><61>£a<C2A3>¡ ·q­¡¢…¸a·¡¡a <20>{<7B><><EFBFBD>¡ Á¡¬÷, º—¬÷, ¹·¬÷·i ¢ƒ´á A¡¡ ÄeµA ¡¡´a³a¡ ˜<>¢…µA ·q¸é¢…¸<61> Ëb¬÷•¡ ˆx“e”a. Àá·qµA“e ”}­¡Ÿ¡ 17µA Ñ©­¡Ÿ¡ 11 ¡¡<C2A1> 28·¡´ö·a<C2B7>a áµA 4·i ·°‰¡ ·¡¹A“e 24 e ³e”a. ”<>Ðe£¥Š‰Á ¹¡¬å£¥º<C2A5>º<EFBFBD>·<EFBFBD>·¥£¥‰·ÑÁЉÁ µ…¥… ¹¡¬å¹¢ ¸aáº<C2A1>µA¬á“e ‰·¶w ¢…¸a<C2B8>¡, ·¥•¡A¯¡´a ¦<>É¥ ¬ñµA¬á“e ¿¡´a¿¡´a´á·<C3A1> ¥¡¹¡ ¢…¸a<C2B8>¡ À<>ÈÐaµv”a. ¹¡¬å£¥º<C2A5>º<EFBFBD>·<EFBFBD>·¥£¥‰·ÑÁеA¬á“e ¹¡¬åi(óXëhi)·¡œa ¦<>Ÿe”a.