From ccb5d40a6fa72f9f84721abb040d3237abf9de0a Mon Sep 17 00:00:00 2001 From: Jehan Date: Thu, 18 Mar 2021 00:23:13 +0100 Subject: [PATCH] src, test: fix the new Johab prober and add a test. This prober comes from MR !1 on the main branch though it was too aggressive then and could not get merged. On the improved API branch, it doesn't detect other tests as Johab anymore. Also fixing it to work with the new API. Finally adding a Johab/ko unit test. --- src/nsJohabProber.cpp | 8 +++++--- src/nsJohabProber.h | 10 +++++++--- src/nsMBCSGroupProber.cpp | 4 ++-- test/ko/johab.txt | 1 + 4 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 test/ko/johab.txt diff --git a/src/nsJohabProber.cpp b/src/nsJohabProber.cpp index 379a9c3..b5d2321 100644 --- a/src/nsJohabProber.cpp +++ b/src/nsJohabProber.cpp @@ -45,7 +45,9 @@ void nsJohabProber::Reset(void) //mContextAnalyser.Reset(); } -nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen) +nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen, + int** cpBuffer, + int* cpBufferIdx) { PRUint32 codingState; @@ -74,7 +76,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen) mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; // else // mDistributionAnalyser.HandleData(aBuf, aLen); @@ -82,7 +84,7 @@ nsProbingState nsJohabProber::HandleData(const char* aBuf, PRUint32 aLen) return mState; } -float nsJohabProber::GetConfidence(void) +float nsJohabProber::GetConfidence(int candidate) { float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsJohabProber.h b/src/nsJohabProber.h index e7bc520..02532b3 100644 --- a/src/nsJohabProber.h +++ b/src/nsJohabProber.h @@ -50,11 +50,15 @@ public: Reset(); } virtual ~nsJohabProber(void){delete mCodingSM;} - 
nsProbingState HandleData(const char* aBuf, PRUint32 aLen); - const char* GetCharSetName() {return "Johab";} + nsProbingState HandleData(const char* aBuf, PRUint32 aLen, + int** cpBuffer, + int* cpBufferIdx); + int GetCandidates() {return 1;} + const char* GetCharSetName(int) {return "Johab";} + const char* GetLanguage(int) {return "ko";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} protected: diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index dfeb0a8..60db2ce 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -304,7 +304,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen, for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++) { - float langConf = langDetectors[i][j]->GetConfidence(); + float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0; if (cf * langConf > CANDIDATE_THRESHOLD) { @@ -352,7 +352,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen, for (PRUint32 j = 0; j < NUM_OF_LANGUAGES; j++) { - float langConf = langDetectors[i][j]->GetConfidence(); + float langConf = langDetectors[i][j] ? langDetectors[i][j]->GetConfidence() : 1.0; if (cf * langConf > CANDIDATE_THRESHOLD) { diff --git a/test/ko/johab.txt b/test/ko/johab.txt new file mode 100644 index 0000000..7cf0802 --- /dev/null +++ b/test/ko/johab.txt @@ -0,0 +1 @@ +eie ѩ } ii ee qa[1] ei x‘Ae } 14 ѩ 10 24i aa qea. "aui ia" Ai a A׷ ee aa ia aa 1443e wAaa 1446e e͡ava. {iaa {e aaa qaa { , , i A eA aa A q颅a b xea. qAe } 17A ѩ 11 28aa A 4i Ae 24e ea. e 壥 幢 aáAe w a, Aa ɥ Ae aa᷁ a Ȃava. 壥Ae i(Xhi)a ea.