src: new API to get all candidates and their confidence.

Adding:
- uchardet_get_candidates()
- uchardet_get_encoding()
- uchardet_get_confidence()

Also deprecating uchardet_get_charset() to steer developers toward the
new API. I was unsure whether it should really be deprecated, since it
keeps the basic case simple, but the new API is just as easy anyway: you
can call uchardet_get_encoding() directly with candidate 0, which behaves
like uchardet_get_charset() and returns "" when no candidate was found.
This commit is contained in:
Jehan 2020-04-23 16:40:02 +02:00
parent 15fc8f0a0f
commit 8118133e00
3 changed files with 51 additions and 3 deletions

View File

@ -6,6 +6,9 @@ set(
uchardet_data_end
uchardet_reset
uchardet_get_charset
uchardet_get_candidates
uchardet_get_encoding
uchardet_get_confidence
)
set (LINK_FLAGS "")

View File

@ -111,9 +111,19 @@ public:
candidates.clear();
}
const char* GetCharset() const
size_t GetCandidates() const
{
return (candidates.size() > 0) ? candidates[0].encoding : "";
return candidates.size();
}
// Encoding name stored for candidate `i`.
// An index past the end of the candidate list yields "" instead of
// touching the container.
const char* GetCharset(size_t i) const
{
    if (i < candidates.size())
    {
        return candidates[i].encoding;
    }
    return "";
}
// Confidence value stored for candidate `i`.
// An index past the end of the candidate list yields 0 instead of
// touching the container.
float GetConfidence(size_t i) const
{
    if (i >= candidates.size())
    {
        return 0.0f;
    }
    return candidates[i].confidence;
}
};
@ -149,5 +159,22 @@ void uchardet_reset(uchardet_t ud)
/*
 * Legacy accessor: encoding name of the first candidate (index 0), or ""
 * when the detector holds no candidate.
 *
 * `ud` is reinterpreted as a HandleUniversalDetector and is dereferenced
 * without a null check.
 */
const char* uchardet_get_charset(uchardet_t ud)
{
    /* Single return through the indexed accessor, the same path that
     * uchardet_get_encoding() takes. */
    return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(0);
}
/*
 * Number of candidates the detector behind `ud` currently holds.
 *
 * `ud` is reinterpreted as a HandleUniversalDetector and is dereferenced
 * without a null check.
 */
size_t uchardet_get_candidates (uchardet_t ud)
{
    HandleUniversalDetector* const detector =
        reinterpret_cast<HandleUniversalDetector*>(ud);

    return detector->GetCandidates();
}
/*
 * Confidence value of the candidate at index `candidate`, as reported by
 * the detector behind `ud`.
 *
 * `ud` is reinterpreted as a HandleUniversalDetector and is dereferenced
 * without a null check; range handling for `candidate` is left to
 * GetConfidence().
 */
float uchardet_get_confidence (uchardet_t ud,
                               size_t     candidate)
{
    HandleUniversalDetector* const detector =
        reinterpret_cast<HandleUniversalDetector*>(ud);

    return detector->GetConfidence(candidate);
}
/*
 * Encoding name of the candidate at index `candidate`, as reported by the
 * detector behind `ud`.
 *
 * `ud` is reinterpreted as a HandleUniversalDetector and is dereferenced
 * without a null check; range handling for `candidate` is left to
 * GetCharset().
 */
const char * uchardet_get_encoding (uchardet_t ud,
                                    size_t     candidate)
{
    HandleUniversalDetector* const detector =
        reinterpret_cast<HandleUniversalDetector*>(ud);

    return detector->GetCharset(candidate);
}

View File

@ -54,6 +54,16 @@ extern "C" {
#define UCHARDET_INTERFACE
#endif
/*
 * DEPRECATED(message): prefix for a declaration that callers should stop
 * using. Expands to whichever deprecation annotation the compiler offers:
 *
 *  - C++14 and later: the standard [[deprecated(message)]] attribute.
 *  - GCC / Clang:     __attribute__((deprecated)); the message is dropped.
 *  - MSVC:            __declspec(deprecated(message)). MSVC reports
 *                     __cplusplus as 199711L unless /Zc:__cplusplus is
 *                     given, so C++ builds normally land here as well.
 *  - anything else:   expands to nothing, after a preprocessor warning.
 *
 * The expansion must consist of the annotation only: it is placed directly
 * in front of a full declaration, so any extra token would corrupt it.
 */
#if defined(__cplusplus) && (__cplusplus >= 201402L)
#define DEPRECATED(message) [[deprecated(message)]]
#elif defined(__GNUC__) || defined(__clang__)
#define DEPRECATED(message) __attribute__ ((deprecated))
#elif defined(_MSC_VER)
#define DEPRECATED(message) __declspec(deprecated(message))
#else
#warning("DEPRECATED macro not available")
#define DEPRECATED(message)
#endif
/**
* A handle for a uchardet encoding detector.
@ -102,8 +112,16 @@ UCHARDET_INTERFACE void uchardet_reset(uchardet_t ud);
* @param ud [in] handle of an instance of uchardet
* @return name of charset on success and "" on failure.
*/
DEPRECATED("use uchardet_get_candidates() and uchardet_get_encoding() instead (since 0.1.0)")
UCHARDET_INTERFACE const char * uchardet_get_charset(uchardet_t ud);
/**
 * Get the number of candidate encodings currently held by the detector.
 * @param ud [in] handle of an instance of uchardet
 * @return number of candidates; valid candidate indexes are 0 to this
 *         value minus one.
 */
UCHARDET_INTERFACE size_t uchardet_get_candidates (uchardet_t ud);
/**
 * Get the confidence value of one candidate.
 * NOTE(review): the scale of the value and the ordering of candidates are
 * not visible from this header - confirm against the detector code.
 * @param ud [in] handle of an instance of uchardet
 * @param candidate [in] index of the candidate
 * @return confidence of the candidate, or 0 when the index is out of range.
 */
UCHARDET_INTERFACE float uchardet_get_confidence (uchardet_t ud,
size_t candidate);
/**
 * Get the encoding name of one candidate.
 * @param ud [in] handle of an instance of uchardet
 * @param candidate [in] index of the candidate
 * @return name of the encoding, or "" when the index is out of range.
 */
UCHARDET_INTERFACE const char * uchardet_get_encoding (uchardet_t ud,
size_t candidate);
#ifdef __cplusplus
}
#endif