uchardet/src/symbols.cmake
Jehan 7f99b91388 src: new weight concept in the C API.
Pretty basic: you can weight preferred languages and this will impact the
result. Say the algorithm "hesitates" between encoding E1 in language L1
and encoding E2 in language L2. By setting L2 with a 1.1 weight, for
instance because this is the OS language, or the usual preferred language,
you may help the algorithm to overcome very tight cases.

It can also be helpful when you already know for sure the language of a
document, you just don't know its encoding. Then you may set a very high
value for this language, or simply set a default value of 0, and set 1
for this language. Only relevant encodings will be taken into account.

This is still limited though, as generic encodings are still implemented
in a language-agnostic way. UTF-8 for instance would be disadvantaged by this
weight system until we make it language-aware.
2021-03-14 00:12:30 +01:00

42 lines
1.1 KiB
CMake

# Names of the symbols the library exports. Later in this file the list is
# turned into a Darwin exported-symbols list or a GNU ld version script, so
# anything not named here is hidden from consumers on those toolchains.
set(UCHARDET_SYMBOLS
    uchardet_new
    uchardet_delete
    uchardet_handle_data
    uchardet_data_end
    uchardet_reset
    uchardet_get_charset
    uchardet_get_candidates
    uchardet_get_encoding
    uchardet_get_confidence
    uchardet_get_language
    uchardet_set_default_weight
    uchardet_weigh_language
)
# Build the linker flags that restrict the exported symbols to the names in
# UCHARDET_SYMBOLS. The result is left in LINK_FLAGS (empty when neither
# branch below applies) for the set_target_properties() call that follows.
set(LINK_FLAGS "")
if(APPLE)
  # Create a symbols_list file for the Darwin linker: one symbol per line,
  # each prefixed with an underscore. The REPLACE inserts "\n_" between list
  # items; the first item gets its underscore from the file(WRITE) below.
  string(REPLACE ";" "\n_" _symbols "${UCHARDET_SYMBOLS}")
  set(_symbols_list "${CMAKE_CURRENT_BINARY_DIR}/symbols.list")
  # Quote the path so a ';' in the build directory cannot split the argument.
  file(WRITE "${_symbols_list}" "_${_symbols}\n")
  set(LINK_FLAGS
      "${LINK_FLAGS} -Wl,-exported_symbols_list,'${_symbols_list}'")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  # "GNU" is quoted so if() compares against the literal string instead of
  # dereferencing a variable that happens to be named GNU.
  #
  # Create a version script for the GNU ld. Expanding the list inside a
  # quoted string keeps its ';' separators, which is also the separator the
  # version script syntax uses between symbol names.
  set(_symbols "{ global: ${UCHARDET_SYMBOLS}; local: *; };")
  set(_version_script "${CMAKE_CURRENT_BINARY_DIR}/version.script")
  file(WRITE "${_version_script}" "${_symbols}\n")
  set(LINK_FLAGS "${LINK_FLAGS} -Wl,--version-script,\"${_version_script}\"")
endif()
# Apply the flags collected in LINK_FLAGS to the target(s) named by
# UCHARDET_LIBRARY.
# NOTE(review): UCHARDET_LIBRARY is defined outside this file; presumably it
# names the uchardet library target -- confirm against the including
# CMakeLists.txt.
set_target_properties(${UCHARDET_LIBRARY} PROPERTIES LINK_FLAGS "${LINK_FLAGS}")