mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-08 01:36:41 +08:00
BuildLangModel: add an exception handler on charset spec errors.
This commit is contained in:
parent
4c8316f9cf
commit
7b4eb9827e
@ -359,7 +359,12 @@ for charset in charsets:
|
||||
elif cp_type == NUM:
|
||||
CTOM_str += 'NUM,'
|
||||
else: # LET
|
||||
uchar = bytes([cp]).decode(charset)
|
||||
try:
|
||||
uchar = bytes([cp]).decode(charset)
|
||||
except UnicodeDecodeError:
|
||||
print('Unknown character 0X{:X} in {}.'.format(cp, charset))
|
||||
print('Please verify your charset specification.\n')
|
||||
exit(1)
|
||||
#if lang.case_mapping and uchar.isupper() and \
|
||||
#len(unicodedata.normalize('NFC', uchar.lower())) == 1:
|
||||
# Unless we encounter special cases of characters with no
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user