mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
script: slightly improve the handling of the use_ascii option.
This commit is contained in:
parent
81b83fffa9
commit
6365cad4fd
@ -139,11 +139,14 @@ def local_lowercase(text, lang):
|
|||||||
lowercased += l
|
lowercased += l
|
||||||
return lowercased
|
return lowercased
|
||||||
|
|
||||||
|
if lang.use_ascii:
|
||||||
|
if lang.alphabet is None:
|
||||||
|
lang.alphabet = [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
|
||||||
|
else:
|
||||||
|
lang.alphabet += [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
|
||||||
if lang.alphabet is not None:
|
if lang.alphabet is not None:
|
||||||
# Allowing to provide an alphabet in string format rather than list.
|
# Allowing to provide an alphabet in string format rather than list.
|
||||||
lang.alphabet = list(lang.alphabet)
|
lang.alphabet = list(lang.alphabet)
|
||||||
if lang.use_ascii:
|
|
||||||
lang.alphabet += [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
|
|
||||||
if lang.case_mapping or lang.custom_case_mapping is not None:
|
if lang.case_mapping or lang.custom_case_mapping is not None:
|
||||||
lang.alphabet = [local_lowercase(l, lang) for l in lang.alphabet]
|
lang.alphabet = [local_lowercase(l, lang) for l in lang.alphabet]
|
||||||
#alphabet = []
|
#alphabet = []
|
||||||
@ -242,11 +245,6 @@ def process_text(content, lang):
|
|||||||
if unicode_value in characters:
|
if unicode_value in characters:
|
||||||
characters[unicode_value] += 1
|
characters[unicode_value] += 1
|
||||||
is_letter = True
|
is_letter = True
|
||||||
elif lang.use_ascii and \
|
|
||||||
((unicode_value >= 65 and unicode_value <= 90) or \
|
|
||||||
(unicode_value >= 97 and unicode_value <= 122)):
|
|
||||||
characters[unicode_value] = 1
|
|
||||||
is_letter = True
|
|
||||||
elif lang.unicode_ranges is not None:
|
elif lang.unicode_ranges is not None:
|
||||||
for start, end in lang.unicode_ranges:
|
for start, end in lang.unicode_ranges:
|
||||||
if unicode_value >= start and unicode_value <= end:
|
if unicode_value >= start and unicode_value <= end:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user