script: slightly improve the handling of the use_ascii option.

This commit is contained in:
Jehan 2021-11-09 22:18:11 +01:00
parent 81b83fffa9
commit 6365cad4fd

View File

@ -139,11 +139,14 @@ def local_lowercase(text, lang):
lowercased += l lowercased += l
return lowercased return lowercased
if lang.use_ascii:
if lang.alphabet is None:
lang.alphabet = [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
else:
lang.alphabet += [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
if lang.alphabet is not None: if lang.alphabet is not None:
# Allowing to provide an alphabet in string format rather than list. # Allowing to provide an alphabet in string format rather than list.
lang.alphabet = list(lang.alphabet) lang.alphabet = list(lang.alphabet)
if lang.use_ascii:
lang.alphabet += [chr(l) for l in range(65, 91)] + [chr(l) for l in range(97, 123)]
if lang.case_mapping or lang.custom_case_mapping is not None: if lang.case_mapping or lang.custom_case_mapping is not None:
lang.alphabet = [local_lowercase(l, lang) for l in lang.alphabet] lang.alphabet = [local_lowercase(l, lang) for l in lang.alphabet]
#alphabet = [] #alphabet = []
@ -242,11 +245,6 @@ def process_text(content, lang):
if unicode_value in characters: if unicode_value in characters:
characters[unicode_value] += 1 characters[unicode_value] += 1
is_letter = True is_letter = True
elif lang.use_ascii and \
((unicode_value >= 65 and unicode_value <= 90) or \
(unicode_value >= 97 and unicode_value <= 122)):
characters[unicode_value] = 1
is_letter = True
elif lang.unicode_ranges is not None: elif lang.unicode_ranges is not None:
for start, end in lang.unicode_ranges: for start, end in lang.unicode_ranges:
if unicode_value >= start and unicode_value <= end: if unicode_value >= start and unicode_value <= end: