mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-07 17:26:41 +08:00
script: fix stupid bug in BuildLangModel.py (process_text() took a `text` parameter but its body used `content`).
This commit is contained in:
parent
fb1d544007
commit
6cd8c322ad
@ -164,14 +164,14 @@ characters = {}
|
|||||||
sequences = {}
|
sequences = {}
|
||||||
prev_char = None
|
prev_char = None
|
||||||
|
|
||||||
def process_text(text, lang):
|
def process_text(content, lang):
|
||||||
global charsets
|
global charsets
|
||||||
global characters
|
global characters
|
||||||
global sequences
|
global sequences
|
||||||
global prev_char
|
global prev_char
|
||||||
|
|
||||||
if lang.clean_wikipedia_content is not None:
|
if lang.clean_wikipedia_content is not None:
|
||||||
content = lang.clean_wikipedia_content(text)
|
content = lang.clean_wikipedia_content(content)
|
||||||
# Clean out the Wikipedia syntax for titles.
|
# Clean out the Wikipedia syntax for titles.
|
||||||
content = re.sub(r'(=+) *([^=]+) *\1',
|
content = re.sub(r'(=+) *([^=]+) *\1',
|
||||||
r'\2', content)
|
r'\2', content)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user