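script: fix a stupid bug in BuildLangModel.py (process_text() read `content` before assignment when the language defines no clean_wikipedia_content hook).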

This commit is contained in:
Jehan 2016-05-25 15:23:36 +02:00
parent fb1d544007
commit 6cd8c322ad

View File

@@ -164,14 +164,14 @@ characters = {}
sequences = {} sequences = {}
prev_char = None prev_char = None
def process_text(text, lang): def process_text(content, lang):
global charsets global charsets
global characters global characters
global sequences global sequences
global prev_char global prev_char
if lang.clean_wikipedia_content is not None: if lang.clean_wikipedia_content is not None:
content = lang.clean_wikipedia_content(text) content = lang.clean_wikipedia_content(content)
# Clean out the Wikipedia syntax for titles. # Clean out the Wikipedia syntax for titles.
content = re.sub(r'(=+) *([^=]+) *\1', content = re.sub(r'(=+) *([^=]+) *\1',
r'\2', content) r'\2', content)