mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
script: Wikipedia API's python wrapper does not return garbage text anymore.
I can't see any new commits to the wrapper since 2014, so I am assuming the issue was on Wikipedia's side and that it has since been fixed.
This commit is contained in:
parent
37024460fe
commit
d24bd7d578
@ -57,7 +57,7 @@ case_mapping = True
|
||||
# A function to clean content returned by the `wikipedia` python lib,
|
||||
# in case some unwanted data has been overlooked.
|
||||
def clean_wikipedia_content(content):
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *Επεξεργασία \1',
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||
r'\2',
|
||||
content)
|
||||
return cleaned
|
||||
|
||||
@ -71,7 +71,7 @@ case_mapping = True
|
||||
# A function to clean content returned by the `wikipedia` python lib,
|
||||
# in case some unwanted data has been overlooked.
|
||||
def clean_wikipedia_content(content):
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *Editar \1',
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||
r'\2',
|
||||
content)
|
||||
return cleaned
|
||||
|
||||
@ -72,7 +72,7 @@ case_mapping = True
|
||||
# in case some unwanted data has been overlooked.
|
||||
def clean_wikipedia_content(content):
|
||||
# We get the "Modifier" (edit) link fused into the heading text: "=== Articles connexesModifier ==="
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *Modifier \1',
|
||||
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||
r'\2',
|
||||
content)
|
||||
return cleaned
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user