mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-07 09:16:42 +08:00
script: Wikipedia API's python wrapper does not return garbage text anymore.
I can't see any new commits to the wrapper since 2014, so I am assuming the issue was on Wikipedia's side and that it has been fixed.
This commit is contained in:
parent
37024460fe
commit
d24bd7d578
@ -57,7 +57,7 @@ case_mapping = True
|
|||||||
# A function to clean content returned by the `wikipedia` python lib,
|
# A function to clean content returned by the `wikipedia` python lib,
|
||||||
# in case some unwanted data has been overlooked.
|
# in case some unwanted data has been overlooked.
|
||||||
def clean_wikipedia_content(content):
|
def clean_wikipedia_content(content):
|
||||||
cleaned = re.sub(r'(=+) *([^=]+) *Επεξεργασία \1',
|
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||||
r'\2',
|
r'\2',
|
||||||
content)
|
content)
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|||||||
@ -71,7 +71,7 @@ case_mapping = True
|
|||||||
# A function to clean content returned by the `wikipedia` python lib,
|
# A function to clean content returned by the `wikipedia` python lib,
|
||||||
# in case some unwanted data has been overlooked.
|
# in case some unwanted data has been overlooked.
|
||||||
def clean_wikipedia_content(content):
|
def clean_wikipedia_content(content):
|
||||||
cleaned = re.sub(r'(=+) *([^=]+) *Editar \1',
|
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||||
r'\2',
|
r'\2',
|
||||||
content)
|
content)
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|||||||
@ -72,7 +72,7 @@ case_mapping = True
|
|||||||
# in case some unwanted data has been overlooked.
|
# in case some unwanted data has been overlooked.
|
||||||
def clean_wikipedia_content(content):
|
def clean_wikipedia_content(content):
|
||||||
# We get a "modify" link in the text: "=== Articles connexesModifier ==="
|
# We get a "modify" link in the text: "=== Articles connexesModifier ==="
|
||||||
cleaned = re.sub(r'(=+) *([^=]+) *Modifier \1',
|
cleaned = re.sub(r'(=+) *([^=]+) *\1',
|
||||||
r'\2',
|
r'\2',
|
||||||
content)
|
content)
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user