mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-07 17:26:41 +08:00
script: work around recent issue of python wikipedia module.
Add `auto_suggest=False` to the wikipedia.page() call because the auto-suggest feature is completely broken: it searches for "mar ot" instead of "marmot", or "ground hug" instead of "Groundhog" (this one is extra funny, but not so useful!). I actually wonder why it even needs to suggest anything when the Wikipedia pages actually exist! Anyway, the script BuildLangModel.py was badly broken because of this; now it works better. See https://github.com/goldsmith/Wikipedia/issues/295 for details. Also print the error message when we discard a page, which helps debugging.
This commit is contained in:
parent
a3ff09bece
commit
81b83fffa9
@@ -322,11 +322,11 @@ def visit_pages(titles, depth, lang, logfd):
|
|||||||
|
|
||||||
visited_pages += [title]
|
visited_pages += [title]
|
||||||
try:
|
try:
|
||||||
page = wikipedia.page(title)
|
page = wikipedia.page(title, auto_suggest=False)
|
||||||
except (wikipedia.exceptions.PageError,
|
except (wikipedia.exceptions.PageError,
|
||||||
wikipedia.exceptions.DisambiguationError):
|
wikipedia.exceptions.DisambiguationError) as error:
|
||||||
# Let's just discard a page when I get an exception.
|
# Let's just discard a page when I get an exception.
|
||||||
print("Discarding page {}.\n".format(title))
|
print("Discarding page {}: {}\n".format(title, error))
|
||||||
continue
|
continue
|
||||||
logfd.write("\n{} (revision {})".format(title, page.revision_id))
|
logfd.write("\n{} (revision {})".format(title, page.revision_id))
|
||||||
logfd.flush()
|
logfd.flush()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user