From 81b83fffa9b0fa044878fdd154f4e6adf9aa4e68 Mon Sep 17 00:00:00 2001 From: Jehan Date: Tue, 9 Nov 2021 22:06:47 +0100 Subject: [PATCH] script: work around recent issue of python wikipedia module. Adding `auto_suggest=False` to the wikipedia.page() call because this auto-suggest is completely broken, searching "mar ot" instead of "marmot" or "ground hug" instead of "Groundhog" (this one is extra funny but not so useful!). I actually wonder why it even needs to suggest anything when the Wikipedia pages do actually exist! Anyway the script BuildLangModel.py was very broken because of this, now it's better. See: https://github.com/goldsmith/Wikipedia/issues/295 Also printing the error message when we discard a page, which helps debugging. --- script/BuildLangModel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/BuildLangModel.py b/script/BuildLangModel.py index d4f315c..faf28bd 100755 --- a/script/BuildLangModel.py +++ b/script/BuildLangModel.py @@ -322,11 +322,11 @@ def visit_pages(titles, depth, lang, logfd): visited_pages += [title] try: - page = wikipedia.page(title) + page = wikipedia.page(title, auto_suggest=False) except (wikipedia.exceptions.PageError, - wikipedia.exceptions.DisambiguationError): + wikipedia.exceptions.DisambiguationError) as error: # Let's just discard a page when I get an exception. - print("Discarding page {}.\n".format(title)) + print("Discarding page {}: {}\n".format(title, error)) continue logfd.write("\n{} (revision {})".format(title, page.revision_id)) logfd.flush()