mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-06 16:56:40 +08:00
script: fix small issues with commits e41e8a4 and 8d15d6b.
This commit is contained in:
parent
c11c362b89
commit
5aa628272b
@@ -241,7 +241,10 @@ def visit_pages(titles, depth, lang, logfd):
|
||||
return
|
||||
|
||||
next_titles = []
|
||||
max_titles = int(options.max_page/(options.max_depth * options.max_depth))
|
||||
if options.max_page is not None:
|
||||
max_titles = int(options.max_page/(options.max_depth * options.max_depth))
|
||||
else:
|
||||
max_titles = sys.maxsize
|
||||
for title in titles:
|
||||
if options.max_page is not None and \
|
||||
len(visited_pages) > options.max_page:
|
||||
@@ -266,12 +269,12 @@ def visit_pages(titles, depth, lang, logfd):
|
||||
logfd.flush()
|
||||
|
||||
process_text(page.content, lang)
|
||||
links = page.links
|
||||
random.shuffle(links)
|
||||
if len(links) > max_titles:
|
||||
links = links[:max_titles]
|
||||
try:
|
||||
next_titles += links
|
||||
links = page.links
|
||||
random.shuffle(links)
|
||||
if len(links) > max_titles:
|
||||
links = links[:max_titles]
|
||||
next_titles += links
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
@@ -300,6 +303,7 @@ except requests.exceptions.ConnectionError:
|
||||
logfd.write('\n\n== End of Parsed pages ==')
|
||||
logfd.write('\n\n- Wikipedia parsing ended at: {}\n'.format(str(datetime.datetime.now())))
|
||||
logfd.flush()
|
||||
|
||||
########### CHARACTERS ###########
|
||||
|
||||
# Character ratios.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user