mirror of
https://gitlab.freedesktop.org/uchardet/uchardet.git
synced 2025-12-07 17:26:41 +08:00
script: fix small issues with commits e41e8a4 and 8d15d6b.
This commit is contained in:
parent
c11c362b89
commit
5aa628272b
@ -241,7 +241,10 @@ def visit_pages(titles, depth, lang, logfd):
|
|||||||
return
|
return
|
||||||
|
|
||||||
next_titles = []
|
next_titles = []
|
||||||
max_titles = int(options.max_page/(options.max_depth * options.max_depth))
|
if options.max_page is not None:
|
||||||
|
max_titles = int(options.max_page/(options.max_depth * options.max_depth))
|
||||||
|
else:
|
||||||
|
max_titles = sys.maxsize
|
||||||
for title in titles:
|
for title in titles:
|
||||||
if options.max_page is not None and \
|
if options.max_page is not None and \
|
||||||
len(visited_pages) > options.max_page:
|
len(visited_pages) > options.max_page:
|
||||||
@ -266,12 +269,12 @@ def visit_pages(titles, depth, lang, logfd):
|
|||||||
logfd.flush()
|
logfd.flush()
|
||||||
|
|
||||||
process_text(page.content, lang)
|
process_text(page.content, lang)
|
||||||
links = page.links
|
|
||||||
random.shuffle(links)
|
|
||||||
if len(links) > max_titles:
|
|
||||||
links = links[:max_titles]
|
|
||||||
try:
|
try:
|
||||||
next_titles += links
|
links = page.links
|
||||||
|
random.shuffle(links)
|
||||||
|
if len(links) > max_titles:
|
||||||
|
links = links[:max_titles]
|
||||||
|
next_titles += links
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -300,6 +303,7 @@ except requests.exceptions.ConnectionError:
|
|||||||
logfd.write('\n\n== End of Parsed pages ==')
|
logfd.write('\n\n== End of Parsed pages ==')
|
||||||
logfd.write('\n\n- Wikipedia parsing ended at: {}\n'.format(str(datetime.datetime.now())))
|
logfd.write('\n\n- Wikipedia parsing ended at: {}\n'.format(str(datetime.datetime.now())))
|
||||||
logfd.flush()
|
logfd.flush()
|
||||||
|
|
||||||
########### CHARACTERS ###########
|
########### CHARACTERS ###########
|
||||||
|
|
||||||
# Character ratios.
|
# Character ratios.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user