Skip to content

Commit f73219f

Browse files
committed
made warning filter multithread-safe
1 parent 2ac3212 commit f73219f

1 file changed

Lines changed: 17 additions & 18 deletions

File tree

contextplus/wiki.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,22 @@ def get_text_chunks(page_titles, chunk_length=512, verbose=False):
2222
:return: list of wiki text chunks
2323
"""
2424
wiki_chunks = []
25-
with concurrent.futures.ThreadPoolExecutor() as executor:
26-
future_to_page = {executor.submit(get_page_content, page_title): page_title for page_title in page_titles}
27-
for future in concurrent.futures.as_completed(future_to_page):
28-
page_title = future_to_page[future]
29-
try:
30-
wiki_content = future.result()
31-
wiki_content = preprocess_and_chunk_wiki_content(wiki_content, chunk_length=chunk_length)
32-
if verbose:
33-
print(f"getting content of page {page_title}")
34-
wiki_chunks.extend(wiki_content)
35-
except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
36-
if verbose:
37-
print(f"page {page_title} not found")
38-
continue # skip the page if it is not available
25+
with warnings.catch_warnings():
26+
warnings.filterwarnings("ignore", category=UserWarning)
27+
with concurrent.futures.ThreadPoolExecutor() as executor:
28+
future_to_page = {executor.submit(get_page_content, page_title): page_title for page_title in page_titles}
29+
for future in concurrent.futures.as_completed(future_to_page):
30+
page_title = future_to_page[future]
31+
try:
32+
wiki_content = future.result()
33+
wiki_content = preprocess_and_chunk_wiki_content(wiki_content, chunk_length=chunk_length)
34+
if verbose:
35+
print(f"getting content of page {page_title}")
36+
wiki_chunks.extend(wiki_content)
37+
except (wikipedia.exceptions.PageError, wikipedia.exceptions.DisambiguationError):
38+
if verbose:
39+
print(f"page {page_title} not found")
40+
continue # skip the page if it is not available
3941
return wiki_chunks
4042

4143

@@ -45,10 +47,7 @@ def get_page_content(page_title):
4547
:param page_title: page_title of the wikipedia page from which the content should be extracted
4648
:return: content of the wikipedia page
4749
"""
48-
with warnings.catch_warnings():
49-
warnings.filterwarnings("ignore", category=UserWarning)
50-
page_content = wikipedia.page(page_title, auto_suggest=False).content
51-
return page_content
50+
return wikipedia.page(page_title, auto_suggest=False).content
5251

5352

5453
def preprocess_and_chunk_wiki_content(wiki_content, chunk_length=512):

0 commit comments

Comments
 (0)