Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion hivemind_etl/website/website_etl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import logging
from typing import Any

from hivemind_etl.website.crawlee_client import CrawleeClient
Expand Down Expand Up @@ -47,7 +48,13 @@ async def extract(
"""
if not urls:
raise ValueError("No URLs provided for crawling")
- extracted_data = await self.crawlee_client.crawl(urls)

+ extracted_data = []
+ for url in urls:
+ logging.info(f"Crawling {url} and its routes!")
+ extracted_data.extend(await self.crawlee_client.crawl(links=[url]))
+
+ logging.info(f"Extracted {len(extracted_data)} documents!")

if not extracted_data:
raise ValueError(f"No data extracted from URLs: {urls}")
Expand Down
Loading