diff --git a/openviking/parse/parsers/html.py b/openviking/parse/parsers/html.py
index c71ecee..7e867a3 100644
--- a/openviking/parse/parsers/html.py
+++ b/openviking/parse/parsers/html.py
@@ -351,6 +351,7 @@ async def _handle_download_link(
Returns:
ParseResult from delegated parser
"""
+ temp_path = None
try:
# Download to temporary file
temp_path = await self._download_file(url)
@@ -358,36 +359,25 @@ async def _handle_download_link(
# Get appropriate parser
if file_type == "pdf":
from openviking.parse.parsers.pdf import PDFParser
-
parser = PDFParser()
+ result = await parser.parse(temp_path)
elif file_type == "markdown":
from openviking.parse.parsers.markdown import MarkdownParser
-
parser = MarkdownParser()
+ result = await parser.parse(temp_path)
elif file_type == "text":
from openviking.parse.parsers.text import TextParser
-
parser = TextParser()
+ result = await parser.parse(temp_path)
elif file_type == "html":
# Parse downloaded HTML locally
return await self._parse_local_file(Path(temp_path), start_time, **kwargs)
else:
raise ValueError(f"Unsupported file type: {file_type}")
- # Parse downloaded file
- result = await parser.parse(temp_path)
-
- # Update metadata
result.meta.update(meta)
result.meta["downloaded_from"] = url
result.meta["url_type"] = f"download_{file_type}"
-
- # Clean up temp file
- try:
- Path(temp_path).unlink()
- except Exception:
- pass
-
return result
except Exception as e:
@@ -399,6 +389,14 @@ async def _handle_download_link(
parse_time=time.time() - start_time,
warnings=[f"Failed to download/parse link: {e}"],
)
+ finally:
+ if temp_path:
+ try:
+ p = Path(temp_path)
+ if p.exists():
+ p.unlink()
+ except Exception:
+ pass
async def _handle_code_repository(
self, url: str, start_time: float, meta: Dict[str, Any], **kwargs