Skip to content

Commit bd2f618

Browse files
committed
v0.9.2
1 parent 9d1a707 commit bd2f618

1 file changed

Lines changed: 13 additions & 26 deletions

File tree

  • packages/fetchcraft-parsing-docling/src/fetchcraft/parsing/docling

packages/fetchcraft-parsing-docling/src/fetchcraft/parsing/docling/server.py

Lines changed: 13 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -362,14 +362,16 @@ async def parse_single_file(
362362
"""
363363
Parse a single file and return DocumentNodes.
364364
365+
This function runs the CPU-intensive parsing in a thread pool
366+
to avoid blocking the event loop.
367+
365368
Args:
366369
file: Uploaded file
367370
temp_dir: Temporary directory for file storage
368371
369372
Returns:
370373
ParseResponse with parsing results
371374
"""
372-
start_time = time.time()
373375
filename = file.filename or "unknown"
374376

375377
try:
@@ -393,41 +395,26 @@ async def parse_single_file(
393395
buffer.write(content)
394396

395397
# Use file semaphore to limit concurrent file processing
398+
# Run parsing in thread pool to avoid blocking the event loop
396399
async with app_state.file_semaphore:
397-
# Create parser for this file
398-
parser = DoclingDocumentParser.from_file(
399-
file_path=temp_path,
400-
page_chunks=PAGE_CHUNKS,
401-
do_ocr=DO_OCR,
402-
do_table_structure=DO_TABLE_STRUCTURE
403-
)
404-
405-
# Parse document and collect nodes
406-
nodes = []
407-
async for node in parser.get_documents():
408-
# Convert node to dictionary for JSON serialization
409-
nodes.append(node.model_dump())
410-
411-
processing_time = (time.time() - start_time) * 1000
412-
413-
return ParseResponse(
414-
filename=filename,
415-
success=True,
416-
nodes=nodes,
417-
error=None,
418-
num_nodes=len(nodes),
419-
processing_time_ms=round(processing_time, 2)
400+
loop = asyncio.get_event_loop()
401+
result = await loop.run_in_executor(
402+
app_state.executor,
403+
parse_file_sync,
404+
temp_path
420405
)
406+
# Yield control back to event loop
407+
await asyncio.sleep(0)
408+
return result
421409

422410
except Exception as e:
423-
processing_time = (time.time() - start_time) * 1000
424411
return ParseResponse(
425412
filename=filename,
426413
success=False,
427414
nodes=[],
428415
error=str(e),
429416
num_nodes=0,
430-
processing_time_ms=round(processing_time, 2)
417+
processing_time_ms=0
431418
)
432419

433420

0 commit comments

Comments
 (0)