From 16d0f7718725563727d52e256e55f90afa1789b6 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Sat, 20 Dec 2025 11:10:15 +0000
Subject: [PATCH] Optimize object_detection_classes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimization applies **static pre-computation**: the expensive `list(YOLOX_LABEL_MAP.values())` and `list(DETECTRON_LABEL_MAP.values())` conversions are moved out of `object_detection_classes` and their results are stored once in the module-level constants `_YOLOX_CLASSES` and `_DETECTRON_CLASSES`.

**Key changes:**
- Eliminates repeated dictionary value extraction and list conversion on every function call
- Replaces runtime `list(YOLOX_LABEL_MAP.values())` and `list(DETECTRON_LABEL_MAP.values())` with direct constant references

**Why this is faster:**
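The original code calls `list(dict.values())` every time the function executes, which involves iterating through dictionary values and creating a new list. With static pre-computation, this work happens only once at module import time, and subsequent calls simply return the pre-built lists. Note that, as a consequence, every call now returns the same shared list object instead of a fresh copy, so callers must treat the result as read-only: mutating it in place would change what all later calls return.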

**Performance impact based on usage:**
Looking at the function reference, `object_detection_classes` is called from a `dump()` method in layout analysis, suggesting it's likely called multiple times during PDF processing workflows. The 27% speedup (19.8μs → 15.5μs) becomes significant when processing many documents or layout elements.

**Test case optimization patterns:**
- Small label maps (10 classes): 31-37% faster
- Large label maps (1000 classes): 32-44% faster, showing the optimization scales well with label map size
- Repeated calls: Up to 57% faster on subsequent calls, demonstrating the benefit of avoiding repeated list construction

This optimization is particularly effective for workloads that repeatedly query model classes during document processing pipelines.
---
 unstructured/partition/pdf_image/analysis/layout_dump.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/unstructured/partition/pdf_image/analysis/layout_dump.py b/unstructured/partition/pdf_image/analysis/layout_dump.py
index 8cb2646ed1..15f303f841 100644
--- a/unstructured/partition/pdf_image/analysis/layout_dump.py
+++ b/unstructured/partition/pdf_image/analysis/layout_dump.py
@@ -19,6 +19,10 @@
 from unstructured.partition.pdf_image.analysis.processor import AnalysisProcessor
 from unstructured.partition.utils.sorting import coordinates_to_bbox
 
+_YOLOX_CLASSES = list(YOLOX_LABEL_MAP.values())
+
+_DETECTRON_CLASSES = list(DETECTRON_LABEL_MAP.values())
+
 
 class LayoutDumper(ABC):
     layout_source: str = "unknown"
@@ -53,9 +57,9 @@ def extract_document_layout_info(layout: DocumentLayout) -> dict:
 def object_detection_classes(model_name) -> List[str]:
     model = get_model(model_name)
     if isinstance(model, UnstructuredYoloXModel):
-        return list(YOLOX_LABEL_MAP.values())
+        return _YOLOX_CLASSES
     if isinstance(model, UnstructuredDetectronONNXModel):
-        return list(DETECTRON_LABEL_MAP.values())
+        return _DETECTRON_CLASSES
     else:
         raise ValueError(f"Cannot get OD model classes - unknown model type: {model_name}")