From c11174bedc7d42d3c28186e219b43f3fd6f4b3de Mon Sep 17 00:00:00 2001
From: Joaquin <joaquingc123@gmail.com>
Date: Tue, 26 Aug 2025 15:11:11 -0500
Subject: [PATCH] fix: handle empty collections in download endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, the download endpoint returned a 500 error when asked to download from an empty collection, because calculating docs_limit divided by an estimated item size of zero. The endpoint now handles this case gracefully: when the collection is empty or size estimation fails, docs_limit falls back to MAX_WEB_DOWNLOAD_SIZE.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 estela-api/api/views/job_data.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/estela-api/api/views/job_data.py b/estela-api/api/views/job_data.py
index b604edf6..145c4adb 100644
--- a/estela-api/api/views/job_data.py
+++ b/estela-api/api/views/job_data.py
@@ -190,13 +190,14 @@ def download(self, request, *args, **kwargs):
                 kwargs["pid"], job_collection_name
             )
         else:
-            docs_limit = max(
-                1,
-                settings.MAX_WEB_DOWNLOAD_SIZE
-                // spiderdata_db_client.get_estimated_item_size(
+            try:
+                estimated_size = spiderdata_db_client.get_estimated_item_size(
                     kwargs["pid"], job_collection_name
-                ),
-            )
+                )
+                docs_limit = max(1, settings.MAX_WEB_DOWNLOAD_SIZE // estimated_size)
+            except:
+                docs_limit = settings.MAX_WEB_DOWNLOAD_SIZE
+            
             data = spiderdata_db_client.get_dataset_data(
                 kwargs["pid"], job_collection_name, docs_limit
             )