From 1b17e965191b038cd1220024c7e44b65fca98a2e Mon Sep 17 00:00:00 2001
From: Shantanu Bala <shantanu@roboflow.com>
Date: Tue, 25 Mar 2025 10:14:06 -0400
Subject: [PATCH 1/2] Fix handling of invalid annotation descriptions and add
 tests to dataset uploads

---
 roboflow/core/workspace.py |   5 +-
 tests/test_project.py      | 253 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 254 insertions(+), 4 deletions(-)

diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
index a73da13b..db0dbe08 100644
--- a/roboflow/core/workspace.py
+++ b/roboflow/core/workspace.py
@@ -360,7 +360,7 @@ def _save_annotation(image_id, imagedesc):
             annotation_path = None
 
             annotationdesc = imagedesc.get("annotationfile")
-            if annotationdesc:
+            if isinstance(annotationdesc, dict):
                 if annotationdesc.get("rawText"):
                     annotation_path = annotationdesc
                 else:
@@ -369,8 +369,7 @@ def _save_annotation(image_id, imagedesc):
 
                 if isinstance(labelmap, str):
                     labelmap = load_labelmap(labelmap)
-
-            if not annotation_path:
+            else:
                 return None, None
 
             annotation, upload_time, _retry_attempts = project.save_annotation(
diff --git a/tests/test_project.py b/tests/test_project.py
index 4949fde1..b8ffb992 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -1,14 +1,82 @@
 import requests
 import responses
 from responses.matchers import json_params_matcher
+from unittest.mock import patch
 
 from roboflow import API_URL
 from roboflow.adapters.rfapi import AnnotationSaveError, ImageUploadError
 from roboflow.config import DEFAULT_BATCH_NAME
-from tests import PROJECT_NAME, ROBOFLOW_API_KEY, WORKSPACE_NAME, RoboflowTest
+from tests import PROJECT_NAME, ROBOFLOW_API_KEY, WORKSPACE_NAME, RoboflowTest, ordered
 
 
 class TestProject(RoboflowTest):
+    def _create_test_dataset(self, images=None):
+        """
+        Create a test dataset with specified images or a default image
+        
+        Args:
+            images: List of image dictionaries. If None, a default image will be used.
+            
+        Returns:
+            Dictionary representing a parsed dataset
+        """
+        if images is None:
+            images = [
+                {
+                    "file": "image1.jpg",
+                    "split": "train",
+                    "annotationfile": {
+                        "file": "image1.xml"
+                    }
+                }
+            ]
+            
+        return {
+            "location": "/test/location/",
+            "images": images
+        }
+    
+    def _setup_upload_dataset_mocks(self, test_dataset=None, image_return=None, annotation_return=None, 
+                                    project_created=False, save_annotation_side_effect=None, 
+                                    upload_image_side_effect=None):
+        """
+        Set up common mocks for upload_dataset tests
+        
+        Args:
+            test_dataset: The dataset to return from parsefolder. If None, creates a default dataset
+            image_return: Return value for upload_image. Default is successful upload
+            annotation_return: Return value for save_annotation. Default is successful annotation
+            project_created: Whether to simulate a newly created project
+            save_annotation_side_effect: Side effect function for save_annotation
+            upload_image_side_effect: Side effect function for upload_image
+            
+        Returns:
+            Dictionary of mock objects with start and stop methods
+        """
+        if test_dataset is None:
+            test_dataset = self._create_test_dataset()
+            
+        if image_return is None:
+            image_return = ({"id": "test-id", "success": True}, 0.1, 0)
+            
+        if annotation_return is None:
+            annotation_return = ({"success": True}, 0.1, 0)
+            
+        # Create the mock objects
+        mocks = {
+            'parser': patch('roboflow.core.workspace.folderparser.parsefolder', return_value=test_dataset),
+            'upload': patch('roboflow.core.workspace.Project.upload_image', 
+                          side_effect=upload_image_side_effect) if upload_image_side_effect 
+                          else patch('roboflow.core.workspace.Project.upload_image', return_value=image_return),
+            'save_annotation': patch('roboflow.core.workspace.Project.save_annotation', 
+                                   side_effect=save_annotation_side_effect) if save_annotation_side_effect
+                                   else patch('roboflow.core.workspace.Project.save_annotation', return_value=annotation_return),
+            'get_project': patch('roboflow.core.workspace.Workspace._get_or_create_project', 
+                               return_value=(self.project, project_created))
+        }
+        
+        return mocks
+        
     def test_check_valid_image_with_accepted_formats(self):
         images_to_test = [
             "rabbit.JPG",
@@ -224,3 +292,186 @@ def test_create_annotation_job_error(self):
             )
 
         self.assertEqual(str(context.exception), "Batch not found")
+        
+    @ordered
+    @responses.activate
+    def test_project_upload_dataset(self):
+        """Test upload_dataset functionality with various scenarios"""
+        test_scenarios = [
+            {
+                "name": "string_annotationdesc",
+                "dataset": [{
+                    "file": "test_image.jpg", 
+                    "split": "train",
+                    "annotationfile": "string_annotation.txt"
+                }],
+                "params": {"num_workers": 1},
+                "assertions": {}
+            },
+            {
+                "name": "success_basic",
+                "dataset": [
+                    {"file": "image1.jpg", "split": "train", "annotationfile": {"file": "image1.xml"}},
+                    {"file": "image2.jpg", "split": "valid", "annotationfile": {"file": "image2.xml"}}
+                ],
+                "params": {},
+                "assertions": {
+                    "parser": [("/test/dataset",)],
+                    "upload": {"count": 2},
+                    "save_annotation": {"count": 2}
+                },
+                "image_return": ({"id": "test-id-1", "success": True}, 0.1, 0)
+            },
+            {
+                "name": "custom_parameters",
+                "dataset": None,
+                "params": {
+                    "num_workers": 2,
+                    "project_license": "CC BY 4.0",
+                    "project_type": "classification",
+                    "batch_name": "test-batch",
+                    "num_retries": 3
+                },
+                "assertions": {
+                    "upload": {"count": 1, "kwargs": {"batch_name": "test-batch", "num_retry_uploads": 3}}
+                }
+            },
+            {
+                "name": "project_creation",
+                "dataset": None,
+                "params": {"project_name": "new-project"},
+                "assertions": {},
+                "project_created": True
+            },
+            {
+                "name": "with_labelmap",
+                "dataset": [{
+                    "file": "image1.jpg",
+                    "split": "train",
+                    "annotationfile": {
+                        "file": "image1.xml",
+                        "labelmap": "path/to/labelmap.json"
+                    }
+                }],
+                "params": {},
+                "assertions": {
+                    "save_annotation": {"count": 1},
+                    "load_labelmap": {"count": 1}
+                },
+                "extra_mocks": [
+                    ("load_labelmap", "roboflow.core.workspace.load_labelmap", {"return_value": {"old_label": "new_label"}})
+                ]
+            },
+            {
+                "name": "concurrent_uploads",
+                "dataset": [{"file": f"image{i}.jpg", "split": "train"} for i in range(10)],
+                "params": {"num_workers": 5},
+                "assertions": {
+                    "thread_pool": {"count": 1, "kwargs": {"max_workers": 5}}
+                },
+                "extra_mocks": [
+                    ("thread_pool", "concurrent.futures.ThreadPoolExecutor", {})
+                ]
+            },
+            {
+                "name": "empty_dataset",
+                "dataset": [],
+                "params": {},
+                "assertions": {
+                    "upload": {"count": 0}
+                }
+            },
+            {
+                "name": "raw_text_annotation",
+                "dataset": [{
+                    "file": "image1.jpg",
+                    "split": "train",
+                    "annotationfile": {
+                        "rawText": "annotation content here",
+                        "format": "json"
+                    }
+                }],
+                "params": {},
+                "assertions": {
+                    "save_annotation": {"count": 1}
+                }
+            }
+        ]
+        
+        error_cases = [
+            {
+                "name": "image_upload_error",
+                "side_effect": {
+                    "upload_image_side_effect": lambda *args, **kwargs: 
+                        (_ for _ in ()).throw(ImageUploadError("Failed to upload image"))
+                },
+                "params": {"num_workers": 1}
+            },
+            {
+                "name": "annotation_upload_error",
+                "side_effect": {
+                    "save_annotation_side_effect": lambda *args, **kwargs: 
+                        (_ for _ in ()).throw(AnnotationSaveError("Failed to save annotation"))
+                },
+                "params": {"num_workers": 1}
+            }
+        ]
+                
+        for scenario in test_scenarios:
+            test_dataset = self._create_test_dataset(scenario.get("dataset")) if scenario.get("dataset") is not None else None
+            
+            extra_mocks = {}
+            if "extra_mocks" in scenario:
+                for mock_name, target, config in scenario.get("extra_mocks", []):
+                    extra_mocks[mock_name] = patch(target, **config)
+            
+            mocks = self._setup_upload_dataset_mocks(
+                test_dataset=test_dataset,
+                image_return=scenario.get("image_return"),
+                project_created=scenario.get("project_created", False)
+            )
+            
+            mock_objects = {}
+            for name, mock in mocks.items():
+                mock_objects[name] = mock.start()
+            
+            for name, mock in extra_mocks.items():
+                mock_objects[name] = mock.start()
+            
+            try:
+                params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
+                params.update(scenario.get("params", {}))
+                
+                self.workspace.upload_dataset(**params)
+                
+                for mock_name, assertion in scenario.get("assertions", {}).items():
+                    if isinstance(assertion, list):
+                        mock_obj = mock_objects.get(mock_name)
+                        call_args_list = [args for args, _ in mock_obj.call_args_list]
+                        for expected_args in assertion:
+                            self.assertIn(expected_args, call_args_list)
+                    elif isinstance(assertion, dict):
+                        mock_obj = mock_objects.get(mock_name)
+                        if "count" in assertion:
+                            self.assertEqual(mock_obj.call_count, assertion["count"])
+                        if "kwargs" in assertion and mock_obj.call_count > 0:
+                            _, kwargs = mock_obj.call_args
+                            for key, value in assertion["kwargs"].items():
+                                self.assertEqual(kwargs.get(key), value)
+            finally:
+                for mock in list(mocks.values()) + list(extra_mocks.values()):
+                    mock.stop()
+        
+        for case in error_cases:
+            mocks = self._setup_upload_dataset_mocks(**case.get("side_effect", {}))
+            
+            for mock in mocks.values():
+                mock.start()
+            
+            try:
+                params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
+                params.update(case.get("params", {}))
+                self.workspace.upload_dataset(**params)
+            finally:
+                for mock in mocks.values():
+                    mock.stop()

From 74c8b7c06d795a76ba60b0b5f1a87cabc5a509e5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 25 Mar 2025 14:18:18 +0000
Subject: [PATCH 2/2] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?=
 =?UTF-8?q?format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_project.py | 209 +++++++++++++++++++-----------------------
 1 file changed, 94 insertions(+), 115 deletions(-)

diff --git a/tests/test_project.py b/tests/test_project.py
index b8ffb992..50c1c37f 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -1,7 +1,8 @@
+from unittest.mock import patch
+
 import requests
 import responses
 from responses.matchers import json_params_matcher
-from unittest.mock import patch
 
 from roboflow import API_URL
 from roboflow.adapters.rfapi import AnnotationSaveError, ImageUploadError
@@ -13,35 +14,30 @@ class TestProject(RoboflowTest):
     def _create_test_dataset(self, images=None):
         """
         Create a test dataset with specified images or a default image
-        
+
         Args:
             images: List of image dictionaries. If None, a default image will be used.
-            
+
         Returns:
             Dictionary representing a parsed dataset
         """
         if images is None:
-            images = [
-                {
-                    "file": "image1.jpg",
-                    "split": "train",
-                    "annotationfile": {
-                        "file": "image1.xml"
-                    }
-                }
-            ]
-            
-        return {
-            "location": "/test/location/",
-            "images": images
-        }
-    
-    def _setup_upload_dataset_mocks(self, test_dataset=None, image_return=None, annotation_return=None, 
-                                    project_created=False, save_annotation_side_effect=None, 
-                                    upload_image_side_effect=None):
+            images = [{"file": "image1.jpg", "split": "train", "annotationfile": {"file": "image1.xml"}}]
+
+        return {"location": "/test/location/", "images": images}
+
+    def _setup_upload_dataset_mocks(
+        self,
+        test_dataset=None,
+        image_return=None,
+        annotation_return=None,
+        project_created=False,
+        save_annotation_side_effect=None,
+        upload_image_side_effect=None,
+    ):
         """
         Set up common mocks for upload_dataset tests
-        
+
         Args:
             test_dataset: The dataset to return from parsefolder. If None, creates a default dataset
             image_return: Return value for upload_image. Default is successful upload
@@ -49,34 +45,37 @@ def _setup_upload_dataset_mocks(self, test_dataset=None, image_return=None, anno
             project_created: Whether to simulate a newly created project
             save_annotation_side_effect: Side effect function for save_annotation
             upload_image_side_effect: Side effect function for upload_image
-            
+
         Returns:
             Dictionary of mock objects with start and stop methods
         """
         if test_dataset is None:
             test_dataset = self._create_test_dataset()
-            
+
         if image_return is None:
             image_return = ({"id": "test-id", "success": True}, 0.1, 0)
-            
+
         if annotation_return is None:
             annotation_return = ({"success": True}, 0.1, 0)
-            
+
         # Create the mock objects
         mocks = {
-            'parser': patch('roboflow.core.workspace.folderparser.parsefolder', return_value=test_dataset),
-            'upload': patch('roboflow.core.workspace.Project.upload_image', 
-                          side_effect=upload_image_side_effect) if upload_image_side_effect 
-                          else patch('roboflow.core.workspace.Project.upload_image', return_value=image_return),
-            'save_annotation': patch('roboflow.core.workspace.Project.save_annotation', 
-                                   side_effect=save_annotation_side_effect) if save_annotation_side_effect
-                                   else patch('roboflow.core.workspace.Project.save_annotation', return_value=annotation_return),
-            'get_project': patch('roboflow.core.workspace.Workspace._get_or_create_project', 
-                               return_value=(self.project, project_created))
+            "parser": patch("roboflow.core.workspace.folderparser.parsefolder", return_value=test_dataset),
+            "upload": patch("roboflow.core.workspace.Project.upload_image", side_effect=upload_image_side_effect)
+            if upload_image_side_effect
+            else patch("roboflow.core.workspace.Project.upload_image", return_value=image_return),
+            "save_annotation": patch(
+                "roboflow.core.workspace.Project.save_annotation", side_effect=save_annotation_side_effect
+            )
+            if save_annotation_side_effect
+            else patch("roboflow.core.workspace.Project.save_annotation", return_value=annotation_return),
+            "get_project": patch(
+                "roboflow.core.workspace.Workspace._get_or_create_project", return_value=(self.project, project_created)
+            ),
         }
-        
+
         return mocks
-        
+
     def test_check_valid_image_with_accepted_formats(self):
         images_to_test = [
             "rabbit.JPG",
@@ -292,7 +291,7 @@ def test_create_annotation_job_error(self):
             )
 
         self.assertEqual(str(context.exception), "Batch not found")
-        
+
     @ordered
     @responses.activate
     def test_project_upload_dataset(self):
@@ -300,27 +299,19 @@ def test_project_upload_dataset(self):
         test_scenarios = [
             {
                 "name": "string_annotationdesc",
-                "dataset": [{
-                    "file": "test_image.jpg", 
-                    "split": "train",
-                    "annotationfile": "string_annotation.txt"
-                }],
+                "dataset": [{"file": "test_image.jpg", "split": "train", "annotationfile": "string_annotation.txt"}],
                 "params": {"num_workers": 1},
-                "assertions": {}
+                "assertions": {},
             },
             {
                 "name": "success_basic",
                 "dataset": [
                     {"file": "image1.jpg", "split": "train", "annotationfile": {"file": "image1.xml"}},
-                    {"file": "image2.jpg", "split": "valid", "annotationfile": {"file": "image2.xml"}}
+                    {"file": "image2.jpg", "split": "valid", "annotationfile": {"file": "image2.xml"}},
                 ],
                 "params": {},
-                "assertions": {
-                    "parser": [("/test/dataset",)],
-                    "upload": {"count": 2},
-                    "save_annotation": {"count": 2}
-                },
-                "image_return": ({"id": "test-id-1", "success": True}, 0.1, 0)
+                "assertions": {"parser": [("/test/dataset",)], "upload": {"count": 2}, "save_annotation": {"count": 2}},
+                "image_return": ({"id": "test-id-1", "success": True}, 0.1, 0),
             },
             {
                 "name": "custom_parameters",
@@ -330,120 +321,108 @@ def test_project_upload_dataset(self):
                     "project_license": "CC BY 4.0",
                     "project_type": "classification",
                     "batch_name": "test-batch",
-                    "num_retries": 3
+                    "num_retries": 3,
                 },
-                "assertions": {
-                    "upload": {"count": 1, "kwargs": {"batch_name": "test-batch", "num_retry_uploads": 3}}
-                }
+                "assertions": {"upload": {"count": 1, "kwargs": {"batch_name": "test-batch", "num_retry_uploads": 3}}},
             },
             {
                 "name": "project_creation",
                 "dataset": None,
                 "params": {"project_name": "new-project"},
                 "assertions": {},
-                "project_created": True
+                "project_created": True,
             },
             {
                 "name": "with_labelmap",
-                "dataset": [{
-                    "file": "image1.jpg",
-                    "split": "train",
-                    "annotationfile": {
-                        "file": "image1.xml",
-                        "labelmap": "path/to/labelmap.json"
+                "dataset": [
+                    {
+                        "file": "image1.jpg",
+                        "split": "train",
+                        "annotationfile": {"file": "image1.xml", "labelmap": "path/to/labelmap.json"},
                     }
-                }],
+                ],
                 "params": {},
-                "assertions": {
-                    "save_annotation": {"count": 1},
-                    "load_labelmap": {"count": 1}
-                },
+                "assertions": {"save_annotation": {"count": 1}, "load_labelmap": {"count": 1}},
                 "extra_mocks": [
-                    ("load_labelmap", "roboflow.core.workspace.load_labelmap", {"return_value": {"old_label": "new_label"}})
-                ]
+                    (
+                        "load_labelmap",
+                        "roboflow.core.workspace.load_labelmap",
+                        {"return_value": {"old_label": "new_label"}},
+                    )
+                ],
             },
             {
                 "name": "concurrent_uploads",
                 "dataset": [{"file": f"image{i}.jpg", "split": "train"} for i in range(10)],
                 "params": {"num_workers": 5},
-                "assertions": {
-                    "thread_pool": {"count": 1, "kwargs": {"max_workers": 5}}
-                },
-                "extra_mocks": [
-                    ("thread_pool", "concurrent.futures.ThreadPoolExecutor", {})
-                ]
-            },
-            {
-                "name": "empty_dataset",
-                "dataset": [],
-                "params": {},
-                "assertions": {
-                    "upload": {"count": 0}
-                }
+                "assertions": {"thread_pool": {"count": 1, "kwargs": {"max_workers": 5}}},
+                "extra_mocks": [("thread_pool", "concurrent.futures.ThreadPoolExecutor", {})],
             },
+            {"name": "empty_dataset", "dataset": [], "params": {}, "assertions": {"upload": {"count": 0}}},
             {
                 "name": "raw_text_annotation",
-                "dataset": [{
-                    "file": "image1.jpg",
-                    "split": "train",
-                    "annotationfile": {
-                        "rawText": "annotation content here",
-                        "format": "json"
+                "dataset": [
+                    {
+                        "file": "image1.jpg",
+                        "split": "train",
+                        "annotationfile": {"rawText": "annotation content here", "format": "json"},
                     }
-                }],
+                ],
                 "params": {},
-                "assertions": {
-                    "save_annotation": {"count": 1}
-                }
-            }
+                "assertions": {"save_annotation": {"count": 1}},
+            },
         ]
-        
+
         error_cases = [
             {
                 "name": "image_upload_error",
                 "side_effect": {
-                    "upload_image_side_effect": lambda *args, **kwargs: 
-                        (_ for _ in ()).throw(ImageUploadError("Failed to upload image"))
+                    "upload_image_side_effect": lambda *args, **kwargs: (_ for _ in ()).throw(
+                        ImageUploadError("Failed to upload image")
+                    )
                 },
-                "params": {"num_workers": 1}
+                "params": {"num_workers": 1},
             },
             {
                 "name": "annotation_upload_error",
                 "side_effect": {
-                    "save_annotation_side_effect": lambda *args, **kwargs: 
-                        (_ for _ in ()).throw(AnnotationSaveError("Failed to save annotation"))
+                    "save_annotation_side_effect": lambda *args, **kwargs: (_ for _ in ()).throw(
+                        AnnotationSaveError("Failed to save annotation")
+                    )
                 },
-                "params": {"num_workers": 1}
-            }
+                "params": {"num_workers": 1},
+            },
         ]
-                
+
         for scenario in test_scenarios:
-            test_dataset = self._create_test_dataset(scenario.get("dataset")) if scenario.get("dataset") is not None else None
-            
+            test_dataset = (
+                self._create_test_dataset(scenario.get("dataset")) if scenario.get("dataset") is not None else None
+            )
+
             extra_mocks = {}
             if "extra_mocks" in scenario:
                 for mock_name, target, config in scenario.get("extra_mocks", []):
                     extra_mocks[mock_name] = patch(target, **config)
-            
+
             mocks = self._setup_upload_dataset_mocks(
                 test_dataset=test_dataset,
                 image_return=scenario.get("image_return"),
-                project_created=scenario.get("project_created", False)
+                project_created=scenario.get("project_created", False),
             )
-            
+
             mock_objects = {}
             for name, mock in mocks.items():
                 mock_objects[name] = mock.start()
-            
+
             for name, mock in extra_mocks.items():
                 mock_objects[name] = mock.start()
-            
+
             try:
                 params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
                 params.update(scenario.get("params", {}))
-                
+
                 self.workspace.upload_dataset(**params)
-                
+
                 for mock_name, assertion in scenario.get("assertions", {}).items():
                     if isinstance(assertion, list):
                         mock_obj = mock_objects.get(mock_name)
@@ -461,13 +440,13 @@ def test_project_upload_dataset(self):
             finally:
                 for mock in list(mocks.values()) + list(extra_mocks.values()):
                     mock.stop()
-        
+
         for case in error_cases:
             mocks = self._setup_upload_dataset_mocks(**case.get("side_effect", {}))
-            
+
             for mock in mocks.values():
                 mock.start()
-            
+
             try:
                 params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
                 params.update(case.get("params", {}))