Poll the /tasks list endpoint instead of /tasks/{task_id} to avoid what appears to be a cached response

beveradb · beveradb · commit 001437876d20 · 2025-10-13T01:34:57.000-04:00
diff --git a/lyrics_transcriber/transcribers/audioshake.py b/lyrics_transcriber/transcribers/audioshake.py
@@ -16,7 +16,7 @@ class AudioShakeConfig:
     api_token: Optional[str] = None
     base_url: str = "https://api.audioshake.ai"
     output_prefix: Optional[str] = None
-    timeout_minutes: int = 10  # Added timeout configuration
+    timeout_minutes: int = 20  # Added timeout configuration
 
 
 class AudioShakeAPI:
@@ -73,12 +73,13 @@ def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
         """Poll for task completion and return results."""
         self.logger.info(f"Getting task result for task {task_id}")
 
-        url = f"{self.config.base_url}/tasks/{task_id}"
+        # Use the list endpoint which has fresh data, not the individual task endpoint which caches
+        url = f"{self.config.base_url}/tasks"
         start_time = time.time()
         last_status_log = start_time
         timeout_seconds = self.config.timeout_minutes * 60
         
-        # Add initial retry logic for 404 errors (task ID not yet available)
+        # Add initial retry logic for when task is not found yet
         initial_retry_count = 0
         max_initial_retries = 5
         initial_retry_delay = 2  # seconds
@@ -99,7 +100,24 @@ def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
             try:
                 response = requests.get(url, headers=self._get_headers())
                 response.raise_for_status()
-                task_data = response.json()
+                tasks_list = response.json()
+                
+                # Find our specific task in the list
+                task_data = None
+                for task in tasks_list:
+                    if task.get("id") == task_id:
+                        task_data = task
+                        break
+                
+                if not task_data:
+                    # Task not found in list yet
+                    if initial_retry_count < max_initial_retries:
+                        initial_retry_count += 1
+                        self.logger.info(f"Task not found in list yet (attempt {initial_retry_count}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
+                        time.sleep(initial_retry_delay)
+                        continue
+                    else:
+                        raise TranscriptionError(f"Task {task_id} not found in task list after {max_initial_retries} retries")
                 
                 # Log the full response for debugging
                 self.logger.debug(f"Task status response: {task_data}")
@@ -130,15 +148,7 @@ def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
                 initial_retry_count = 0
                 
             except requests.exceptions.HTTPError as e:
-                if e.response.status_code == 404 and initial_retry_count < max_initial_retries:
-                    # Task ID not yet available, retry with delay
-                    initial_retry_count += 1
-                    self.logger.info(f"Task ID not yet available (attempt {initial_retry_count}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
-                    time.sleep(initial_retry_delay)
-                    continue
-                else:
-                    # Re-raise the error if it's not a 404 or we've exceeded retries
-                    raise
+                raise
 
             time.sleep(30)  # Wait before next poll
 
diff --git a/tests/unit/transcribers/test_audioshake.py b/tests/unit/transcribers/test_audioshake.py
@@ -89,11 +89,14 @@ def test_create_task(self, mock_post, api):
     @patch("requests.get")
     def test_wait_for_task_result_success(self, mock_get, api):
         mock_response = Mock()
-        mock_response.json.return_value = {
-            "id": "task123",
-            "targets": [{"model": "alignment", "status": "completed"}],
-            "data": "test"
-        }
+        # Return a list of tasks (as the /tasks endpoint does)
+        mock_response.json.return_value = [
+            {
+                "id": "task123",
+                "targets": [{"model": "alignment", "status": "completed"}],
+                "data": "test"
+            }
+        ]
         mock_get.return_value = mock_response
 
         result = api.wait_for_task_result("task123")
@@ -105,10 +108,12 @@ def test_wait_for_task_result_success(self, mock_get, api):
     @patch("requests.get")
     def test_wait_for_task_result_failure(self, mock_get, api):
         mock_response = Mock()
-        mock_response.json.return_value = {
-            "id": "task123",
-            "targets": [{"model": "alignment", "status": "failed", "error": "test error"}]
-        }
+        mock_response.json.return_value = [
+            {
+                "id": "task123",
+                "targets": [{"model": "alignment", "status": "failed", "error": "test error"}]
+            }
+        ]
         mock_get.return_value = mock_response
 
         with pytest.raises(Exception, match="Target alignment failed: test error"):
@@ -119,9 +124,9 @@ def test_wait_for_task_result_failure(self, mock_get, api):
     def test_wait_for_task_result_polling(self, mock_sleep, mock_get, api):
         """Test polling behavior with in-progress status before completion"""
         mock_responses = [
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}),
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}),
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "completed"}], "data": "test"}),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}]),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}]),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "completed"}], "data": "test"}]),
         ]
         mock_get.side_effect = mock_responses
 
@@ -148,10 +153,12 @@ def test_wait_for_task_result_with_retries(self, mock_get, api):
     def test_wait_for_task_result_timeout(self, mock_time, mock_get, api):
         """Test that task polling times out after configured duration"""
         mock_time.side_effect = [0, api.config.timeout_minutes * 60 + 1]  # Simulate timeout
-        mock_get.return_value = Mock(json=lambda: {
-            "id": "task123",
-            "targets": [{"model": "alignment", "status": "processing"}]
-        })
+        mock_get.return_value = Mock(json=lambda: [
+            {
+                "id": "task123",
+                "targets": [{"model": "alignment", "status": "processing"}]
+            }
+        ])
 
         with pytest.raises(TranscriptionError, match=f"Transcription timed out after {api.config.timeout_minutes} minutes"):
             api.wait_for_task_result("task123")
@@ -163,9 +170,9 @@ def test_wait_for_task_result_logs_status(self, mock_sleep, mock_time, mock_get,
         """Test that task polling logs status periodically"""
         mock_time.side_effect = [0, 30, 61, 90]  # Simulate time passing
         mock_get.side_effect = [
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}),
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}),
-            Mock(json=lambda: {"id": "task123", "targets": [{"model": "alignment", "status": "completed"}], "data": "test"}),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}]),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "processing"}]}]),
+            Mock(json=lambda: [{"id": "task123", "targets": [{"model": "alignment", "status": "completed"}], "data": "test"}]),
         ]
 
         result = api.wait_for_task_result("task123")