Skip to content
This repository was archived by the owner on Jan 19, 2026. It is now read-only.

Commit be4e14e

Browse files
committed
Merge remote-tracking branch 'origin/main' into 001-agentic-ai-corrector
2 parents 727a15a + 86e9382 commit be4e14e

5 files changed

Lines changed: 721 additions & 562 deletions

File tree

.github/workflows/test-and-publish.yml

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,18 @@ jobs:
1717

1818
steps:
1919
- uses: actions/checkout@v3
20-
20+
21+
- name: Free Disk Space
22+
uses: jlumbroso/free-disk-space@main
23+
with:
24+
tool-cache: false
25+
android: true
26+
dotnet: false
27+
haskell: false
28+
large-packages: false
29+
docker-images: false
30+
swap-storage: false
31+
2132
- name: Set up Python ${{ matrix.python-version }}
2233
uses: actions/setup-python@v4
2334
with:

lyrics_transcriber/transcribers/audioshake.py

Lines changed: 110 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ class AudioShakeConfig:
1414
"""Configuration for AudioShake transcription service."""
1515

1616
api_token: Optional[str] = None
17-
base_url: str = "https://groovy.audioshake.ai"
17+
base_url: str = "https://api.audioshake.ai"
1818
output_prefix: Optional[str] = None
19-
timeout_minutes: int = 10 # Added timeout configuration
19+
timeout_minutes: int = 20 # Added timeout configuration
2020

2121

2222
class AudioShakeAPI:
@@ -34,46 +34,52 @@ def _validate_config(self) -> None:
3434
def _get_headers(self) -> Dict[str, str]:
3535
"""Get headers for API requests."""
3636
self._validate_config() # Validate before making any API calls
37-
return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}
37+
return {"x-api-key": self.config.api_token, "Content-Type": "application/json"}
3838

3939
def upload_file(self, filepath: str) -> str:
40-
"""Upload audio file and return asset ID."""
40+
"""Upload audio file and return file URL."""
4141
self.logger.info(f"Uploading {filepath} to AudioShake")
4242
self._validate_config() # Validate before making API call
4343

4444
url = f"{self.config.base_url}/upload/"
4545
with open(filepath, "rb") as file:
4646
files = {"file": (os.path.basename(filepath), file)}
47-
response = requests.post(url, headers={"Authorization": self._get_headers()["Authorization"]}, files=files)
47+
response = requests.post(url, headers={"x-api-key": self.config.api_token}, files=files)
4848

4949
self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
5050
response.raise_for_status()
51-
return response.json()["id"]
51+
return response.json()["link"]
5252

53-
def create_job(self, asset_id: str) -> str:
54-
"""Create transcription job and return job ID."""
55-
self.logger.info(f"Creating job for asset {asset_id}")
53+
def create_task(self, file_url: str) -> str:
54+
"""Create transcription task and return task ID."""
55+
self.logger.info(f"Creating task for file {file_url}")
5656

57-
url = f"{self.config.base_url}/job/"
57+
url = f"{self.config.base_url}/tasks"
5858
data = {
59-
"metadata": {"format": "json", "name": "alignment", "language": "en"},
60-
"callbackUrl": "https://example.com/webhook/alignment",
61-
"assetId": asset_id,
59+
"url": file_url,
60+
"targets": [
61+
{
62+
"model": "alignment",
63+
"formats": ["json"],
64+
"language": "en"
65+
}
66+
],
6267
}
6368
response = requests.post(url, headers=self._get_headers(), json=data)
6469
response.raise_for_status()
65-
return response.json()["job"]["id"]
70+
return response.json()["id"]
6671

67-
def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
68-
"""Poll for job completion and return results."""
69-
self.logger.info(f"Getting job result for job {job_id}")
72+
def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
73+
"""Poll for task completion and return results."""
74+
self.logger.info(f"Getting task result for task {task_id}")
7075

71-
url = f"{self.config.base_url}/job/{job_id}"
76+
# Use the list endpoint which has fresh data, not the individual task endpoint which caches
77+
url = f"{self.config.base_url}/tasks"
7278
start_time = time.time()
7379
last_status_log = start_time
7480
timeout_seconds = self.config.timeout_minutes * 60
7581

76-
# Add initial retry logic for 404 errors (job ID not yet available)
82+
# Add initial retry logic for when task is not found yet
7783
initial_retry_count = 0
7884
max_initial_retries = 5
7985
initial_retry_delay = 2 # seconds
@@ -94,28 +100,57 @@ def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
94100
try:
95101
response = requests.get(url, headers=self._get_headers())
96102
response.raise_for_status()
97-
job_data = response.json()["job"]
103+
tasks_list = response.json()
104+
105+
# Find our specific task in the list
106+
task_data = None
107+
for task in tasks_list:
108+
if task.get("id") == task_id:
109+
task_data = task
110+
break
111+
112+
if not task_data:
113+
# Task not found in list yet
114+
if initial_retry_count < max_initial_retries:
115+
initial_retry_count += 1
116+
self.logger.info(f"Task not found in list yet (attempt {initial_retry_count}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
117+
time.sleep(initial_retry_delay)
118+
continue
119+
else:
120+
raise TranscriptionError(f"Task {task_id} not found in task list after {max_initial_retries} retries")
121+
122+
# Log the full response for debugging
123+
self.logger.debug(f"Task status response: {task_data}")
98124

99-
if job_data["status"] == "completed":
100-
return job_data
101-
elif job_data["status"] == "failed":
102-
raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")
125+
# Check status of targets (not the task itself)
126+
targets = task_data.get("targets", [])
127+
if not targets:
128+
raise TranscriptionError("No targets found in task response")
129+
130+
# Check if all targets are completed or if any failed
131+
all_completed = True
132+
for target in targets:
133+
target_status = target.get("status")
134+
target_model = target.get("model")
135+
self.logger.debug(f"Target {target_model} status: {target_status}")
136+
137+
if target_status == "failed":
138+
error_msg = target.get("error", "Unknown error")
139+
raise TranscriptionError(f"Target {target_model} failed: {error_msg}")
140+
elif target_status != "completed":
141+
all_completed = False
142+
143+
if all_completed:
144+
self.logger.info("All targets completed successfully")
145+
return task_data
103146

104147
# Reset retry count on successful response
105148
initial_retry_count = 0
106149

107150
except requests.exceptions.HTTPError as e:
108-
if e.response.status_code == 404 and initial_retry_count < max_initial_retries:
109-
# Job ID not yet available, retry with delay
110-
initial_retry_count += 1
111-
self.logger.info(f"Job ID not yet available (attempt {initial_retry_count}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
112-
time.sleep(initial_retry_delay)
113-
continue
114-
else:
115-
# Re-raise the error if it's not a 404 or we've exceeded retries
116-
raise
151+
raise
117152

118-
time.sleep(5) # Wait before next poll
153+
time.sleep(30) # Wait before next poll
119154

120155

121156
class AudioShakeTranscriber(BaseTranscriber):
@@ -142,13 +177,13 @@ def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
142177
self.logger.info(f"Starting transcription for {audio_filepath}")
143178

144179
try:
145-
# Start job and get results
180+
# Start task and get results
146181
self.logger.debug("Calling start_transcription()")
147-
job_id = self.start_transcription(audio_filepath)
148-
self.logger.debug(f"Got job_id: {job_id}")
182+
task_id = self.start_transcription(audio_filepath)
183+
self.logger.debug(f"Got task_id: {task_id}")
149184

150185
self.logger.debug("Calling get_transcription_result()")
151-
result = self.get_transcription_result(job_id)
186+
result = self.get_transcription_result(task_id)
152187
self.logger.debug("Got transcription result")
153188

154189
return result
@@ -157,46 +192,61 @@ def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
157192
raise
158193

159194
def start_transcription(self, audio_filepath: str) -> str:
160-
"""Starts the transcription job and returns the job ID."""
195+
"""Starts the transcription task and returns the task ID."""
161196
self.logger.debug(f"Entering start_transcription() for {audio_filepath}")
162197

163-
# Upload file and create job
164-
asset_id = self.api.upload_file(audio_filepath)
165-
self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
198+
# Upload file and create task
199+
file_url = self.api.upload_file(audio_filepath)
200+
self.logger.debug(f"File uploaded successfully. File URL: {file_url}")
166201

167-
job_id = self.api.create_job(asset_id)
168-
self.logger.debug(f"Job created successfully. Job ID: {job_id}")
202+
task_id = self.api.create_task(file_url)
203+
self.logger.debug(f"Task created successfully. Task ID: {task_id}")
169204

170-
return job_id
205+
return task_id
171206

172-
def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
173-
"""Gets the raw results for a previously started job."""
174-
self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")
207+
def get_transcription_result(self, task_id: str) -> Dict[str, Any]:
208+
"""Gets the raw results for a previously started task."""
209+
self.logger.debug(f"Entering get_transcription_result() for task ID: {task_id}")
175210

176-
# Wait for job completion
177-
job_data = self.api.wait_for_job_result(job_id)
178-
self.logger.debug("Job completed. Getting results...")
211+
# Wait for task completion
212+
task_data = self.api.wait_for_task_result(task_id)
213+
self.logger.debug("Task completed. Getting results...")
179214

180-
output_asset = next((asset for asset in job_data.get("outputAssets", []) if asset["name"] == "alignment.json"), None)
181-
if not output_asset:
182-
raise TranscriptionError("Required output not found in job results")
215+
# Find the alignment target output
216+
alignment_target = None
217+
for target in task_data.get("targets", []):
218+
if target.get("model") == "alignment":
219+
alignment_target = target
220+
break
221+
222+
if not alignment_target:
223+
raise TranscriptionError("Required output not found in task results")
224+
225+
# Get the output file URL
226+
output = alignment_target.get("output", [])
227+
if not output:
228+
raise TranscriptionError("No output found in alignment target")
229+
230+
output_url = output[0].get("link")
231+
if not output_url:
232+
raise TranscriptionError("Output link not found in alignment target")
183233

184234
# Fetch transcription data
185-
response = requests.get(output_asset["link"])
235+
response = requests.get(output_url)
186236
response.raise_for_status()
187237

188238
# Return combined raw data
189-
raw_data = {"job_data": job_data, "transcription": response.json()}
239+
raw_data = {"task_data": task_data, "transcription": response.json()}
190240

191241
self.logger.debug("Raw results retrieved successfully")
192242
return raw_data
193243

194244
def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
195245
"""Process raw Audioshake API response into standard format."""
196-
self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")
246+
self.logger.debug(f"Processing result for task {raw_data['task_data']['id']}")
197247

198248
transcription_data = raw_data["transcription"]
199-
job_data = raw_data["job_data"]
249+
task_data = raw_data["task_data"]
200250

201251
segments = []
202252
all_words = [] # Collect all words across segments
@@ -230,8 +280,8 @@ def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
230280
source=self.get_name(),
231281
metadata={
232282
"language": transcription_data.get("metadata", {}).get("language"),
233-
"job_id": job_data["id"],
234-
"duration": job_data.get("statusInfo", {}).get("duration"),
283+
"task_id": task_data["id"],
284+
"duration": task_data.get("duration"),
235285
},
236286
)
237287

0 commit comments

Comments
 (0)