From a57f1995e3bac3014d59c677261fbe66fc66e9f4 Mon Sep 17 00:00:00 2001 From: Georgios Hadjiharalambous Date: Fri, 13 Mar 2026 18:08:16 +0000 Subject: [PATCH] add user_id as input to header too --- sdk/batch/speechmatics/batch/_async_client.py | 27 +++++- tests/batch/test_submit_job.py | 86 ++++++++++++++++++- 2 files changed, 109 insertions(+), 4 deletions(-) diff --git a/sdk/batch/speechmatics/batch/_async_client.py b/sdk/batch/speechmatics/batch/_async_client.py index e80017d..873218b 100644 --- a/sdk/batch/speechmatics/batch/_async_client.py +++ b/sdk/batch/speechmatics/batch/_async_client.py @@ -141,6 +141,7 @@ async def submit_job( config: Optional[JobConfig] = None, transcription_config: Optional[TranscriptionConfig] = None, parallel_engines: Optional[int] = None, + user_id: Optional[str] = None, ) -> JobDetails: """ Submit a new transcription job. @@ -159,6 +160,9 @@ async def submit_job( parallel_engines: Optional number of parallel engines to request for this job. Sent as ``{"parallel_engines": N}`` in the ``X-SM-Processing-Data`` header. This only applies when using the container onPrem on http batch mode. + user_id: Optional user identifier to associate with this job. + Sent as ``{"user_id": "..."}`` in the ``X-SM-Processing-Data`` header. + This only applies when using the container onPrem on http batch mode. Returns: JobDetails object containing the job ID and initial status. @@ -205,7 +209,9 @@ async def submit_job( assert audio_file is not None # for type checker; validated above multipart_data, filename = await self._prepare_file_submission(audio_file, config_dict) - return await self._submit_and_create_job_details(multipart_data, filename, config, parallel_engines) + return await self._submit_and_create_job_details( + multipart_data, filename, config, parallel_engines, user_id + ) except Exception as e: if isinstance(e, (AuthenticationError, BatchError)): raise @@ -441,6 +447,7 @@ async def transcribe( polling_interval: float = 5.0, timeout: Optional[float] = None, parallel_engines: Optional[int] = None, + user_id: Optional[str] = None, ) -> Union[Transcript, str]: """ Complete transcription workflow: submit job and wait for completion. @@ -457,6 +464,9 @@ async def transcribe( parallel_engines: Optional number of parallel engines to request for this job. Sent as ``{"parallel_engines": N}`` in the ``X-SM-Processing-Data`` header. This only applies when using the container onPrem on http batch mode. + user_id: Optional user identifier to associate with this job. + Sent as ``{"user_id": "..."}`` in the ``X-SM-Processing-Data`` header. + This only applies when using the container onPrem on http batch mode. Returns: Transcript object containing the transcript and metadata. @@ -485,6 +495,7 @@ async def transcribe( config=config, transcription_config=transcription_config, parallel_engines=parallel_engines, + user_id=user_id, ) # Wait for completion and return result @@ -538,12 +549,22 @@ async def _prepare_file_submission(self, audio_file: Union[str, BinaryIO], confi return multipart_data, filename async def _submit_and_create_job_details( - self, multipart_data: dict, filename: str, config: JobConfig, parallel_engines: Optional[int] = None + self, + multipart_data: dict, + filename: str, + config: JobConfig, + parallel_engines: Optional[int] = None, + user_id: Optional[str] = None, ) -> JobDetails: """Submit job and create JobDetails response.""" extra_headers: Optional[dict[str, Any]] = None + processing_data: dict[str, Any] = {} if parallel_engines is not None: - extra_headers = {PROCESSING_DATA_HEADER: {"parallel_engines": parallel_engines}} + processing_data["parallel_engines"] = parallel_engines + if user_id is not None: + processing_data["user_id"] = user_id + if processing_data: + extra_headers = {PROCESSING_DATA_HEADER: processing_data} response = await self._transport.post("/jobs", multipart_data=multipart_data, extra_headers=extra_headers) job_id = response.get("id") if not job_id: diff --git a/tests/batch/test_submit_job.py b/tests/batch/test_submit_job.py index c73f984..bbbf364 100644 --- a/tests/batch/test_submit_job.py +++ b/tests/batch/test_submit_job.py @@ -1,4 +1,4 @@ -"""Unit tests for AsyncClient.submit_job, focusing on the parallel_engines feature.""" +"""Unit tests for AsyncClient.submit_job, focusing on the parallel engines and user_id features.""" import json from io import BytesIO @@ -127,6 +127,90 @@ async def test_header_sent_with_fetch_data_config(self): assert payload == {"parallel_engines": 2} +class TestUserIdHeader: + """X-SM-Processing-Data header is set correctly based on user_id.""" + + @pytest.mark.asyncio + async def test_header_sent_when_user_id_provided(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + await client.submit_job(audio, user_id="user-abc") + + extra_headers = _captured_extra_headers(mock_post) + assert extra_headers is not None + assert PROCESSING_DATA_HEADER in extra_headers + payload = extra_headers[PROCESSING_DATA_HEADER] + assert payload == {"user_id": "user-abc"} + + @pytest.mark.asyncio + async def test_header_not_sent_when_user_id_is_none(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + await client.submit_job(audio) + + extra_headers = _captured_extra_headers(mock_post) + assert extra_headers is None + + @pytest.mark.asyncio + async def test_user_id_and_parallel_engines_sent_together(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + await client.submit_job(audio, parallel_engines=4, user_id="user-xyz") + + extra_headers = _captured_extra_headers(mock_post) + assert extra_headers is not None + payload = extra_headers[PROCESSING_DATA_HEADER] + assert payload == {"parallel_engines": 4, "user_id": "user-xyz"} + + @pytest.mark.asyncio + async def test_user_id_does_not_appear_when_only_parallel_engines_set(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + await client.submit_job(audio, parallel_engines=2) + + payload = _captured_extra_headers(mock_post)[PROCESSING_DATA_HEADER] + assert "user_id" not in payload + + @pytest.mark.asyncio + async def test_parallel_engines_does_not_appear_when_only_user_id_set(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + await client.submit_job(audio, user_id="u1") + + payload = _captured_extra_headers(mock_post)[PROCESSING_DATA_HEADER] + assert "parallel_engines" not in payload + + @pytest.mark.asyncio + async def test_user_id_forwarded_from_transcribe(self): + client = _make_client() + audio = BytesIO(b"fake-audio") + + with patch.object(client._transport, "post", new_callable=AsyncMock) as mock_post: + mock_post.return_value = _job_response() + with patch.object(client, "wait_for_completion", new_callable=AsyncMock) as mock_wait: + mock_wait.return_value = MagicMock() + await client.transcribe(audio, user_id="transcribe-user") + + extra_headers = _captured_extra_headers(mock_post) + assert extra_headers is not None + assert extra_headers[PROCESSING_DATA_HEADER]["user_id"] == "transcribe-user" + + class TestSubmitJobReturnValue: """submit_job still returns the correct JobDetails regardless of parallel_engines."""