Skip to content

Commit 262013b

Browse files
feat(documents): add support for start ixp extraction from attachment
1 parent 46a7a35 commit 262013b

3 files changed

Lines changed: 185 additions & 14 deletions

File tree

src/uipath/platform/attachments/attachments.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class AttachmentMode(str, Enum):
1616

1717

1818
class Attachment(BaseModel):
19-
"""Model representing an attachment. Id 'None' is used for uploads."""
19+
"""Model representing an attachment."""
2020

2121
id: uuid.UUID = Field(..., alias="ID")
2222
full_name: str = Field(..., alias="FullName")

src/uipath/platform/documents/_documents_service.py

Lines changed: 109 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from ..._utils import Endpoint, resource_override
1010
from ...tracing import traced
11+
from ..attachments import Attachment
1112
from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext
1213
from ..errors import OperationFailedException, OperationNotCompleteException
1314
from .documents import (
@@ -207,6 +208,7 @@ def _get_document_id(
207208
project_id=project_id,
208209
file=file,
209210
file_path=file_path,
211+
attachment=None,
210212
)
211213
self._wait_for_digitization(
212214
project_id=project_id,
@@ -229,6 +231,7 @@ async def _get_document_id_async(
229231
project_id=project_id,
230232
file=file,
231233
file_path=file_path,
234+
attachment=None,
232235
)
233236
await self._wait_for_digitization_async(
234237
project_id=project_id,
@@ -289,11 +292,61 @@ async def _get_project_id_and_tag_async(
289292

290293
return project_id, tag
291294

292-
def _start_digitization(
295+
def _start_digitization_from_attachment(
293296
self,
294297
project_id: str,
295-
file: Optional[FileContent] = None,
296-
file_path: Optional[str] = None,
298+
attachment: Attachment,
299+
) -> str:
300+
return self.request(
301+
"POST",
302+
url=Endpoint(
303+
f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment"
304+
),
305+
params={"api-version": 1.1},
306+
headers=self._get_common_headers(),
307+
json={
308+
"attachmentId": str(attachment.id),
309+
"fileName": attachment.full_name,
310+
"mimeType": attachment.mime_type,
311+
"folderId": str(
312+
UUID(
313+
int=0
314+
) # temporary workaround until backend supports null folderId
315+
),
316+
},
317+
).json()["documentId"]
318+
319+
async def _start_digitization_from_attachment_async(
320+
self,
321+
project_id: str,
322+
attachment: Attachment,
323+
) -> str:
324+
return (
325+
await self.request_async(
326+
"POST",
327+
url=Endpoint(
328+
f"/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment"
329+
),
330+
params={"api-version": 1.1},
331+
headers=self._get_common_headers(),
332+
json={
333+
"attachmentId": str(attachment.id),
334+
"fileName": attachment.full_name,
335+
"mimeType": attachment.mime_type,
336+
"folderId": str(
337+
UUID(
338+
int=0
339+
) # temporary workaround until backend supports null folderId
340+
),
341+
},
342+
)
343+
).json()["documentId"]
344+
345+
def _start_digitization_from_file(
346+
self,
347+
project_id: str,
348+
file: Optional[FileContent],
349+
file_path: Optional[str],
297350
) -> str:
298351
with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
299352
return self.request(
@@ -306,11 +359,11 @@ def _start_digitization(
306359
files={"File": handle},
307360
).json()["documentId"]
308361

309-
async def _start_digitization_async(
362+
async def _start_digitization_from_file_async(
310363
self,
311364
project_id: str,
312-
file: Optional[FileContent] = None,
313-
file_path: Optional[str] = None,
365+
file: Optional[FileContent],
366+
file_path: Optional[str],
314367
) -> str:
315368
with open(Path(file_path), "rb") if file_path else nullcontext(file) as handle:
316369
return (
@@ -325,6 +378,44 @@ async def _start_digitization_async(
325378
)
326379
).json()["documentId"]
327380

381+
def _start_digitization(
382+
self,
383+
project_id: str,
384+
file: Optional[FileContent],
385+
file_path: Optional[str],
386+
attachment: Optional[Attachment],
387+
) -> str:
388+
if attachment is not None:
389+
return self._start_digitization_from_attachment(
390+
project_id=project_id,
391+
attachment=attachment,
392+
)
393+
else:
394+
return self._start_digitization_from_file(
395+
project_id=project_id,
396+
file=file,
397+
file_path=file_path,
398+
)
399+
400+
async def _start_digitization_async(
401+
self,
402+
project_id: str,
403+
file: Optional[FileContent],
404+
file_path: Optional[str],
405+
attachment: Optional[Attachment],
406+
) -> str:
407+
if attachment is not None:
408+
return await self._start_digitization_from_attachment_async(
409+
project_id=project_id,
410+
attachment=attachment,
411+
)
412+
else:
413+
return await self._start_digitization_from_file_async(
414+
project_id=project_id,
415+
file=file,
416+
file_path=file_path,
417+
)
418+
328419
def _wait_for_digitization(self, project_id: str, document_id: str) -> None:
329420
def result_getter() -> Tuple[str, Optional[str], Optional[str]]:
330421
result = self.request(
@@ -917,6 +1008,7 @@ def start_ixp_extraction(
9171008
tag: str,
9181009
file: Optional[FileContent] = None,
9191010
file_path: Optional[str] = None,
1011+
attachment: Optional[Attachment] = None,
9201012
) -> StartExtractionResponse:
9211013
"""Start an IXP extraction process without waiting for results (non-blocking).
9221014
@@ -929,9 +1021,10 @@ def start_ixp_extraction(
9291021
tag (str): Tag of the published project version (e.g., "staging").
9301022
file (FileContent, optional): The document file to be processed.
9311023
file_path (str, optional): Path to the document file to be processed.
1024+
attachment (Attachment, optional): An existing attachment to use for digitization.
9321025
9331026
Note:
934-
Either `file` or `file_path` must be provided, but not both.
1027+
Exactly one of `file`, `file_path`, or `attachment` must be provided.
9351028
9361029
Returns:
9371030
StartExtractionResponse: Contains the operation_id, document_id, project_id, and tag
@@ -946,14 +1039,14 @@ def start_ixp_extraction(
9461039
# start_response.operation_id can be used to poll for results later
9471040
```
9481041
"""
949-
_exactly_one_must_be_provided(file=file, file_path=file_path)
1042+
_exactly_one_must_be_provided(
1043+
file=file, file_path=file_path, attachment=attachment
1044+
)
9501045

9511046
project_id = self._get_project_id_by_name(project_name, ProjectType.IXP)
9521047

9531048
document_id = self._start_digitization(
954-
project_id=project_id,
955-
file=file,
956-
file_path=file_path,
1049+
project_id=project_id, file=file, file_path=file_path, attachment=attachment
9571050
)
9581051

9591052
return self._start_extraction(
@@ -971,9 +1064,12 @@ async def start_ixp_extraction_async(
9711064
tag: str,
9721065
file: Optional[FileContent] = None,
9731066
file_path: Optional[str] = None,
1067+
attachment: Optional[Attachment] = None,
9741068
) -> StartExtractionResponse:
9751069
"""Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method."""
976-
_exactly_one_must_be_provided(file=file, file_path=file_path)
1070+
_exactly_one_must_be_provided(
1071+
file=file, file_path=file_path, attachment=attachment
1072+
)
9771073

9781074
project_id = await self._get_project_id_by_name_async(
9791075
project_name, ProjectType.IXP
@@ -983,6 +1079,7 @@ async def start_ixp_extraction_async(
9831079
project_id=project_id,
9841080
file=file,
9851081
file_path=file_path,
1082+
attachment=attachment,
9861083
)
9871084

9881085
return await self._start_extraction_async(

tests/sdk/services/test_documents_service.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
from unittest.mock import Mock, patch
55
from uuid import UUID, uuid4
66

7+
import httpx
78
import pytest
89
from pytest_httpx import HTTPXMock
910

1011
from uipath.platform import UiPathApiConfig, UiPathExecutionContext
12+
from uipath.platform.attachments import Attachment
1113
from uipath.platform.documents import (
1214
ActionPriority,
1315
ClassificationResult,
@@ -2078,6 +2080,78 @@ async def test_start_ixp_extraction(
20782080
assert response.project_id == project_id
20792081
assert response.tag == "staging"
20802082

2083+
@pytest.mark.parametrize("mode", ["sync", "async"])
2084+
@pytest.mark.asyncio
2085+
async def test_start_ixp_extraction_using_attachment(
2086+
self,
2087+
httpx_mock: HTTPXMock,
2088+
service: DocumentsService,
2089+
base_url: str,
2090+
org: str,
2091+
tenant: str,
2092+
mode: str,
2093+
):
2094+
# ARRANGE
2095+
project_id = str(uuid4())
2096+
document_id = str(uuid4())
2097+
operation_id = str(uuid4())
2098+
attachment = Attachment(
2099+
ID=uuid4(), # type: ignore
2100+
FullName="alex.pdf",
2101+
MimeType="application/pdf",
2102+
)
2103+
2104+
httpx_mock.add_response(
2105+
url=f"{base_url}{org}{tenant}/du_/api/framework/projects?api-version=1.1&type=IXP",
2106+
status_code=200,
2107+
match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"},
2108+
json={
2109+
"projects": [
2110+
{"id": project_id, "name": "TestProjectIXP"},
2111+
]
2112+
},
2113+
)
2114+
httpx_mock.add_response(
2115+
url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/digitization/startFromJobAttachment?api-version=1.1",
2116+
status_code=200,
2117+
match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"},
2118+
match_json={
2119+
"attachmentId": str(attachment.id),
2120+
"fileName": attachment.full_name,
2121+
"mimeType": attachment.mime_type,
2122+
"folderId": str(UUID(int=0)),
2123+
},
2124+
json={"documentId": document_id},
2125+
)
2126+
httpx_mock.add_response(
2127+
method="POST",
2128+
url=f"{base_url}{org}{tenant}/du_/api/framework/projects/{project_id}/staging/document-types/{UUID(int=0)}/extraction/start?api-version=1.1",
2129+
status_code=200,
2130+
match_headers={"X-UiPath-Internal-ConsumptionSourceType": "CodedAgents"},
2131+
match_json={"documentId": document_id},
2132+
json={"operationId": operation_id},
2133+
)
2134+
2135+
# ACT
2136+
if mode == "async":
2137+
response = await service.start_ixp_extraction_async(
2138+
project_name="TestProjectIXP",
2139+
tag="staging",
2140+
attachment=attachment,
2141+
)
2142+
else:
2143+
response = service.start_ixp_extraction(
2144+
project_name="TestProjectIXP",
2145+
tag="staging",
2146+
attachment=attachment,
2147+
)
2148+
2149+
# ASSERT
2150+
assert response.operation_id == operation_id
2151+
assert response.document_id == document_id
2152+
assert response.project_id == project_id
2153+
assert response.tag == "staging"
2154+
20812155
@pytest.mark.parametrize("mode", ["sync", "async"])
20822156
@pytest.mark.asyncio
20832157
async def test_start_ixp_extraction_invalid_parameters(
@@ -2088,7 +2162,7 @@ async def test_start_ixp_extraction_invalid_parameters(
20882162
# ACT & ASSERT
20892163
with pytest.raises(
20902164
ValueError,
2091-
match="Exactly one of `file, file_path` must be provided",
2165+
match="Exactly one of `file, file_path, attachment` must be provided",
20922166
):
20932167
if mode == "async":
20942168
await service.start_ixp_extraction_async(

0 commit comments

Comments
 (0)