Skip to content

Commit 37d43ea

Browse files
committed
added async minimal client example
1 parent d8c99d2 commit 37d43ea

File tree

4 files changed

+78
-4
lines changed

4 files changed

+78
-4
lines changed

datacrunch/InferenceClient/inference_client.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
from urllib.parse import urlparse
77
from enum import Enum
88

9+
910
class InferenceClientError(Exception):
1011
"""Base exception for InferenceClient errors."""
1112
pass
1213

14+
1315
class AsyncStatus(int, Enum):
1416
Initialized = 0
1517
Queue = 1
1618
Inference = 2
1719
Completed = 3
1820

21+
1922
@dataclass_json(undefined=Undefined.EXCLUDE)
2023
@dataclass
2124
class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
222225
raise InferenceClientError(f"Request to {path} failed: {str(e)}")
223226

224227
def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
228+
"""Make a synchronous request to the inference endpoint.
229+
230+
Args:
231+
data: The data payload to send with the request
232+
path: API endpoint path. Defaults to empty string.
233+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
234+
headers: Optional headers to include in the request
235+
http_method: HTTP method to use. Defaults to "POST".
236+
stream: Whether to stream the response. Defaults to False.
237+
238+
Returns:
239+
InferenceResponse: Object containing the response data.
240+
241+
Raises:
242+
InferenceClientError: If the request fails
243+
"""
225244
response = self._make_request(
226245
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
227246

@@ -233,6 +252,23 @@ def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int =
233252
)
234253

235254
def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
255+
"""Make an asynchronous request to the inference endpoint.
256+
257+
Args:
258+
data: The data payload to send with the request
259+
path: API endpoint path. Defaults to empty string.
260+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
261+
headers: Optional headers to include in the request
262+
http_method: HTTP method to use. Defaults to "POST".
263+
no_response: If True, don't wait for response. Defaults to False.
264+
265+
Returns:
266+
AsyncInferenceExecution: Object to track the async execution status.
267+
If no_response is True, returns None.
268+
269+
Raises:
270+
InferenceClientError: If the request fails
271+
"""
236272
# Add relevant headers to the request, to indicate that the request is async
237273
headers = headers or {}
238274
if no_response:

examples/containers/calling_the_endpoint_asynchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
from datacrunch.InferenceClient.inference_client import AsyncStatus
55

66
# Configuration - replace with your deployment name
7-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
7+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
88

99
# Get client secret and id from environment variables
1010
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
1111
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1212
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1313

1414
# DataCrunch client instance
15-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
15+
datacrunch = DataCrunchClient(
16+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1617

1718
# Get the deployment
1819
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)

examples/containers/calling_the_endpoint_synchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
from datacrunch import DataCrunchClient
33

44
# Configuration - replace with your deployment name
5-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
5+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
66

77
# Get client secret and id from environment variables
88
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
99
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1010
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1111

1212
# DataCrunch client instance
13-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
13+
datacrunch = DataCrunchClient(
14+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1415

1516
# Get the deployment
1617
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""Minimal example: call a DataCrunch inference endpoint asynchronously.

Uses only an inference key (no client-id/secret credentials), submits a
completion request, polls until the async execution finishes, then prints
the output.
"""
import os
from time import sleep

from datacrunch.InferenceClient import InferenceClient
from datacrunch.InferenceClient.inference_client import AsyncStatus

# Inference key and endpoint base URL come from environment variables
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')

# Inference client built from the key alone — no client credentials needed
client = InferenceClient(
    inference_key=DATACRUNCH_INFERENCE_KEY,
    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
)

# Request payload for an SGLang deployment exposing an OpenAI-compatible API
payload = {
    "model": "deepseek-ai/deepseek-llm-7b-chat",
    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
    "max_tokens": 128,
    "temperature": 0.7,
    "top_p": 0.9
}

# Submit the request asynchronously; returns an execution handle to poll
execution = client.run(data=payload, path='v1/completions')

# Poll once per second, printing status JSON, until the run completes
while execution.status() != AsyncStatus.Completed:
    print(execution.status_json())
    sleep(1)

# Completed — print the final response
print(execution.output())

0 commit comments

Comments
 (0)