Skip to content

Commit 37d43ea

Browse files
committed
added async minimal client example
1 parent d8c99d2 commit 37d43ea

File tree

4 files changed

+78
-4
lines changed

4 files changed

+78
-4
lines changed

datacrunch/InferenceClient/inference_client.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,19 @@
66
from urllib.parse import urlparse
77
from enum import Enum
88

9+
910
class InferenceClientError(Exception):
1011
"""Base exception for InferenceClient errors."""
1112
pass
1213

14+
1315
class AsyncStatus(int, Enum):
1416
Initialized = 0
1517
Queue = 1
1618
Inference = 2
1719
Completed = 3
1820

21+
1922
@dataclass_json(undefined=Undefined.EXCLUDE)
2023
@dataclass
2124
class InferenceResponse:
@@ -222,6 +225,22 @@ def _make_request(self, method: str, path: str, **kwargs) -> requests.Response:
222225
raise InferenceClientError(f"Request to {path} failed: {str(e)}")
223226

224227
def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", stream: bool = False):
228+
"""Make a synchronous request to the inference endpoint.
229+
230+
Args:
231+
data: The data payload to send with the request
232+
path: API endpoint path. Defaults to empty string.
233+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
234+
headers: Optional headers to include in the request
235+
http_method: HTTP method to use. Defaults to "POST".
236+
stream: Whether to stream the response. Defaults to False.
237+
238+
Returns:
239+
InferenceResponse: Object containing the response data.
240+
241+
Raises:
242+
InferenceClientError: If the request fails
243+
"""
225244
response = self._make_request(
226245
http_method, path, json=data, timeout_seconds=timeout_seconds, headers=headers, stream=stream)
227246

@@ -233,6 +252,23 @@ def run_sync(self, data: Dict[str, Any], path: str = "", timeout_seconds: int =
233252
)
234253

235254
def run(self, data: Dict[str, Any], path: str = "", timeout_seconds: int = 60 * 5, headers: Optional[Dict[str, str]] = None, http_method: str = "POST", no_response: bool = False):
255+
"""Make an asynchronous request to the inference endpoint.
256+
257+
Args:
258+
data: The data payload to send with the request
259+
path: API endpoint path. Defaults to empty string.
260+
timeout_seconds: Request timeout in seconds. Defaults to 5 minutes.
261+
headers: Optional headers to include in the request
262+
http_method: HTTP method to use. Defaults to "POST".
263+
no_response: If True, don't wait for response. Defaults to False.
264+
265+
Returns:
266+
AsyncInferenceExecution: Object to track the async execution status.
267+
If no_response is True, returns None.
268+
269+
Raises:
270+
InferenceClientError: If the request fails
271+
"""
236272
# Add relevant headers to the request, to indicate that the request is async
237273
headers = headers or {}
238274
if no_response:

examples/containers/calling_the_endpoint_asynchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
from datacrunch.InferenceClient.inference_client import AsyncStatus
55

66
# Configuration - replace with your deployment name
7-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
7+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
88

99
# Get client secret and id from environment variables
1010
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
1111
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1212
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1313

1414
# DataCrunch client instance
15-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
15+
datacrunch = DataCrunchClient(
16+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1617

1718
# Get the deployment
1819
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)

examples/containers/calling_the_endpoint_synchronously.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22
from datacrunch import DataCrunchClient
33

44
# Configuration - replace with your deployment name
5-
DEPLOYMENT_NAME = "sglang-deployment-example-20250411-160652"
5+
DEPLOYMENT_NAME = os.environ.get('DATACRUNCH_DEPLOYMENT_NAME')
66

77
# Get client secret and id from environment variables
88
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
99
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
1010
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
1111

1212
# DataCrunch client instance
13-
datacrunch = DataCrunchClient(DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
13+
datacrunch = DataCrunchClient(
14+
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET, inference_key=DATACRUNCH_INFERENCE_KEY)
1415

1516
# Get the deployment
1617
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""Minimal example: call a DataCrunch inference endpoint asynchronously.

Uses only an inference key (no client-id/secret credentials), submits a
completion request, polls until the async execution finishes, then prints
the output.
"""
import os
from time import sleep

from datacrunch.InferenceClient import InferenceClient
from datacrunch.InferenceClient.inference_client import AsyncStatus

# Inference key and endpoint base URL come from environment variables
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')
DATACRUNCH_ENDPOINT_BASE_URL = os.environ.get('DATACRUNCH_ENDPOINT_BASE_URL')

# Inference client built from the key alone — no client credentials needed
client = InferenceClient(
    inference_key=DATACRUNCH_INFERENCE_KEY,
    endpoint_base_url=DATACRUNCH_ENDPOINT_BASE_URL
)

# Request payload for an SGLang deployment exposing an OpenAI-compatible API
payload = {
    "model": "deepseek-ai/deepseek-llm-7b-chat",
    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
    "max_tokens": 128,
    "temperature": 0.7,
    "top_p": 0.9
}

# Submit the request asynchronously; returns an execution handle to poll
execution = client.run(data=payload, path='v1/completions')

# Poll once per second, printing status JSON, until the run completes
while execution.status() != AsyncStatus.Completed:
    print(execution.status_json())
    sleep(1)

# Completed — print the final response
print(execution.output())

0 commit comments

Comments
 (0)