Skip to content

Commit 35bc29d

Browse files
committed
created a sync inference example, improved sglang example
1 parent ba00acb commit 35bc29d

File tree

2 files changed

+86
-96
lines changed

2 files changed

+86
-96
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
"""Synchronous inference example for a DataCrunch container deployment.

Looks up an existing SGLang deployment by name and sends a single
synchronous completion request to its OpenAI-compatible API, then
prints the response body.
"""
import os

from datacrunch import DataCrunchClient

# Configuration - replace with your deployment name
DEPLOYMENT_NAME = "sglang-deployment-example"

# Get client secret and id from environment variables
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
DATACRUNCH_INFERENCE_KEY = os.environ.get('DATACRUNCH_INFERENCE_KEY')

# DataCrunch client instance.
# FIX: DATACRUNCH_INFERENCE_KEY was read above but never passed to the
# client, so the sync inference call had no credential for the inference
# endpoint. The companion sglang_deployment_example.py in this same commit
# constructs the client with inference_key=..., so do the same here.
datacrunch = DataCrunchClient(
    DATACRUNCH_CLIENT_ID,
    DATACRUNCH_CLIENT_SECRET,
    inference_key=DATACRUNCH_INFERENCE_KEY,
)

# Get the deployment
deployment = datacrunch.containers.get_deployment_by_name(DEPLOYMENT_NAME)

# Make a synchronous request to the endpoint.
# This example demonstrates calling an SGLang deployment which serves LLMs
# using an OpenAI-compatible API format.
data = {
    "model": "deepseek-ai/deepseek-llm-7b-chat",
    "prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
    "max_tokens": 128,
    "temperature": 0.7,
    "top_p": 0.9
}
# NOTE(review): the other example in this commit uses path="/v1/completions"
# (leading slash) — confirm the SDK normalizes both forms.
response = deployment.run_sync(
    data=data,
    path='v1/completions'
)  # wait for the response

# Print the response
print(response.body)

examples/containers/sglang_deployment_example.py

Lines changed: 53 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
import time
99
import signal
1010
import sys
11-
import requests
12-
11+
from datetime import datetime
1312
from datacrunch import DataCrunchClient
1413
from datacrunch.exceptions import APIException
1514
from datacrunch.containers.containers import (
@@ -29,19 +28,20 @@
2928
ContainerDeploymentStatus,
3029
)
3130

31+
CURRENT_TIMESTAMP = datetime.now().strftime(
32+
"%Y%m%d-%H%M%S").lower() # e.g. 20250403-120000
33+
3234
# Configuration constants
33-
DEPLOYMENT_NAME = "sglang-deployment-tutorial"
34-
CONTAINER_NAME = "sglang-server"
35+
DEPLOYMENT_NAME = f"sglang-deployment-example-{CURRENT_TIMESTAMP}"
3536
MODEL_PATH = "deepseek-ai/deepseek-llm-7b-chat"
3637
HF_SECRET_NAME = "huggingface-token"
3738
IMAGE_URL = "docker.io/lmsysorg/sglang:v0.4.1.post6-cu124"
38-
CONTAINERS_API_URL = f'https://containers.datacrunch.io/{DEPLOYMENT_NAME}'
3939

4040
# Get confidential values from environment variables
4141
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
4242
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
43+
INFERENCE_KEY = os.environ.get('INFERENCE_KEY')
4344
HF_TOKEN = os.environ.get('HF_TOKEN')
44-
INFERENCE_API_KEY = os.environ.get('INFERENCE_API_KEY')
4545

4646
# DataCrunch client instance (global for graceful shutdown)
4747
datacrunch = None
@@ -99,81 +99,24 @@ def graceful_shutdown(signum, frame) -> None:
9999
sys.exit(0)
100100

101101

102-
def test_deployment(base_url: str, api_key: str) -> None:
103-
"""Test the deployment with a simple request.
104-
105-
Args:
106-
base_url: The base URL of the deployment
107-
api_key: The API key for authentication
108-
"""
109-
# First, check if the model info endpoint is working
110-
model_info_url = f"{base_url}/get_model_info"
111-
headers = {
112-
'Authorization': f'Bearer {api_key}',
113-
'Content-Type': 'application/json'
114-
}
115-
116-
try:
117-
print("\nTesting /get_model_info endpoint...")
118-
response = requests.get(model_info_url, headers=headers)
119-
if response.status_code == 200:
120-
print("Model info endpoint is working!")
121-
print(f"Response: {response.json()}")
122-
else:
123-
print(f"Request failed with status code {response.status_code}")
124-
print(f"Response: {response.text}")
125-
return
126-
127-
# Now test completions endpoint
128-
print("\nTesting completions API with streaming...")
129-
completions_url = f"{base_url}/v1/completions"
130-
131-
headers = {
132-
'Content-Type': 'application/json',
133-
'Authorization': f'Bearer {api_key}',
134-
'Accept': 'text/event-stream',
135-
'Cache-Control': 'no-cache',
136-
'Connection': 'keep-alive',
137-
}
138-
139-
data = {
140-
"model": MODEL_PATH,
141-
"prompt": "Solar wind is a curious phenomenon. Tell me more about it",
142-
"max_tokens": 128,
143-
"temperature": 0.7,
144-
"top_p": 0.9,
145-
"stream": True
146-
}
147-
148-
with requests.post(completions_url, headers=headers, json=data, stream=True) as response:
149-
if response.status_code == 200:
150-
print("Stream started. Receiving first 5 events...\n")
151-
for i, line in enumerate(response.iter_lines(decode_unicode=True)):
152-
if line:
153-
print(line)
154-
if i >= 4: # Only show first 5 events
155-
print("...(response continues)...")
156-
break
157-
else:
158-
print(
159-
f"Request failed with status code {response.status_code}")
160-
print(f"Response: {response.text}")
161-
162-
except requests.RequestException as e:
163-
print(f"An error occurred: {e}")
164-
165-
166102
def main() -> None:
167103
"""Main function demonstrating SGLang deployment."""
168104
try:
169-
if not HF_TOKEN:
170-
print("Please set HF_TOKEN environment variable with your Hugging Face token")
171-
return
105+
# Get the inference API key
106+
inference_key = INFERENCE_KEY
107+
if not inference_key:
108+
inference_key = input(
109+
"Enter your Inference API Key from the DataCrunch dashboard: ")
110+
else:
111+
print("Using Inference API Key from environment")
172112

173-
# Initialize client
113+
# Initialize client with inference key
174114
global datacrunch
175115
datacrunch = DataCrunchClient(
176-
DATACRUNCH_CLIENT_ID, DATACRUNCH_CLIENT_SECRET)
116+
DATACRUNCH_CLIENT_ID,
117+
DATACRUNCH_CLIENT_SECRET,
118+
inference_key=inference_key
119+
)
177120

178121
# Register signal handlers for cleanup
179122
signal.signal(signal.SIGINT, graceful_shutdown)
@@ -188,6 +131,10 @@ def main() -> None:
188131
secret.name == HF_SECRET_NAME for secret in existing_secrets)
189132

190133
if not secret_exists:
134+
# check if HF_TOKEN is set; if not, prompt the user
135+
if not HF_TOKEN:
136+
HF_TOKEN = input(
137+
"Enter your Hugging Face token: ")
191138
datacrunch.containers.create_secret(
192139
HF_SECRET_NAME, HF_TOKEN)
193140
print(f"Secret '{HF_SECRET_NAME}' created successfully")
@@ -258,7 +205,8 @@ def main() -> None:
258205
)
259206

260207
# Create the deployment
261-
created_deployment = datacrunch.containers.create(deployment)
208+
created_deployment = datacrunch.containers.create_deployment(
209+
deployment)
262210
print(f"Created deployment: {created_deployment.name}")
263211
print("This will take several minutes while the model is downloaded and the server starts...")
264212

@@ -268,28 +216,37 @@ def main() -> None:
268216
cleanup_resources(datacrunch)
269217
return
270218

271-
# Get the deployment endpoint URL and inference API key
272-
containers_api_url = CONTAINERS_API_URL
273-
inference_api_key = INFERENCE_API_KEY
274-
275-
# If not provided as environment variables, prompt the user
276-
if not containers_api_url:
277-
containers_api_url = input(
278-
"Enter your Containers API URL from the DataCrunch dashboard: ")
279-
else:
219+
# Test the deployment with a simple request
220+
print("\nTesting the deployment...")
221+
try:
222+
# Test model info endpoint
280223
print(
281-
f"Using Containers API URL from environment: {containers_api_url}")
282-
283-
if not inference_api_key:
284-
inference_api_key = input(
285-
"Enter your Inference API Key from the DataCrunch dashboard: ")
286-
else:
287-
print("Using Inference API Key from environment")
224+
"Testing /get_model_info endpoint by making a sync GET request to the SGLang server...")
225+
model_info_response = created_deployment._inference_client.get(
226+
path="/get_model_info")
227+
print("Model info endpoint is working!")
228+
print(f"Response: {model_info_response}")
229+
230+
# Test completions endpoint
231+
print("\nTesting completions API...")
232+
completions_data = {
233+
"model": MODEL_PATH,
234+
"prompt": "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?",
235+
"max_tokens": 128,
236+
"temperature": 0.7,
237+
"top_p": 0.9,
238+
}
239+
240+
# Make a sync inference request to the SGLang server
241+
completions_response = created_deployment.run_sync(
242+
completions_data,
243+
path="/v1/completions",
244+
)
245+
print("Completions API is working!")
246+
print(f"Response: {completions_response}")
288247

289-
# Test the deployment
290-
if containers_api_url and inference_api_key:
291-
print("\nTesting the deployment...")
292-
test_deployment(containers_api_url, inference_api_key)
248+
except Exception as e:
249+
print(f"Error testing deployment: {e}")
293250

294251
# Cleanup or keep running based on user input
295252
keep_running = input(

0 commit comments

Comments
 (0)