88import time
99import signal
1010import sys
11- import requests
12-
11+ from datetime import datetime
1312from datacrunch import DataCrunchClient
1413from datacrunch .exceptions import APIException
1514from datacrunch .containers .containers import (
2928 ContainerDeploymentStatus ,
3029)
3130
# Timestamp suffix keeps repeated tutorial runs from colliding on the
# deployment name, e.g. 20250403-120000.
# NOTE(review): .lower() is a no-op on a digits-and-dash string, but it is
# kept to preserve the original value exactly.
CURRENT_TIMESTAMP = datetime.now().strftime(
    "%Y%m%d-%H%M%S").lower()  # e.g. 20250403-120000

# Configuration constants
DEPLOYMENT_NAME = f"sglang-deployment-example-{CURRENT_TIMESTAMP}"
MODEL_PATH = "deepseek-ai/deepseek-llm-7b-chat"
HF_SECRET_NAME = "huggingface-token"
IMAGE_URL = "docker.io/lmsysorg/sglang:v0.4.1.post6-cu124"

# Get confidential values from environment variables.
# Each is None when the variable is unset; main() falls back to prompting
# the user interactively in that case.
DATACRUNCH_CLIENT_ID = os.environ.get('DATACRUNCH_CLIENT_ID')
DATACRUNCH_CLIENT_SECRET = os.environ.get('DATACRUNCH_CLIENT_SECRET')
INFERENCE_KEY = os.environ.get('INFERENCE_KEY')
HF_TOKEN = os.environ.get('HF_TOKEN')

# DataCrunch client instance (global for graceful shutdown)
datacrunch = None
@@ -99,81 +99,24 @@ def graceful_shutdown(signum, frame) -> None:
9999 sys .exit (0 )
100100
101101
def test_deployment(base_url: str, api_key: str) -> None:
    """Test the deployment with a simple request.

    First checks the SGLang server's /get_model_info endpoint, then streams
    a short completion from /v1/completions, printing only the first five
    server-sent events.

    Args:
        base_url: The base URL of the deployment
        api_key: The API key for authentication
    """
    # First, check if the model info endpoint is working
    model_info_url = f"{base_url}/get_model_info"
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    try:
        print("\nTesting /get_model_info endpoint...")
        # timeout added: requests has no default timeout, so a hung server
        # would otherwise block this script indefinitely.
        response = requests.get(model_info_url, headers=headers, timeout=30)
        if response.status_code == 200:
            print("Model info endpoint is working!")
            print(f"Response: {response.json()}")
        else:
            print(f"Request failed with status code {response.status_code}")
            print(f"Response: {response.text}")
            return  # no point testing completions if the server is unhealthy

        # Now test completions endpoint
        print("\nTesting completions API with streaming...")
        completions_url = f"{base_url}/v1/completions"

        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {api_key}',
            'Accept': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
        }

        data = {
            "model": MODEL_PATH,
            "prompt": "Solar wind is a curious phenomenon. Tell me more about it",
            "max_tokens": 128,
            "temperature": 0.7,
            "top_p": 0.9,
            "stream": True
        }

        # stream=True keeps the connection open so events can be read
        # incrementally; the with-block guarantees the connection is released.
        with requests.post(completions_url, headers=headers, json=data,
                           stream=True, timeout=30) as response:
            if response.status_code == 200:
                print("Stream started. Receiving first 5 events...\n")
                for i, line in enumerate(response.iter_lines(decode_unicode=True)):
                    if line:
                        print(line)
                        if i >= 4:  # Only show first 5 events
                            print("...(response continues)...")
                            break
            else:
                print(
                    f"Request failed with status code {response.status_code}")
                print(f"Response: {response.text}")

    except requests.RequestException as e:
        # Covers connection errors, timeouts, and invalid responses.
        print(f"An error occurred: {e}")
166102def main () -> None :
167103 """Main function demonstrating SGLang deployment."""
168104 try :
169- if not HF_TOKEN :
170- print ("Please set HF_TOKEN environment variable with your Hugging Face token" )
171- return
105+ # Get the inference API key
106+ inference_key = INFERENCE_KEY
107+ if not inference_key :
108+ inference_key = input (
109+ "Enter your Inference API Key from the DataCrunch dashboard: " )
110+ else :
111+ print ("Using Inference API Key from environment" )
172112
173- # Initialize client
113+ # Initialize client with inference key
174114 global datacrunch
175115 datacrunch = DataCrunchClient (
176- DATACRUNCH_CLIENT_ID , DATACRUNCH_CLIENT_SECRET )
116+ DATACRUNCH_CLIENT_ID ,
117+ DATACRUNCH_CLIENT_SECRET ,
118+ inference_key = inference_key
119+ )
177120
178121 # Register signal handlers for cleanup
179122 signal .signal (signal .SIGINT , graceful_shutdown )
@@ -188,6 +131,10 @@ def main() -> None:
188131 secret .name == HF_SECRET_NAME for secret in existing_secrets )
189132
190133 if not secret_exists :
134+ # check is HF_TOKEN is set, if not, prompt the user
135+ if not HF_TOKEN :
136+ HF_TOKEN = input (
137+ "Enter your Hugging Face token: " )
191138 datacrunch .containers .create_secret (
192139 HF_SECRET_NAME , HF_TOKEN )
193140 print (f"Secret '{ HF_SECRET_NAME } ' created successfully" )
@@ -258,7 +205,8 @@ def main() -> None:
258205 )
259206
260207 # Create the deployment
261- created_deployment = datacrunch .containers .create (deployment )
208+ created_deployment = datacrunch .containers .create_deployment (
209+ deployment )
262210 print (f"Created deployment: { created_deployment .name } " )
263211 print ("This will take several minutes while the model is downloaded and the server starts..." )
264212
@@ -268,28 +216,37 @@ def main() -> None:
268216 cleanup_resources (datacrunch )
269217 return
270218
271- # Get the deployment endpoint URL and inference API key
272- containers_api_url = CONTAINERS_API_URL
273- inference_api_key = INFERENCE_API_KEY
274-
275- # If not provided as environment variables, prompt the user
276- if not containers_api_url :
277- containers_api_url = input (
278- "Enter your Containers API URL from the DataCrunch dashboard: " )
279- else :
219+ # Test the deployment with a simple request
220+ print ("\n Testing the deployment..." )
221+ try :
222+ # Test model info endpoint
280223 print (
281- f"Using Containers API URL from environment: { containers_api_url } " )
282-
283- if not inference_api_key :
284- inference_api_key = input (
285- "Enter your Inference API Key from the DataCrunch dashboard: " )
286- else :
287- print ("Using Inference API Key from environment" )
224+ "Testing /get_model_info endpoint by making a sync GET request to the SGLang server..." )
225+ model_info_response = created_deployment ._inference_client .get (
226+ path = "/get_model_info" )
227+ print ("Model info endpoint is working!" )
228+ print (f"Response: { model_info_response } " )
229+
230+ # Test completions endpoint
231+ print ("\n Testing completions API..." )
232+ completions_data = {
233+ "model" : MODEL_PATH ,
234+ "prompt" : "Is consciousness fundamentally computational, or is there something more to subjective experience that cannot be reduced to information processing?" ,
235+ "max_tokens" : 128 ,
236+ "temperature" : 0.7 ,
237+ "top_p" : 0.9 ,
238+ }
239+
240+ # Make a sync inference request to the SGLang server
241+ completions_response = created_deployment .run_sync (
242+ completions_data ,
243+ path = "/v1/completions" ,
244+ )
245+ print ("Completions API is working!" )
246+ print (f"Response: { completions_response } " )
288247
289- # Test the deployment
290- if containers_api_url and inference_api_key :
291- print ("\n Testing the deployment..." )
292- test_deployment (containers_api_url , inference_api_key )
248+ except Exception as e :
249+ print (f"Error testing deployment: { e } " )
293250
294251 # Cleanup or keep running based on user input
295252 keep_running = input (
0 commit comments