diff --git a/Makefile b/Makefile index 1c2ab74..acc46ee 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # OpenGradient SDK Makefile -# Default model for testing (override with: make chat MODEL=openai/gpt-4o) -MODEL ?= anthropic/claude-3.7-sonnet +# Default model for testing (override with: make chat MODEL=google/gemini-3-pro-preview) +MODEL ?= google/gemini-3-pro-preview # ============================================================================ # Development @@ -67,7 +67,7 @@ infer: completion: python -m opengradient.cli completion \ - --model $(MODEL) --mode TEE \ + --model $(MODEL) \ --prompt "Hello, how are you?" \ --max-tokens 50 @@ -75,16 +75,23 @@ chat: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"Tell me a fun fact"}]' \ - --max-tokens 150 + --max-tokens 350 chat-stream: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"Tell me a short story"}]' \ - --max-tokens 250 \ + --max-tokens 1250 \ --stream chat-tool: + python -m opengradient.cli chat \ + --model $(MODEL) \ + --messages '[{"role":"user","content":"What is the weather in Tokyo?"}]' \ + --tools '[{"type":"function","function":{"name":"get_weather","description":"Get weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]' \ + --max-tokens 100 + +chat-stream-tool: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"What is the weather in Tokyo?"}]' \ @@ -93,4 +100,4 @@ chat-tool: --stream .PHONY: install build publish check docs test utils_test client_test langchain_adapter_test opg_token_test integrationtest examples \ - infer completion chat chat-stream chat-tool + infer completion chat chat-stream chat-tool chat-stream-tool diff --git a/pyproject.toml b/pyproject.toml index ef374cc..262637b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "opengradient" -version = "0.7.4" +version = "0.7.5" description = "Python SDK for OpenGradient decentralized model management & inference services" authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}] readme = "README.md" @@ -27,7 +27,7 @@ dependencies = [ "langchain>=0.3.7", "openai>=1.58.1", "pydantic>=2.9.2", - "og-test-v2-x402==0.0.9" + "og-test-v2-x402==0.0.11" ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index 7a51cd1..df03caa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ requests>=2.32.3 langchain>=0.3.7 openai>=1.58.1 pydantic>=2.9.2 -og-test-v2-x402==0.0.9 \ No newline at end of file +og-test-v2-x402==0.0.11 \ No newline at end of file diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index 2c5b07e..979f676 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -608,8 +608,16 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_van click.secho("Chat Output:", fg="yellow", bold=True) click.echo() for key, value in chat_output.items(): - if value != None and value != "" and value != "[]" and value != []: - click.echo(f"{key}: {value}") + if value is not None and value not in ("", "[]", []): + # Normalize list-of-blocks content (e.g. Gemini 3 thought signatures) + if key == "content" and isinstance(value, list): + text = " ".join( + block.get("text", "") for block in value + if isinstance(block, dict) and block.get("type") == "text" + ).strip() + click.echo(f"{key}: {text}") + else: + click.echo(f"{key}: {value}") click.echo() diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index e1490cb..f95acd3 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -5,6 +5,10 @@ import threading from queue import Queue from typing import AsyncGenerator, Dict, List, Optional, Union +import ssl +import socket +import tempfile +from urllib.parse import urlparse import httpx from eth_account.account import LocalAccount @@ -36,6 +40,53 @@ ) +def _fetch_tls_cert_as_ssl_context(server_url: str) -> Optional[ssl.SSLContext]: + """ + Connect to a server, retrieve its TLS certificate (TOFU), + and return an ssl.SSLContext that trusts ONLY that certificate. + + Hostname verification is disabled because the TEE server's cert + is typically issued for a hostname but we may connect via IP address. + The pinned certificate itself provides the trust anchor. + + Returns None if the server is not HTTPS or unreachable. + """ + parsed = urlparse(server_url) + if parsed.scheme != "https": + return None + + hostname = parsed.hostname + port = parsed.port or 443 + + # Connect without verification to retrieve the server's certificate + fetch_ctx = ssl.create_default_context() + fetch_ctx.check_hostname = False + fetch_ctx.verify_mode = ssl.CERT_NONE + + try: + with socket.create_connection((hostname, port), timeout=10) as sock: + with fetch_ctx.wrap_socket(sock, server_hostname=hostname) as ssock: + der_cert = ssock.getpeercert(binary_form=True) + pem_cert = ssl.DER_cert_to_PEM_cert(der_cert) + except Exception: + return None + + # Write PEM to a temp file so we can load it into the SSLContext + cert_file = tempfile.NamedTemporaryFile( + prefix="og_tee_tls_", suffix=".pem", delete=False, mode="w" + ) + cert_file.write(pem_cert) + cert_file.flush() + cert_file.close() + + # Build an SSLContext that trusts ONLY this cert, with hostname check disabled + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.load_verify_locations(cert_file.name) + ctx.check_hostname = False # Cert is for a hostname, but we connect via IP + ctx.verify_mode = ssl.CERT_REQUIRED # Still verify the cert itself + return ctx + + class LLM: """ LLM inference namespace. @@ -64,6 +115,13 @@ def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_ self._og_llm_server_url = og_llm_server_url self._og_llm_streaming_server_url = og_llm_streaming_server_url + self._tls_verify: Union[ssl.SSLContext, bool] = ( + _fetch_tls_cert_as_ssl_context(self._og_llm_server_url) or True + ) + self._streaming_tls_verify: Union[ssl.SSLContext, bool] = ( + _fetch_tls_cert_as_ssl_context(self._og_llm_streaming_server_url) or True + ) + signer = EthAccountSignerv2(self._wallet_account) self._x402_client = x402Clientv2() register_exact_evm_clientv2(self._x402_client, signer, networks=[BASE_TESTNET_NETWORK]) @@ -92,10 +150,10 @@ def _run_coroutine(self, coroutine): async def _initialize_http_clients(self) -> None: if self._request_client is None: - self._request_client_ctx = x402HttpxClientv2(self._x402_client) + self._request_client_ctx = x402HttpxClientv2(self._x402_client, verify=self._tls_verify) self._request_client = await self._request_client_ctx.__aenter__() if self._stream_client is None: - self._stream_client_ctx = x402HttpxClientv2(self._x402_client) + self._stream_client_ctx = x402HttpxClientv2(self._x402_client, verify=self._streaming_tls_verify) self._stream_client = await self._stream_client_ctx.__aenter__() async def _close_http_clients(self) -> None: @@ -223,6 +281,8 @@ async def make_request_v2(): return TextGenerationOutput( transaction_hash="external", completion_output=result.get("completion"), + tee_signature=result.get("tee_signature"), + tee_timestamp=result.get("tee_timestamp"), ) except Exception as e: @@ -348,10 +408,20 @@ async def make_request_v2(): if not choices: raise OpenGradientError(f"Invalid response: 'choices' missing or empty in {result}") + message = choices[0].get("message", {}) + content = message.get("content") + if isinstance(content, list): + message["content"] = " ".join( + block.get("text", "") for block in content + if isinstance(block, dict) and block.get("type") == "text" + ).strip() + return TextGenerationOutput( transaction_hash="external", finish_reason=choices[0].get("finish_reason"), - chat_output=choices[0].get("message"), + chat_output=message, + tee_signature=result.get("tee_signature"), + tee_timestamp=result.get("tee_timestamp"), ) except Exception as e: diff --git a/src/opengradient/defaults.py b/src/opengradient/defaults.py index a23ce6b..c053225 100644 --- a/src/opengradient/defaults.py +++ b/src/opengradient/defaults.py @@ -6,5 +6,6 @@ DEFAULT_INFERENCE_CONTRACT_ADDRESS = "0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE" DEFAULT_SCHEDULER_ADDRESS = "0x7179724De4e7FF9271FA40C0337c7f90C0508eF6" DEFAULT_BLOCKCHAIN_EXPLORER = "https://explorer.opengradient.ai/tx/" -DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://llm.opengradient.ai" -DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://llm.opengradient.ai" +# TODO (Kyle): Add a process to fetch these IPs from the TEE registry +DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://3.15.214.21:443" +DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://3.15.214.21:443" diff --git a/src/opengradient/types.py b/src/opengradient/types.py index 866bfba..3a13aac 100644 --- a/src/opengradient/types.py +++ b/src/opengradient/types.py @@ -239,12 +239,16 @@ class StreamChunk: model: Model identifier usage: Token usage information (only in final chunk) is_final: Whether this is the final chunk (before [DONE]) + tee_signature: RSA-PSS signature over the response, present on the final chunk + tee_timestamp: ISO timestamp from the TEE at signing time, present on the final chunk """ choices: List[StreamChoice] model: str usage: Optional[StreamUsage] = None is_final: bool = False + tee_signature: Optional[str] = None + tee_timestamp: Optional[str] = None @classmethod def from_sse_data(cls, data: Dict) -> "StreamChunk": @@ -275,8 +279,14 @@ def from_sse_data(cls, data: Dict) -> "StreamChunk": is_final = any(c.finish_reason is not None for c in choices) or usage is not None - return cls(choices=choices, model=data.get("model", "unknown"), usage=usage, is_final=is_final) - + return cls( + choices=choices, + model=data.get("model", "unknown"), + usage=usage, + is_final=is_final, + tee_signature=data.get("tee_signature"), + tee_timestamp=data.get("tee_timestamp"), + ) @dataclass class TextGenerationStream: @@ -380,6 +390,12 @@ class TextGenerationOutput: payment_hash: Optional[str] = None """Payment hash for x402 transaction""" + tee_signature: Optional[str] = None + """RSA-PSS signature over the response produced by the TEE enclave.""" + + tee_timestamp: Optional[str] = None + """ISO timestamp from the TEE at signing time.""" + @dataclass class AbiFunction: @@ -427,36 +443,35 @@ class TEE_LLM(str, Enum): Usage: # TEE-verified inference result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], ) """ - - # Existing (Currently turned off) - # META_LLAMA_3_1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct" - # OpenAI models via TEE GPT_4_1_2025_04_14 = "openai/gpt-4.1-2025-04-14" - GPT_4O = "openai/gpt-4o" O4_MINI = "openai/o4-mini" + GPT_5 = "openai/gpt-5" + GPT_5_MINI = "openai/gpt-5-mini" + GPT_5_2 = "openai/gpt-5.2" # Anthropic models via TEE - CLAUDE_3_7_SONNET = "anthropic/claude-3.7-sonnet" - CLAUDE_3_5_HAIKU = "anthropic/claude-3.5-haiku" - CLAUDE_4_0_SONNET = "anthropic/claude-4.0-sonnet" + CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5" + CLAUDE_SONNET_4_6 = "anthropic/claude-sonnet-4-6" + CLAUDE_HAIKU_4_5 = "anthropic/claude-haiku-4-5" + CLAUDE_OPUS_4_5 = "anthropic/claude-opus-4-5" + CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6" # Google models via TEE GEMINI_2_5_FLASH = "google/gemini-2.5-flash" GEMINI_2_5_PRO = "google/gemini-2.5-pro" - GEMINI_2_0_FLASH = "google/gemini-2.0-flash" GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite" + GEMINI_3_PRO = "google/gemini-3-pro-preview" + GEMINI_3_FLASH = "google/gemini-3-flash-preview" # xAI Grok models via TEE - GROK_3_MINI_BETA = "x-ai/grok-3-mini-beta" - GROK_3_BETA = "x-ai/grok-3-beta" - GROK_2_1212 = "x-ai/grok-2-1212" - GROK_2_VISION_LATEST = "x-ai/grok-2-vision-latest" - GROK_4_1_FAST = "x-ai/grok-4.1-fast" + GROK_4 = "x-ai/grok-4" + GROK_4_FAST = "x-ai/grok-4-fast" + GROK_4_1_FAST = "x-ai/grok-4-1-fast" GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning"