diff --git a/README.md b/README.md
index 79d6cfe..aa1d72d 100644
--- a/README.md
+++ b/README.md
@@ -59,11 +59,22 @@
 truffile list apps
 truffile delete
 truffile models
 truffile chat
-truffile chat --no-repl "hello"
-truffile chat --no-default-tools
 truffile proxy --host 127.0.0.1 --port 8080
 ```
 
+In `truffile chat`, runtime controls are slash commands (not launch flags):
+
+- `/help` to list all chat commands
+- `/config` to show current chat config
+- `/reasoning on|off`
+- `/stream on|off`
+- `/json on|off`
+- `/tools on|off`
+- `/max_tokens <int>`, `/temperature <float>`, `/top_p <float>`, `/max_rounds <int>`
+- `/models` to switch model
+- `/system <prompt>`
+- `/mcp connect <url>`, `/mcp tools`, `/mcp status`, `/mcp disconnect`
+
 ## Inference Interfaces
 
 Direct IF2:
diff --git a/pyproject.toml b/pyproject.toml
index 4f4c3a0..55190a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,12 @@
 [build-system]
-requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=8"]
+requires = [
+    "setuptools>=70",
+    "setuptools_scm>=8",
+    "wheel",
+    "grpcio-tools==1.76.0",
+    "protobuf>=6.30.0",
+    "googleapis-common-protos>=1.63.2",
+]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools_scm]
@@ -13,17 +20,19 @@
 name = "truffile"
 dynamic = ["version"]
 description = "truffile the TruffleOS SDK - Connect and deploy apps to Truffle devices"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 license = {text = "MIT"}
 dependencies = [
-    "grpcio>=1.60.0",
-    "protobuf>=4.25.0",
+    "protobuf>=6.30.0",
+    "googleapis-common-protos>=1.63.2",
+    "grpcio==1.76.0",
     "httpx>=0.27.0",
     "pyyaml>=6.0",
     "platformdirs>=3.10.0",
     "websockets>=12.0",
     "zeroconf>=0.131.0",
+    "mcp==1.26.0",
 ]
 
 [project.scripts]
@@ -31,8 +40,8 @@ truffile = "truffile.cli:main"
 
 [project.optional-dependencies]
 dev = [
-    "pytest>=8.0.0",
-    "grpcio-tools>=1.60.0",
+    "pytest>=9.0.2",
+    "grpcio-tools==1.76.0",
 ]
 
 [tool.setuptools.packages.find]
diff --git a/truffile/__init__.py b/truffile/__init__.py
index 0954d6e..32eda02 100644
--- a/truffile/__init__.py
+++ b/truffile/__init__.py
@@ -1,3 +1,9 @@
+import os
+
+# Keep gRPC from enabling fork support in this CLI process.
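+# gRPC reads this env var when the library initializes, so it must be set before anything imports grpc.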
+os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" + try: from ._version import __version__ except ImportError: diff --git a/truffile/_version.py b/truffile/_version.py index a4168bf..1c50a72 100644 --- a/truffile/_version.py +++ b/truffile/_version.py @@ -28,7 +28,7 @@ commit_id: COMMIT_ID __commit_id__: COMMIT_ID -__version__ = version = '0.1.15.dev11' -__version_tuple__ = version_tuple = (0, 1, 15, 'dev11') +__version__ = version = '0.1.15.dev13' +__version_tuple__ = version_tuple = (0, 1, 15, 'dev13') -__commit_id__ = commit_id = 'g708f695c8' +__commit_id__ = commit_id = 'g73970587d' diff --git a/truffile/cli.py b/truffile/cli.py index c55d288..a96056d 100644 --- a/truffile/cli.py +++ b/truffile/cli.py @@ -9,6 +9,7 @@ import sys import threading import time +from dataclasses import dataclass from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path from typing import Any, Callable @@ -56,7 +57,26 @@ class C: HAMMER = "🔨" TOOL_TAGS = ("", "") TOOL_TAG_PATTERN = re.compile(r"\s*(.*?)\s*", re.DOTALL) -REPL_COMMANDS = ["/help", "/", "/history", "/reset", "/models", "/exit", "/quit"] +REPL_COMMANDS = [ + "/help", + "/", + "/history", + "/reset", + "/models", + "/config", + "/reasoning", + "/stream", + "/json", + "/tools", + "/max_tokens", + "/temperature", + "/top_p", + "/max_rounds", + "/system", + "/mcp", + "/exit", + "/quit", +] class Spinner: @@ -972,6 +992,125 @@ def _fetch_models_payload(client: httpx.Client, ip: str) -> list[dict[str, Any]] return out +@dataclass +class ChatSettings: + model: str + system_prompt: str | None = None + reasoning: bool = True + stream: bool = True + json_mode: bool = False + max_tokens: int = 512 + temperature: float | None = None + top_p: float | None = None + default_tools: bool = True + max_tool_rounds: int = 8 + + +class ChatMCPClient: + def __init__(self) -> None: + self._group: Any | None = None + self.endpoint: str | None = None + + @property + def connected(self) -> bool: + return self._group is not None + + async def connect_streamable_http(self, endpoint: str) -> None: + from mcp.client.session_group import ClientSessionGroup, StreamableHttpParameters + + await self.disconnect() + group = ClientSessionGroup() + await group.__aenter__() + try: + await group.connect_to_server(StreamableHttpParameters(url=endpoint)) + except Exception: + await group.__aexit__(None, None, None) + raise + self._group = group + self.endpoint = endpoint + + async def disconnect(self) -> None: + if self._group is None: + self.endpoint = None + return + await self._group.__aexit__(None, None, None) + self._group = None + self.endpoint = None + + def list_tool_names(self) -> list[str]: + if self._group is None: + return [] + return sorted(self._group.tools.keys()) + + def build_openai_tools(self) -> list[dict[str, Any]]: + if self._group is None: + return [] + out: list[dict[str, Any]] = [] + for name, tool in sorted(self._group.tools.items(), key=lambda kv: kv[0]): + params = tool.inputSchema if isinstance(tool.inputSchema, dict) else {"type": "object", "properties": {}} + out.append( + { + "type": "function", + "function": { + "name": name, + "description": str(tool.description or f"MCP tool {name}"), + "parameters": params, + }, + } + ) + return out + + def has_tool(self, name: str) -> bool: + if self._group is None: + return False + return name in self._group.tools + + async def call_tool(self, name: str, arguments: dict[str, Any]) -> dict[str, Any]: + if self._group is None: + return {"error": "mcp not connected"} + try: + result = 
await self._group.call_tool(name=name, arguments=arguments) + content: list[dict[str, Any]] = [] + for part in result.content: + if hasattr(part, "model_dump"): + content.append(part.model_dump()) # type: ignore[call-arg] + elif isinstance(part, dict): + content.append(part) + else: + content.append({"value": str(part)}) + return { + "is_error": bool(result.isError), + "structured_content": result.structuredContent, + "content": content, + } + except Exception as exc: + return {"error": "mcp call failed", "tool": name, "detail": str(exc)} + + +def _print_chat_config(settings: ChatSettings, mcp_client: ChatMCPClient) -> None: + print(f"{C.BLUE}chat config{C.RESET}") + print(f" {C.DIM}model:{C.RESET} {settings.model}") + print(f" {C.DIM}reasoning:{C.RESET} {settings.reasoning}") + print(f" {C.DIM}stream:{C.RESET} {settings.stream}") + print(f" {C.DIM}json:{C.RESET} {settings.json_mode}") + print(f" {C.DIM}tools:{C.RESET} {settings.default_tools}") + print(f" {C.DIM}max_tokens:{C.RESET} {settings.max_tokens}") + print(f" {C.DIM}temperature:{C.RESET} {settings.temperature}") + print(f" {C.DIM}top_p:{C.RESET} {settings.top_p}") + print(f" {C.DIM}max_rounds:{C.RESET} {settings.max_tool_rounds}") + print(f" {C.DIM}system:{C.RESET} {settings.system_prompt or ''}") + print(f" {C.DIM}mcp:{C.RESET} {mcp_client.endpoint or ''}") + + +def _parse_on_off(value: str) -> bool | None: + v = value.strip().lower() + if v in {"on", "true", "1", "yes"}: + return True + if v in {"off", "false", "0", "no"}: + return False + return None + + def _build_default_tools() -> list[dict[str, Any]]: return [ { @@ -1117,7 +1256,7 @@ def _build_chat_payload( *, model: str, messages: list[dict[str, Any]], - args: argparse.Namespace, + settings: ChatSettings, stream: bool, tools: list[dict[str, Any]] | None, ) -> dict[str, Any]: @@ -1125,13 +1264,13 @@ def _build_chat_payload( "model": model, "messages": messages, "stream": stream, - "reasoning": {"enabled": bool(args.reasoning)}, - "max_tokens": int(args.max_tokens) if args.max_tokens is not None else 512, + "reasoning": {"enabled": bool(settings.reasoning)}, + "max_tokens": int(settings.max_tokens), } - if args.temperature is not None: - body["temperature"] = args.temperature - if args.top_p is not None: - body["top_p"] = args.top_p + if settings.temperature is not None: + body["temperature"] = settings.temperature + if settings.top_p is not None: + body["top_p"] = settings.top_p if stream: body["stream_options"] = {"include_usage": True} if tools: @@ -1280,7 +1419,7 @@ def _run_single_chat_request( url: str, headers: dict[str, str], payload: dict[str, Any], - args: argparse.Namespace, + settings: ChatSettings, stream: bool, ) -> tuple[dict[str, Any], dict[str, Any] | None, bool]: wait_anim = MushroomPulse("thinking") @@ -1334,7 +1473,7 @@ def _run_single_chat_request( reasoning_chunk = delta.get("reasoning") if isinstance(reasoning_chunk, str) and reasoning_chunk: reasoning_parts.append(reasoning_chunk) - if args.reasoning: + if settings.reasoning: if not reasoning_stream_started: print(f"{C.GRAY}thinking:{C.RESET}") reasoning_stream_started = True @@ -1343,7 +1482,7 @@ def _run_single_chat_request( content_chunk = delta.get("content") if isinstance(content_chunk, str) and content_chunk: content_parts.append(content_chunk) - if not args.reasoning: + if not settings.reasoning: print(content_chunk, end="", flush=True) for tc in delta.get("tool_calls") or []: @@ -1381,7 +1520,7 @@ def _run_single_chat_request( msg["reasoning_content"] = reasoning_text if tool_calls_by_index: 
msg["tool_calls"] = [tool_calls_by_index[i] for i in sorted(tool_calls_by_index)] - if args.reasoning: + if settings.reasoning: if reasoning_stream_started: print() response_text = str(msg.get("content") or "") @@ -1400,7 +1539,7 @@ def _run_single_chat_request( body = resp.json() finally: wait_anim.stop() - if args.json: + if settings.json_mode: print(json.dumps(body, indent=2)) choices = body.get("choices", []) @@ -1417,30 +1556,42 @@ def _run_single_chat_request( _print_reasoning_and_response( str(out.get("reasoning_content") or ""), str(out.get("content") or ""), - bool(args.reasoning), + bool(settings.reasoning), ) return out, body.get("usage") if isinstance(body.get("usage"), dict) else None, False -def _run_chat_turn( +async def _run_chat_turn( *, client: httpx.Client, url: str, headers: dict[str, str], model: str, - args: argparse.Namespace, - stream: bool, - tools: list[dict[str, Any]] | None, + settings: ChatSettings, + mcp_client: ChatMCPClient, messages: list[dict[str, Any]], user_text: str, ) -> int: messages.append({"role": "user", "content": user_text}) - max_rounds = max(1, int(getattr(args, "max_tool_rounds", 8))) + max_rounds = max(1, int(settings.max_tool_rounds)) for _ in range(max_rounds): - payload = _build_chat_payload(model=model, messages=messages, args=args, stream=stream, tools=tools) + stream = settings.stream and not settings.json_mode + tools: list[dict[str, Any]] = [] + if settings.default_tools: + tools.extend(_build_default_tools()) + if mcp_client.connected: + tools.extend(mcp_client.build_openai_tools()) + + payload = _build_chat_payload( + model=model, + messages=messages, + settings=settings, + stream=stream, + tools=tools or None, + ) assistant_msg, usage, interrupted = _run_single_chat_request( - client=client, url=url, headers=headers, payload=payload, args=args, stream=stream + client=client, url=url, headers=headers, payload=payload, settings=settings, stream=stream ) messages.append(assistant_msg) if isinstance(usage, dict): @@ -1464,8 +1615,15 @@ def _run_chat_turn( parsed_args = json.loads(raw_args) except json.JSONDecodeError: parsed_args = {"_raw": raw_args} - print(f"{C.CYAN}{HAMMER} tool{C.RESET} {name}") - tool_result = _execute_default_tool(name, parsed_args) + if name in {"web_search", "web_fetch"}: + print(f"{C.CYAN}{HAMMER} tool{C.RESET} {name}") + tool_result = _execute_default_tool(name, parsed_args) + elif mcp_client.has_tool(name): + print(f"{C.CYAN}{HAMMER} mcp{C.RESET} {name}") + tool_result = await mcp_client.call_tool(name, parsed_args) + else: + print(f"{C.YELLOW}{WARN} unknown tool{C.RESET} {name}") + tool_result = {"error": f"unknown tool '{name}'"} messages.append( { "role": "tool", @@ -1479,33 +1637,23 @@ def _run_chat_turn( async def cmd_chat(args, storage: StorageService) -> int: - prompt = args.prompt - if not prompt and args.prompt_words: - prompt = " ".join(args.prompt_words).strip() - if args.no_repl and not prompt: - error("Missing prompt") - print(f" {C.DIM}Usage: truffile chat --no-repl \"hello\"{C.RESET}") - return 1 + prompt = "" device, ip = await _resolve_connected_device(storage) if not device or not ip: return 1 - model = args.model + spinner = Spinner("Resolving default model") + spinner.start() + model = await _default_model(ip) if not model: - spinner = Spinner("Resolving default model") - spinner.start() - model = await _default_model(ip) - if not model: - spinner.fail("Failed to resolve default model from IF2") - return 1 - spinner.stop(success=True) + spinner.fail("Failed to resolve default model from 
IF2") + return 1 + spinner.stop(success=True) - stream = not args.no_stream and not args.json - tools = _build_default_tools() if getattr(args, "default_tools", True) else None + settings = ChatSettings(model=model) + mcp_client = ChatMCPClient() messages: list[dict[str, Any]] = [] - if args.system: - messages.append({"role": "system", "content": args.system}) url = f"http://{ip}/if2/v1/chat/completions" headers = {"Content-Type": "application/json"} @@ -1516,43 +1664,31 @@ async def cmd_chat(args, storage: StorageService) -> int: with httpx.Client(timeout=None) as client: spinner.stop(success=True) - # Single-turn mode. - if args.no_repl: - return _run_chat_turn( - client=client, - url=url, - headers=headers, - model=model, - args=args, - stream=stream, - tools=tools, - messages=messages, - user_text=prompt or "", - ) - # REPL mode (default). - print(f"{C.DIM}model: {model}{C.RESET}") + print(f"{C.DIM}model: {settings.model}{C.RESET}") print( - f"{C.DIM}commands: /help, /history, /reset, /models, /exit{C.RESET}" + f"{C.DIM}commands: /help, /history, /reset, /models, /config, /mcp, /exit{C.RESET}" ) cleanup_repl = _install_repl_completer(REPL_COMMANDS) try: if prompt: print(f"{C.CYAN}> {prompt}{C.RESET}") - rc = _run_chat_turn( + rc = await _run_chat_turn( client=client, url=url, headers=headers, - model=model, - args=args, - stream=stream, - tools=tools, + model=settings.model, + settings=settings, + mcp_client=mcp_client, messages=messages, user_text=prompt, ) if rc != 0: - return rc + if rc == 130: + prompt = "" + else: + return rc while True: try: @@ -1576,20 +1712,180 @@ async def cmd_chat(args, storage: StorageService) -> int: continue if line == "/reset": messages = [] - if args.system: - messages.append({"role": "system", "content": args.system}) + if settings.system_prompt: + messages.append({"role": "system", "content": settings.system_prompt}) print(f"{C.YELLOW}history reset{C.RESET}") continue if line == "/models": try: models = _fetch_models_payload(client, ip) - selected_model = _pick_model_interactive(models, model) - if selected_model and selected_model != model: - model = selected_model - print(f"{C.GREEN}{CHECK}{C.RESET} model switched: {model}") + selected_model = _pick_model_interactive(models, settings.model) + if selected_model and selected_model != settings.model: + settings.model = selected_model + print(f"{C.GREEN}{CHECK}{C.RESET} model switched: {settings.model}") except Exception as exc: error(f"failed to list models: {exc}") continue + if line == "/config": + _print_chat_config(settings, mcp_client) + continue + if line.startswith("/reasoning"): + arg = line[len("/reasoning"):].strip() + if not arg: + print(f"{C.DIM}reasoning={settings.reasoning}{C.RESET}") + continue + val = _parse_on_off(arg) + if val is None: + warn("usage: /reasoning ") + continue + settings.reasoning = val + print(f"{C.GREEN}{CHECK}{C.RESET} reasoning={settings.reasoning}") + continue + if line.startswith("/stream"): + arg = line[len("/stream"):].strip() + if not arg: + print(f"{C.DIM}stream={settings.stream}{C.RESET}") + continue + val = _parse_on_off(arg) + if val is None: + warn("usage: /stream ") + continue + settings.stream = val + print(f"{C.GREEN}{CHECK}{C.RESET} stream={settings.stream}") + continue + if line.startswith("/json"): + arg = line[len("/json"):].strip() + if not arg: + print(f"{C.DIM}json={settings.json_mode}{C.RESET}") + continue + val = _parse_on_off(arg) + if val is None: + warn("usage: /json ") + continue + settings.json_mode = val + print(f"{C.GREEN}{CHECK}{C.RESET} 
json={settings.json_mode}") + continue + if line.startswith("/tools"): + arg = line[len("/tools"):].strip() + if not arg: + print(f"{C.DIM}tools={settings.default_tools}{C.RESET}") + continue + val = _parse_on_off(arg) + if val is None: + warn("usage: /tools ") + continue + settings.default_tools = val + print(f"{C.GREEN}{CHECK}{C.RESET} tools={settings.default_tools}") + continue + if line.startswith("/max_tokens"): + arg = line[len("/max_tokens"):].strip() + if not arg: + print(f"{C.DIM}max_tokens={settings.max_tokens}{C.RESET}") + continue + try: + settings.max_tokens = max(1, int(arg)) + print(f"{C.GREEN}{CHECK}{C.RESET} max_tokens={settings.max_tokens}") + except ValueError: + warn("usage: /max_tokens ") + continue + if line.startswith("/temperature"): + arg = line[len("/temperature"):].strip() + if not arg: + print(f"{C.DIM}temperature={settings.temperature}{C.RESET}") + continue + if arg.lower() in {"off", "none"}: + settings.temperature = None + print(f"{C.GREEN}{CHECK}{C.RESET} temperature=None") + continue + try: + settings.temperature = float(arg) + print(f"{C.GREEN}{CHECK}{C.RESET} temperature={settings.temperature}") + except ValueError: + warn("usage: /temperature ") + continue + if line.startswith("/top_p"): + arg = line[len("/top_p"):].strip() + if not arg: + print(f"{C.DIM}top_p={settings.top_p}{C.RESET}") + continue + if arg.lower() in {"off", "none"}: + settings.top_p = None + print(f"{C.GREEN}{CHECK}{C.RESET} top_p=None") + continue + try: + settings.top_p = float(arg) + print(f"{C.GREEN}{CHECK}{C.RESET} top_p={settings.top_p}") + except ValueError: + warn("usage: /top_p ") + continue + if line.startswith("/max_rounds"): + arg = line[len("/max_rounds"):].strip() + if not arg: + print(f"{C.DIM}max_rounds={settings.max_tool_rounds}{C.RESET}") + continue + try: + settings.max_tool_rounds = max(1, int(arg)) + print(f"{C.GREEN}{CHECK}{C.RESET} max_rounds={settings.max_tool_rounds}") + except ValueError: + warn("usage: /max_rounds ") + continue + if line.startswith("/system"): + arg = line[len("/system"):].strip() + if not arg: + print(f"{C.DIM}system={settings.system_prompt or ''}{C.RESET}") + continue + if arg.lower() in {"off", "none", "clear"}: + settings.system_prompt = None + if messages and messages[0].get("role") == "system": + messages.pop(0) + print(f"{C.GREEN}{CHECK}{C.RESET} system prompt cleared") + continue + settings.system_prompt = arg + if messages and messages[0].get("role") == "system": + messages[0]["content"] = arg + else: + messages.insert(0, {"role": "system", "content": arg}) + print(f"{C.GREEN}{CHECK}{C.RESET} system prompt updated") + continue + if line.startswith("/mcp"): + parts = line.split(maxsplit=2) + if len(parts) == 1 or parts[1] == "status": + print( + f"{C.DIM}mcp={mcp_client.endpoint or ''} " + f"tools={len(mcp_client.list_tool_names())}{C.RESET}" + ) + continue + sub = parts[1].lower() + if sub == "connect": + if len(parts) < 3: + warn("usage: /mcp connect ") + continue + endpoint = parts[2].strip() + if not endpoint.startswith(("http://", "https://")): + warn("mcp endpoint must start with http:// or https://") + continue + try: + await mcp_client.connect_streamable_http(endpoint) + print( + f"{C.GREEN}{CHECK}{C.RESET} mcp connected: {endpoint} " + f"({len(mcp_client.list_tool_names())} tools)" + ) + except Exception as exc: + error(f"mcp connect failed: {exc}") + continue + if sub == "disconnect": + await mcp_client.disconnect() + print(f"{C.GREEN}{CHECK}{C.RESET} mcp disconnected") + continue + if sub == "tools": + names = 
mcp_client.list_tool_names() + if not names: + print(f"{C.DIM}no mcp tools available{C.RESET}") + else: + print(f"{C.BLUE}mcp tools:{C.RESET} {', '.join(names)}") + continue + warn("usage: /mcp ") + continue if line.startswith("/"): matches = [cmd for cmd in REPL_COMMANDS if cmd.startswith(line)] if matches: @@ -1599,14 +1895,13 @@ async def cmd_chat(args, storage: StorageService) -> int: _print_repl_commands() continue - rc = _run_chat_turn( + rc = await _run_chat_turn( client=client, url=url, headers=headers, - model=model, - args=args, - stream=stream, - tools=tools, + model=settings.model, + settings=settings, + mcp_client=mcp_client, messages=messages, user_text=line, ) @@ -1617,6 +1912,7 @@ async def cmd_chat(args, storage: StorageService) -> int: finally: if cleanup_repl: cleanup_repl() + await mcp_client.disconnect() return 0 except Exception as e: try: @@ -2391,7 +2687,7 @@ def print_help(): print(f" {C.BLUE}delete{C.RESET} Delete installed apps from device") print(f" {C.BLUE}list{C.RESET} List installed apps or devices") print(f" {C.BLUE}models{C.RESET} List models on your Truffle") - print(f" {C.BLUE}chat{C.RESET} [prompt] Chat on your Truffle (REPL by default)") + print(f" {C.BLUE}chat{C.RESET} Chat on your Truffle (REPL by default)") print(f" {C.BLUE}proxy{C.RESET} Run OpenAI-compatible proxy") print() print(f"{C.BOLD}Examples:{C.RESET}") @@ -2404,8 +2700,7 @@ def print_help(): print(f" {C.DIM}truffile list apps{C.RESET}") print(f" {C.DIM}truffile models{C.RESET} {C.DIM}# show models on your Truffle{C.RESET}") print(f" {C.DIM}truffile chat{C.RESET} {C.DIM}# open interactive REPL chat{C.RESET}") - print(f" {C.DIM}truffile chat --no-repl \"hello\"{C.RESET} {C.DIM}# one-shot chat{C.RESET}") - print(f" {C.DIM}truffile chat --no-reasoning{C.RESET} {C.DIM}# disable reasoning output{C.RESET}") + print(f" {C.DIM}# in chat: /help, /config, /reasoning on|off, /mcp connect {C.RESET}") print(f" {C.DIM}truffile proxy{C.RESET} {C.DIM}# run local /v1 proxy{C.RESET}") print() @@ -2447,38 +2742,6 @@ def main() -> int: p_models = subparsers.add_parser("models", add_help=False) p_chat = subparsers.add_parser("chat", add_help=False) - p_chat.add_argument("prompt_words", nargs="*", help="Prompt text (alternative to --prompt)") - p_chat.add_argument("-p", "--prompt", help="Prompt text") - p_chat.add_argument("-m", "--model", help="Model id/uuid (default: first model from IF2 list)") - p_chat.add_argument("--system", help="System prompt") - p_chat.add_argument( - "--reasoning", - action=argparse.BooleanOptionalAction, - default=True, - help="Enable/disable reasoning output (default: enabled)", - ) - p_chat.add_argument("--max-tokens", type=int, help="Max response tokens") - p_chat.add_argument("--temperature", type=float, help="Sampling temperature") - p_chat.add_argument("--top-p", type=float, help="Nucleus sampling top-p") - p_chat.add_argument("--no-stream", action="store_true", help="Disable streaming output") - p_chat.add_argument("--json", action="store_true", help="Print full JSON response (non-stream)") - p_chat.add_argument( - "--default-tools", - action=argparse.BooleanOptionalAction, - default=True, - help="Enable built-in web_search/web_fetch tools", - ) - p_chat.add_argument( - "--max-tool-rounds", - type=int, - default=8, - help="Max assistant/tool loop rounds per user turn", - ) - p_chat.add_argument( - "--no-repl", - action="store_true", - help="Run single-turn chat and exit", - ) p_proxy = subparsers.add_parser("proxy", add_help=False) p_proxy.add_argument("--device", "-d", 
help="Device name (default: last connected)")