diff --git a/parallel_web_tools/cli/commands.py b/parallel_web_tools/cli/commands.py
index f271d44..90ee0e5 100644
--- a/parallel_web_tools/cli/commands.py
+++ b/parallel_web_tools/cli/commands.py
@@ -7,6 +7,7 @@
 import sys
 import tempfile
 import time
+from pathlib import Path
 from typing import Any, NoReturn
 
 import click
@@ -138,6 +139,12 @@ def format_help(self, ctx, formatter):
 EXIT_AUTH_ERROR = 3  # Authentication/authorization failure
 EXIT_API_ERROR = 4  # API call failed
 EXIT_TIMEOUT = 5  # Operation timed out
+EXIT_INTERRUPTED = 130  # SIGINT / Ctrl-C (matches POSIX 128 + signal number)
+
+# Default subdirectory for `research run` / `research poll` auto-saved results.
+# Lives under the user's cwd so files don't leak into $HOME or wherever they
+# happened to invoke the CLI.
+DEFAULT_RESEARCH_OUTPUT_DIR = "parallel-research"
 
 
 # =============================================================================
@@ -186,6 +193,27 @@ def _handle_error(
     sys.exit(exit_code)
 
 
+def _exit_research_interrupted(run_id: str | None) -> NoReturn:
+    """Print a helpful resume hint after Ctrl-C and exit."""
+    if run_id:
+        console.print("\n[bold yellow]Interrupted.[/bold yellow] The task is still running on the server.")
+        console.print(f"[dim]Resume with: parallel-cli research poll {run_id}[/dim]")
+    else:
+        console.print("\n[bold yellow]Interrupted before task creation.[/bold yellow]")
+    sys.exit(EXIT_INTERRUPTED)
+
+
+def _exit_research_timeout(error: TimeoutError, output_json: bool, suggest_poll: bool = True) -> NoReturn:
+    """Format a research timeout for human or JSON output and exit."""
+    if output_json:
+        print(json.dumps({"error": {"message": str(error), "type": "TimeoutError"}}, indent=2))
+    else:
+        console.print(f"[bold yellow]Timeout: {error}[/bold yellow]")
+        if suggest_poll:
+            console.print("[dim]The task is still running. Use 'parallel-cli research poll <run_id>' to resume.[/dim]")
+    sys.exit(EXIT_TIMEOUT)
+
+
 def parse_comma_separated(values: tuple[str, ...]) -> list[str]:
     """Parse a tuple of values that may contain comma-separated items.
 
@@ -1709,12 +1737,32 @@ def research():
 )
 @click.option("--timeout", type=int, default=3600, show_default=True, help="Max wait time in seconds")
 @click.option("--poll-interval", type=int, default=45, show_default=True, help="Seconds between status checks")
-@click.option("--no-wait", is_flag=True, help="Return immediately after creating task (don't poll)")
+@click.option("--no-wait", is_flag=True, help="Return immediately after creating task (don't save or poll)")
 @click.option("--dry-run", is_flag=True, help="Show what would be executed without making API calls")
 @click.option(
-    "-o", "--output", "output_file", type=click.Path(), help="Save results (creates {name}.json and {name}.md)"
+    "--text",
+    "use_text",
+    is_flag=True,
+    help="Return a markdown report (text schema) instead of the default structured JSON.",
+)
+@click.option(
+    "--text-description",
+    default=None,
+    help="Steering description for --text reports (e.g. 'Keep under 1000 words, focus on M&A')",
 )
-@click.option("--json", "output_json", is_flag=True, help="Output JSON to stdout")
+@click.option(
+    "-o",
+    "--output",
+    "output_base",
+    type=click.Path(),
+    default=None,
+    help=(
+        "Output base path; writes {base}.json (and {base}.md with --text). "
+        f"Default: ./{DEFAULT_RESEARCH_OUTPUT_DIR}/<run_id>. Any .json/.md suffix is stripped."
+    ),
+)
+@click.option("--force", is_flag=True, help="Overwrite existing output files")
+@click.option("--json", "output_json", is_flag=True, help="Also print the result as JSON to stdout")
 @click.option(
     "--previous-interaction-id",
     help="Interaction ID from a previous task to reuse as context",
 )
@@ -1727,7 +1775,10 @@ def research_run(
     poll_interval: int,
     no_wait: bool,
     dry_run: bool,
-    output_file: str | None,
+    use_text: bool,
+    text_description: str | None,
+    output_base: str | None,
+    force: bool,
     output_json: bool,
     previous_interaction_id: str | None,
 ):
@@ -1736,20 +1787,36 @@ def research_run(
     QUERY is the research question (max 15,000 chars). Alternatively,
     use --input-file or pass "-" as QUERY to read from stdin.
 
-    Use --previous-interaction-id to continue research from a prior task's context.
-    The interaction ID is shown in the output of every research run.
+    \b
+    Output (when --no-wait is not set):
+        Results are always saved to disk so a long-running task is never lost.
+        Default base path: ./parallel-research/<run_id>. Override with -o NAME
+        (writes NAME.json, plus NAME.md with --text). Existing files are not
+        overwritten unless --force is passed.
 
-    Examples:
+    \b
+    Schemas:
+        Default: auto schema (API-chosen structured JSON; deep-research outputs
+        on `pro` tiers and above).
+        --text: text schema (markdown report with inline citations). Use
+        --text-description to steer length or focus.
 
-        parallel-cli research run "What are the latest developments in quantum computing?"
+    Use --previous-interaction-id to continue research from a prior task.
 
-        parallel-cli research run -f question.txt --processor ultra -o report
+    \b
+    Examples:
+        parallel-cli research run "What are the latest developments in quantum computing?"
+        parallel-cli research run --text "Market analysis of HVAC industry" -o report
+        parallel-cli research run -f question.txt --processor ultra --text -o report
+        echo "My research question" | parallel-cli research run - --json
+        parallel-cli research run "What are the implications?" \\
+            --previous-interaction-id trun_abc123
+    """
+    output_schema = "text" if use_text else "auto"
 
-    echo "My research question" | parallel-cli research run - --json
+    if text_description and not use_text:
+        raise click.UsageError("--text-description requires --text.")
 
-    # Follow-up research using context from a previous task:
-    parallel-cli research run "What are the implications?" --previous-interaction-id trun_abc123
-    """
     # Read from stdin if "-" is passed
     if query == "-":
         query = click.get_text_stream("stdin").read().strip()
@@ -1766,12 +1833,20 @@ def research_run(
         query = query[:15000]
 
     if dry_run:
+        # Show where files will go using a placeholder run_id so users can see the layout.
+        planned_base = _resolve_research_base_path(output_base, "<run_id>")
+        planned_paths = [f"{planned_base}.json"]
+        if use_text:
+            planned_paths.append(f"{planned_base}.md")
         dry_run_data = {
             "dry_run": True,
             "query": query[:200] + "..." if len(query) > 200 else query,
             "query_length": len(query),
             "processor": processor,
+            "output_schema": output_schema,
             "expected_latency": RESEARCH_PROCESSORS[processor],
+            "output_paths": planned_paths,
+            "force": force,
         }
         if output_json:
             print(json.dumps(dry_run_data, indent=2))
@@ -1780,17 +1855,29 @@ def research_run(
         console.print(f"  [bold]Query:[/bold] {dry_run_data['query']}")
         console.print(f"  [bold]Length:[/bold] {len(query)} chars")
         console.print(f"  [bold]Processor:[/bold] {processor}")
+        console.print(f"  [bold]Schema:[/bold] {output_schema}")
         console.print(f"  [bold]Latency:[/bold] {RESEARCH_PROCESSORS[processor]}")
+        console.print(f"  [bold]Output:[/bold] {', '.join(planned_paths)}")
         return
 
+    # Single-element list captures the run_id from the on_status callback so a
+    # Ctrl-C during the long poll can suggest `parallel-cli research poll
+    # <run_id>`. List-as-box keeps the closure simple — no `nonlocal` needed.
+    run_id_box: list[str] = []
+
     try:
         if no_wait:
-            # Create task and return immediately
             if not output_json:
                 console.print(f"[dim]Creating research task with processor: {processor}...[/dim]")
             result = create_research_task(
-                query, processor=processor, source="cli", previous_interaction_id=previous_interaction_id
+                query,
+                processor=processor,
+                source="cli",
+                previous_interaction_id=previous_interaction_id,
+                output_schema=output_schema,
+                text_description=text_description,
             )
+            run_id_box.append(result["run_id"])
 
             if not output_json:
                 console.print(f"\n[bold green]Task created: {result['run_id']}[/bold green]")
@@ -1798,20 +1885,26 @@ def research_run(
                 console.print(f"Interaction ID: {result['interaction_id']}")
                 console.print(f"Track progress: {result['result_url']}")
                 console.print("\n[dim]Use 'parallel-cli research status <run_id>' to check status[/dim]")
-                console.print("[dim]Use 'parallel-cli research poll <run_id>' to wait for results[/dim]")
+                console.print("[dim]Use 'parallel-cli research poll <run_id>' to fetch and save results[/dim]")
                 console.print("[dim]Use '--previous-interaction-id' on a new run to continue this research[/dim]")
 
             if output_json:
                 print(json.dumps(result, indent=2))
         else:
-            # Run and wait for results
             if not output_json:
                 console.print(f"[bold cyan]Starting deep research with processor: {processor}[/bold cyan]")
-                console.print(f"[dim]This may take {RESEARCH_PROCESSORS[processor]}[/dim]\n")
+                console.print(f"[dim]This may take {RESEARCH_PROCESSORS[processor]}[/dim]")
+                if output_base:
+                    planned_base = _resolve_research_base_path(output_base, "<run_id>")
+                    console.print(f"[dim]Will save to: {planned_base}.json[/dim]\n")
+                else:
+                    console.print(f"[dim]Will save to: ./{DEFAULT_RESEARCH_OUTPUT_DIR}/<run_id>.json[/dim]\n")
 
             start_time = time.time()
 
             def on_status(status: str, run_id: str):
+                if not run_id_box:
+                    run_id_box.append(run_id)
                 if output_json:
                     return
                 elapsed = time.time() - start_time
@@ -1833,20 +1926,20 @@ def on_status(status: str, run_id: str):
                 on_status=on_status,
                 source="cli",
                 previous_interaction_id=previous_interaction_id,
+                output_schema=output_schema,
+                text_description=text_description,
             )
 
-            _output_research_result(result, output_file, output_json)
+            _save_and_display_research(result, output_base, output_json, force=force)
 
+    except KeyboardInterrupt:
+        _exit_research_interrupted(run_id_box[0] if run_id_box else None)
     except TimeoutError as e:
-        if output_json:
-            error_data = {"error": {"message": str(e), "type": "TimeoutError"}}
-            print(json.dumps(error_data, indent=2))
-        else:
-            console.print(f"[bold yellow]Timeout: {e}[/bold yellow]")
-            console.print("[dim]The task is still running. Use 'parallel-cli research poll <run_id>' to resume.[/dim]")
-        sys.exit(EXIT_TIMEOUT)
+        _exit_research_timeout(e, output_json)
     except RuntimeError as e:
         _handle_error(e, output_json=output_json)
+    except click.ClickException:
+        raise
     except Exception as e:
         _handle_error(e, output_json=output_json)
 
@@ -1892,24 +1985,43 @@ def research_status(run_id: str, output_json: bool):
 @click.option("--timeout", type=int, default=3600, show_default=True, help="Max wait time in seconds")
 @click.option("--poll-interval", type=int, default=45, show_default=True, help="Seconds between status checks")
 @click.option(
-    "-o", "--output", "output_file", type=click.Path(), help="Save results (creates {name}.json and {name}.md)"
+    "-o",
+    "--output",
+    "output_base",
+    type=click.Path(),
+    default=None,
+    help=(
+        "Output base path; writes {base}.json (and {base}.md if the task used text schema). "
+        f"Default: ./{DEFAULT_RESEARCH_OUTPUT_DIR}/<run_id>. Any .json/.md suffix is stripped."
+    ),
 )
-@click.option("--json", "output_json", is_flag=True, help="Output JSON to stdout")
+@click.option("--force", is_flag=True, help="Overwrite existing output files")
+@click.option("--json", "output_json", is_flag=True, help="Also print the result as JSON to stdout")
 def research_poll(
     run_id: str,
     timeout: int,
     poll_interval: int,
-    output_file: str | None,
+    output_base: str | None,
+    force: bool,
     output_json: bool,
 ):
-    """Poll an existing research task until completion.
+    """Poll an existing research task until completion and save the result.
 
     RUN_ID is the task identifier (e.g., trun_xxx).
+
+    \b
+    Output:
+        Same as `research run`. Default base path: ./parallel-research/<run_id>.
+        Override with -o NAME (writes NAME.json, plus NAME.md if the task was
+        created with text schema). Existing files are not overwritten unless
+        --force is passed.
     """
     try:
         if not output_json:
             console.print(f"[bold cyan]Polling task: {run_id}[/bold cyan]")
-            console.print(f"[dim]Track progress: https://platform.parallel.ai/play/deep-research/{run_id}[/dim]\n")
+            console.print(f"[dim]Track progress: https://platform.parallel.ai/play/deep-research/{run_id}[/dim]")
+            planned_base = _resolve_research_base_path(output_base, run_id)
+            console.print(f"[dim]Will save to: {planned_base}.json (+.md for text schema)[/dim]\n")
 
         start_time = time.time()
 
@@ -1929,17 +2041,16 @@ def on_status(status: str, run_id: str):
             source="cli",
         )
 
-        _output_research_result(result, output_file, output_json)
+        _save_and_display_research(result, output_base, output_json, force=force)
 
+    except KeyboardInterrupt:
+        _exit_research_interrupted(run_id)
     except TimeoutError as e:
-        if output_json:
-            error_data = {"error": {"message": str(e), "type": "TimeoutError"}}
-            print(json.dumps(error_data, indent=2))
-        else:
-            console.print(f"[bold yellow]Timeout: {e}[/bold yellow]")
-        sys.exit(EXIT_TIMEOUT)
+        _exit_research_timeout(e, output_json, suggest_poll=False)
     except RuntimeError as e:
         _handle_error(e, output_json=output_json)
+    except click.ClickException:
+        raise
     except Exception as e:
         _handle_error(e, output_json=output_json)
 
@@ -2063,83 +2174,138 @@ def _content_to_markdown(content: Any, level: int = 1) -> str:
     return str(content)
 
 
-def _output_research_result(
+def _resolve_research_base_path(output_base: str | None, run_id: str) -> Path:
+    """Resolve the base path for research output files.
+
+    Returns a Path with no .json/.md suffix. If `output_base` is None, defaults
+    to ./parallel-research/{run_id} so results don't pollute cwd.
+
+    If `output_base` looks like a directory (trailing slash, or an existing
+    directory), append <run_id> inside it so `-o outputs/` does the obvious
+    thing instead of writing `outputs.json`. Otherwise treat it as a base
+    filename, only stripping a trailing `.json`/`.md` so `-o report` and
+    `-o report.json` produce the same result. Other suffixes (e.g. `.v2`,
+    `.bak`) are preserved as part of the name.
+    """
+    if not output_base:
+        return Path(DEFAULT_RESEARCH_OUTPUT_DIR) / run_id
+
+    looks_like_dir = output_base.endswith(("/", os.sep)) or Path(output_base).is_dir()
+    if looks_like_dir:
+        return Path(output_base) / run_id
+
+    base_path = Path(output_base)
+    if base_path.suffix.lower() in {".json", ".md"}:
+        base_path = base_path.with_suffix("")
+    return base_path
+
+
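The suffix rules above are easiest to check by example. A minimal sketch of the intended mapping, assuming a clean working directory (paths are illustrative and "trun_abc" stands in for a real run ID):

    from pathlib import Path

    from parallel_web_tools.cli.commands import _resolve_research_base_path

    _resolve_research_base_path(None, "trun_abc")           # Path("parallel-research/trun_abc")
    _resolve_research_base_path("outputs/", "trun_abc")     # Path("outputs/trun_abc"), trailing slash = dir
    _resolve_research_base_path("report.json", "trun_abc")  # Path("report"), .json stripped
    _resolve_research_base_path("report.bak", "trun_abc")   # Path("report.bak"), .bak preserved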
+def _save_and_display_research(
     result: dict,
-    output_file: str | None,
+    output_base: str | None,
     output_json: bool,
+    force: bool = False,
 ):
-    """Output research result to console and/or files.
+    """Save the research result to disk and display a summary.
+
+    Always writes {base}.json. Writes {base}.md as well when the task used
+    text schema (a markdown report). Auto-schema results stay JSON-only.
 
-    When saving to a file, creates two files from the base name:
-    - {name}.json: metadata and citations
-    - {name}.md: research content as markdown
+    Without --force, refuses to overwrite existing files. On write failure
+    (e.g. permission denied), falls back to /tmp/{run_id}.{ext} so the result
+    is never lost.
     """
     output = result.get("output", {})
+    run_id = result.get("run_id", "research")
+
+    base_path = _resolve_research_base_path(output_base, run_id)
+    # Append rather than `.with_suffix(".json")` so unconventional bases like
+    # `report.v2` are preserved as `report.v2.json` (with_suffix would replace).
+    json_path = base_path.parent / f"{base_path.name}.json"
+
+    # The SDK's response carries a `type` discriminator ("text" or "json").
+    # Fall back to the requested `output_schema` we threaded through, then to
+    # a content-shape heuristic for older mocks/poll flows.
+    content = output.get("content") if isinstance(output, dict) else None
+    response_type = output.get("type") if isinstance(output, dict) else None
+    is_text_response = (
+        response_type == "text"
+        or result.get("output_schema") == "text"
+        or (response_type is None and isinstance(content, str) and content != "")
+    )
+    md_path = base_path.parent / f"{base_path.name}.md" if is_text_response else None
+
+    output_payload = output.copy() if isinstance(output, dict) else output
+    if md_path is not None and isinstance(content, str):
+        # Move the markdown body to the .md sibling and reference it from JSON.
+        output_payload["content_file"] = md_path.name
+        output_payload.pop("content", None)
+
     output_data = {
-        "run_id": result.get("run_id"),
+        "run_id": run_id,
         "interaction_id": result.get("interaction_id"),
         "result_url": result.get("result_url"),
         "status": result.get("status"),
-        "output": output.copy() if isinstance(output, dict) else output,
+        "output": output_payload,
     }
 
-    # Save to files if requested
-    if output_file:
-        from pathlib import Path
-
-        # Strip any extension to get base name
-        base_path = Path(output_file)
-        if base_path.suffix:
-            base_path = base_path.with_suffix("")
-
-        json_path = base_path.with_suffix(".json")
-        md_path = base_path.with_suffix(".md")
-
-        # Extract content to markdown file
-        if isinstance(output, dict) and "content" in output:
-            content = output["content"]
-            content_text = _content_to_markdown(content)
-
-            if content_text:
-                with open(md_path, "w") as f:
-                    f.write(content_text)
-                console.print(f"[green]Content saved to:[/green] {md_path}")
+    targets = [(json_path, "json")] + ([(md_path, "md")] if md_path else [])
+    if not force:
+        existing = [p for p, _ in targets if p.exists()]
+        if existing:
+            lines = []
+            for p, _ in targets:
+                lines.append(f"  {p} {'(exists)' if p.exists() else '(new)'}")
+            raise click.ClickException(
+                "Refusing to overwrite existing output:\n" + "\n".join(lines) + "\nPass --force to overwrite."
+            )
 
-            # Replace content in JSON with reference to markdown file
-            output_data["output"] = output_data["output"].copy()
-            output_data["output"]["content_file"] = md_path.name
-            del output_data["output"]["content"]
+    def _write_outputs(json_target: Path, md_target: Path | None) -> None:
+        if md_target is not None and isinstance(content, str):
+            md_target.parent.mkdir(parents=True, exist_ok=True)
+            md_target.write_text(content)
+            if not output_json:
+                console.print(f"[green]Content saved to:[/green] {md_target}")
 
-        with open(json_path, "w") as f:
+        json_target.parent.mkdir(parents=True, exist_ok=True)
+        with open(json_target, "w") as f:
             json.dump(output_data, f, indent=2, default=str)
-        console.print(f"[green]Metadata saved to:[/green] {json_path}")
+        if not output_json:
+            console.print(f"[green]Metadata saved to:[/green] {json_target}")
+
+    try:
+        _write_outputs(json_path, md_path)
+    except OSError as e:
+        # Fall back to /tmp so a successful (and billed) API call is never lost.
+        tmp_dir = Path(tempfile.gettempdir())
+        fallback_json = tmp_dir / f"{run_id}.json"
+        fallback_md = tmp_dir / f"{run_id}.md" if md_path else None
+        if md_path is not None:
+            output_data["output"]["content_file"] = fallback_md.name if fallback_md else None
+        if not output_json:
+            console.print(f"[yellow]Failed to write to {json_path.parent}: {e}. Falling back to {tmp_dir}.[/yellow]")
+        _write_outputs(fallback_json, fallback_md)
 
-    # Output to console
     if output_json:
         print(json.dumps(output_data, indent=2, default=str))
-    else:
-        console.print("\n[bold green]Research Complete![/bold green]")
-        console.print(f"[dim]Task: {result.get('run_id')}[/dim]")
-        console.print(f"[dim]Interaction ID: {result.get('interaction_id')}[/dim]")
-        console.print(f"[dim]URL: {result.get('result_url')}[/dim]\n")
+        return
 
-        # Show executive summary if available
-        output = result.get("output", {})
-        content = output.get("content") if isinstance(output, dict) else None
-        summary = _extract_executive_summary(content) if content else None
+    console.print("\n[bold green]Research Complete![/bold green]")
+    console.print(f"[dim]Task: {run_id}[/dim]")
+    console.print(f"[dim]Interaction ID: {result.get('interaction_id')}[/dim]")
+    console.print(f"[dim]URL: {result.get('result_url')}[/dim]\n")
 
-        if summary:
-            from rich.markdown import Markdown
-            from rich.panel import Panel
+    summary = _extract_executive_summary(content) if content else None
+    if summary:
+        from rich.markdown import Markdown
+        from rich.panel import Panel
 
-            console.print(Panel(Markdown(summary), title="Executive Summary", border_style="cyan"))
-            console.print()
+        console.print(Panel(Markdown(summary), title="Executive Summary", border_style="cyan"))
+        console.print()
 
-        if not output_file:
-            console.print("[dim]Use --output to save full results to a file, or --json to print to stdout[/dim]")
-        interaction_id = result.get("interaction_id")
-        if interaction_id:
-            console.print(f"[dim]Use '--previous-interaction-id {interaction_id}' to continue this research[/dim]")
+    interaction_id = result.get("interaction_id")
+    if interaction_id:
+        console.print(f"[dim]Use '--previous-interaction-id {interaction_id}' to continue this research[/dim]")
 
 
 # =============================================================================
diff --git a/parallel_web_tools/core/__init__.py b/parallel_web_tools/core/__init__.py
index 2be24c2..69c4d23 100644
--- a/parallel_web_tools/core/__init__.py
+++ b/parallel_web_tools/core/__init__.py
@@ -49,6 +49,7 @@
 )
 from parallel_web_tools.core.research import (
     RESEARCH_PROCESSORS,
+    OutputSchemaType,
     create_research_task,
     get_research_result,
     get_research_status,
@@ -123,6 +124,7 @@
     "run_enrichment_from_dict",
     # Research
     "RESEARCH_PROCESSORS",
+    "OutputSchemaType",
     "create_research_task",
     "get_research_result",
     "get_research_status",
diff --git a/parallel_web_tools/core/research.py b/parallel_web_tools/core/research.py
index 7cc6474..855ce55 100644
--- a/parallel_web_tools/core/research.py
+++ b/parallel_web_tools/core/research.py
@@ -9,12 +9,15 @@
 from __future__ import annotations
 
 from collections.abc import Callable
-from typing import Any
+from typing import Any, Literal
 
 from parallel_web_tools.core.auth import create_client
 from parallel_web_tools.core.polling import poll_until
 from parallel_web_tools.core.user_agent import ClientSource
 
+# Output schema types supported for deep research
+OutputSchemaType = Literal["auto", "text"]
+
 # Base URL for viewing results
 PLATFORM_BASE = "https://platform.parallel.ai"
 
@@ -69,24 +72,51 @@ def _serialize_output(output: Any) -> dict[str, Any]:
         return {"raw": str(output)}
 
 
+def _build_task_spec(output_schema: OutputSchemaType, text_description: str | None = None) -> Any:
+    """Build task_spec kwargs for the SDK based on output schema type.
+
+    Returns None for auto schema (SDK default), or a TaskSpecParam for text.
+    `text_description` steers the markdown report and is only meaningful with
+    output_schema="text".
+    """
+    if output_schema == "text":
+        from parallel.types import TaskSpecParam, TextSchemaParam
+
+        # `type="text"` is the wire-format discriminator (required per the
+        # API's cURL example), even though the Python docs sometimes show it
+        # implicit — TextSchemaParam is a TypedDict and won't fill it in.
+        text_kwargs: dict[str, Any] = {"type": "text"}
+        if text_description:
+            text_kwargs["description"] = text_description
+        return TaskSpecParam(output_schema=TextSchemaParam(**text_kwargs))
+    return None
+
+
 def create_research_task(
     query: str,
     processor: str = "pro-fast",
     api_key: str | None = None,
     source: ClientSource = "python",
     previous_interaction_id: str | None = None,
+    output_schema: OutputSchemaType = "auto",
+    text_description: str | None = None,
 ) -> dict[str, Any]:
     """Create a deep research task without waiting for results.
 
     Args:
         query: Research question or topic (max 15,000 chars).
-        processor: Processor tier (see RESEARCH_PROCESSORS).
+        processor: Processor tier (see RESEARCH_PROCESSORS). Auto/text schemas
+            yield deep-research-style outputs only on `pro` tiers and above.
         api_key: Optional API key.
         source: Client source identifier for User-Agent.
         previous_interaction_id: Interaction ID from a previous task to reuse as context.
+        output_schema: "auto" (default; API-chosen structured output) or
+            "text" (markdown report with inline citations).
+        text_description: Optional steering description for text-schema reports
+            (e.g. "Keep under 1000 words, focus on M&A activity").
 
     Returns:
-        Dict with run_id, interaction_id, result_url, and other task metadata.
+        Dict with run_id, interaction_id, result_url, output_schema, and other metadata.
     """
     client = create_client(api_key, source)
 
@@ -96,6 +126,9 @@ def create_research_task(
     }
     if previous_interaction_id:
         create_kwargs["previous_interaction_id"] = previous_interaction_id
+    task_spec = _build_task_spec(output_schema, text_description)
+    if task_spec is not None:
+        create_kwargs["task_spec"] = task_spec
 
     task = client.task_run.create(**create_kwargs)
 
@@ -105,6 +138,7 @@ def create_research_task(
         "result_url": f"{PLATFORM_BASE}/play/deep-research/{task.run_id}",
         "processor": processor,
         "status": getattr(task, "status", "pending"),
+        "output_schema": output_schema,
     }
 
 
@@ -171,6 +205,7 @@ def _poll_until_complete(
     poll_interval: int,
     on_status: Callable[[str, str], None] | None,
     interaction_id: str | None = None,
+    output_schema: OutputSchemaType | None = None,
 ) -> dict[str, Any]:
     """Poll a research task until completion and return the result.
 
@@ -182,6 +217,8 @@ def _poll_until_complete(
         poll_interval: Seconds between status checks.
         on_status: Optional callback called with (status, run_id) on each poll.
         interaction_id: Known interaction ID (updated from poll responses).
+        output_schema: Schema the task was created with, included in the result
+            so callers don't need to infer it from response shape.
 
     Returns:
         Dict with content and metadata.
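For library callers, the new parameters thread through end to end. A minimal sketch of requesting a markdown report instead of the default auto schema (assumes an API key is configured; the query and steering text are illustrative):

    from parallel_web_tools.core.research import create_research_task

    task = create_research_task(
        "Market analysis of the HVAC industry",
        processor="pro",
        output_schema="text",
        text_description="Keep under 1000 words, focus on M&A activity",
    )
    # The requested schema is echoed back for downstream consumers.
    print(task["run_id"], task["output_schema"])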
@@ -207,13 +244,19 @@ def fetch_result():
         result = client.task_run.result(run_id=run_id)
         output = result.output if hasattr(result, "output") else {}
         output_data = _serialize_output(output)
-        return {
+        result_dict: dict[str, Any] = {
             "run_id": run_id,
             "interaction_id": poll_state["interaction_id"] or run_id,
             "result_url": result_url,
             "status": "completed",
             "output": output_data,
         }
+        # Note: this `output_schema` is the *requested* schema (caller intent),
+        # not the SDK's `TaskRunJsonOutput.output_schema` (which is server-set
+        # and only present for auto-mode runs).
+        if output_schema is not None:
+            result_dict["output_schema"] = output_schema
+        return result_dict
 
     def format_error(response, status):
         error = getattr(response, "error", None) or f"Task {status}"
@@ -244,6 +287,8 @@ def run_research(
     on_status: Callable[[str, str], None] | None = None,
     source: ClientSource = "python",
     previous_interaction_id: str | None = None,
+    output_schema: OutputSchemaType = "auto",
+    text_description: str | None = None,
 ) -> dict[str, Any]:
     """Run deep research and wait for results.
 
@@ -252,16 +297,20 @@
     Args:
         query: Research question or topic (max 15,000 chars).
-        processor: Processor tier (see RESEARCH_PROCESSORS).
+        processor: Processor tier (see RESEARCH_PROCESSORS). Auto/text schemas
+            yield deep-research-style outputs only on `pro` tiers and above.
         api_key: Optional API key.
         timeout: Maximum wait time in seconds (default: 3600 = 1 hour).
         poll_interval: Seconds between status checks (default: 45).
         on_status: Optional callback called with (status, run_id) on each poll.
         source: Client source identifier for User-Agent.
         previous_interaction_id: Interaction ID from a previous task to reuse as context.
+        output_schema: "auto" (default; API-chosen structured output) or
+            "text" (markdown report with inline citations).
+        text_description: Optional steering description for text-schema reports.
 
     Returns:
-        Dict with content and metadata.
+        Dict with content and metadata, including the requested output_schema.
 
     Raises:
         TimeoutError: If the task doesn't complete within timeout.
@@ -275,6 +324,9 @@
     }
     if previous_interaction_id:
         create_kwargs["previous_interaction_id"] = previous_interaction_id
+    task_spec = _build_task_spec(output_schema, text_description)
+    if task_spec is not None:
+        create_kwargs["task_spec"] = task_spec
 
     task = client.task_run.create(**create_kwargs)
     run_id = task.run_id
@@ -285,7 +337,14 @@
         on_status("created", run_id)
 
     return _poll_until_complete(
-        client, run_id, result_url, timeout, poll_interval, on_status, interaction_id=interaction_id
+        client,
+        run_id,
+        result_url,
+        timeout,
+        poll_interval,
+        on_status,
+        interaction_id=interaction_id,
+        output_schema=output_schema,
     )
 
 
@@ -299,7 +358,9 @@ def poll_research(
 ) -> dict[str, Any]:
     """Resume polling an existing research task.
 
-    Use this to reconnect to a task that was created earlier.
+    Use this to reconnect to a task that was created earlier. The original
+    output_schema is not known here (it was fixed at create time); the
+    consumer must infer it from response shape if it cares.
 
     Args:
         run_id: The task run ID to poll.
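Since poll_research cannot recover the original schema, a consumer that resumes a task has to infer it from the response shape, mirroring what _save_and_display_research does in the CLI. A sketch under that assumption (the run ID is a placeholder):

    from parallel_web_tools.core.research import poll_research

    result = poll_research("trun_abc123", timeout=3600, poll_interval=45)
    content = result.get("output", {}).get("content")
    is_text = isinstance(content, str)  # text schema yields a single markdown string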
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..5d521ba
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,16 @@
+"""Shared pytest fixtures for the parallel-web-tools test suite."""
+
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def _isolate_cwd(tmp_path, monkeypatch):
+    """Run every test in a fresh tmp dir.
+
+    The research CLI now auto-saves results to ./parallel-research/ by
+    default, so any test that exercises `research run` / `research poll`
+    without an explicit `-o` would otherwise drop files into the repo root.
+    Running every test from a tmp dir keeps the working tree clean and lets us
+    drop the per-test `monkeypatch.chdir(tmp_path)` boilerplate.
+    """
+    monkeypatch.chdir(tmp_path)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 0b17eed..0218460 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1848,7 +1848,7 @@ def test_deploy_snowflake_missing_user(self, runner):
 class TestOutputResearchResultJsonPath:
     """Tests for _output_research_result JSON output path."""
 
-    def test_json_output_to_stdout(self, runner):
+    def test_json_output_to_stdout(self, runner, tmp_path, monkeypatch):
         """Should output JSON to stdout via research run --json."""
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
@@ -2256,7 +2256,7 @@ def test_enrich_poll_json_clean_output(self, runner):
             assert len(output) == 1
             assert output[0]["output"]["ceo"] == "CEO A"
 
-    def test_research_run_json_clean_output(self, runner):
+    def test_research_run_json_clean_output(self, runner, tmp_path, monkeypatch):
         """research run --json should produce clean parseable JSON."""
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
@@ -2294,7 +2294,7 @@ def test_research_run_no_wait_json_clean_output(self, runner):
             output = json.loads(result.output.strip())
             assert output["run_id"] == "trun_nowait"
 
-    def test_research_poll_json_clean_output(self, runner):
+    def test_research_poll_json_clean_output(self, runner, tmp_path, monkeypatch):
         """research poll --json should produce clean parseable JSON."""
         with mock.patch("parallel_web_tools.cli.commands.poll_research") as mock_poll:
             mock_poll.return_value = {
diff --git a/tests/test_research.py b/tests/test_research.py
index f5848eb..53c0d4d 100644
--- a/tests/test_research.py
+++ b/tests/test_research.py
@@ -9,6 +9,7 @@
 from parallel_web_tools.cli.commands import _extract_executive_summary, main
 from parallel_web_tools.core.research import (
     RESEARCH_PROCESSORS,
+    _build_task_spec,
     _serialize_output,
     create_research_task,
     get_research_result,
@@ -66,6 +67,44 @@ def test_create_task_truncates_query(self, mock_parallel_client):
         call_args = mock_parallel_client.task_run.create.call_args
         assert len(call_args.kwargs["input"]) == 15000
 
+    def test_create_task_auto_schema_no_task_spec(self, mock_parallel_client):
+        """Should not pass task_spec for auto schema (default)."""
+        mock_task = mock.MagicMock()
+        mock_task.run_id = "trun_123"
+        mock_parallel_client.task_run.create.return_value = mock_task
+
+        create_research_task("What is AI?", output_schema="auto")
+
+        call_args = mock_parallel_client.task_run.create.call_args
+        assert "task_spec" not in call_args.kwargs
+
+    def test_create_task_text_schema(self, mock_parallel_client):
+        """Should pass task_spec with text schema when output_schema='text'."""
+        mock_task = mock.MagicMock()
+        mock_task.run_id = "trun_123"
+        mock_parallel_client.task_run.create.return_value = mock_task
+
+        create_research_task("What is AI?", output_schema="text")
+
+        call_args = mock_parallel_client.task_run.create.call_args
+        assert "task_spec" in call_args.kwargs
+        task_spec = call_args.kwargs["task_spec"]
+        assert task_spec["output_schema"]["type"] == "text"
+
+
+class TestBuildTaskSpec:
+    """Tests for _build_task_spec helper."""
+
+    def test_auto_returns_none(self):
+        """Should return None for auto schema."""
+        assert _build_task_spec("auto") is None
+
+    def test_text_returns_task_spec(self):
+        """Should return TaskSpecParam with TextSchemaParam for text schema."""
+        result = _build_task_spec("text")
+        assert result is not None
+        assert result["output_schema"]["type"] == "text"
+
 
 class TestGetResearchStatus:
     """Tests for get_research_status function."""
@@ -172,6 +211,52 @@ def test_run_research_failed(self, mock_parallel_client):
         with pytest.raises(RuntimeError, match="failed"):
             run_research("What is AI?", poll_interval=1)
 
+    def test_run_research_text_schema(self, mock_parallel_client):
+        """Should pass task_spec with text schema to SDK."""
+        mock_task = mock.MagicMock()
+        mock_task.run_id = "trun_text"
+        mock_parallel_client.task_run.create.return_value = mock_task
+
+        mock_status = mock.MagicMock()
+        mock_status.status = "completed"
+        mock_parallel_client.task_run.retrieve.return_value = mock_status
+
+        mock_output = mock.MagicMock()
+        mock_output.model_dump.return_value = {"content": {"text": "Markdown report"}}
+        mock_result = mock.MagicMock()
+        mock_result.output = mock_output
+        mock_parallel_client.task_run.result.return_value = mock_result
+
+        with mock.patch("parallel_web_tools.core.polling.time.sleep"):
+            result = run_research("What is AI?", poll_interval=1, timeout=10, output_schema="text")
+
+        assert result["status"] == "completed"
+        call_args = mock_parallel_client.task_run.create.call_args
+        assert "task_spec" in call_args.kwargs
+        assert call_args.kwargs["task_spec"]["output_schema"]["type"] == "text"
+
+    def test_run_research_auto_schema_no_task_spec(self, mock_parallel_client):
+        """Should not pass task_spec for auto schema."""
+        mock_task = mock.MagicMock()
+        mock_task.run_id = "trun_auto"
+        mock_parallel_client.task_run.create.return_value = mock_task
+
+        mock_status = mock.MagicMock()
+        mock_status.status = "completed"
+        mock_parallel_client.task_run.retrieve.return_value = mock_status
+
+        mock_output = mock.MagicMock()
+        mock_output.model_dump.return_value = {"content": {"text": "JSON result"}}
+        mock_result = mock.MagicMock()
+        mock_result.output = mock_output
+        mock_parallel_client.task_run.result.return_value = mock_result
+
+        with mock.patch("parallel_web_tools.core.polling.time.sleep"):
+            run_research("What is AI?", poll_interval=1, timeout=10, output_schema="auto")
+
+        call_args = mock_parallel_client.task_run.create.call_args
+        assert "task_spec" not in call_args.kwargs
+
     def test_run_research_on_status_callback(self, mock_parallel_client):
         """Should call on_status callback during polling."""
         mock_task = mock.MagicMock()
@@ -278,6 +363,7 @@ def test_research_run_help(self, runner):
         assert "--processor" in result.output
         assert "--timeout" in result.output
         assert "--no-wait" in result.output
+        assert "--text" in result.output
         assert "--output" in result.output
 
     def test_research_run_no_query(self, runner):
@@ -349,7 +435,7 @@ def test_research_run_json_output(self, runner):
             output = json.loads("\n".join(json_lines))
             assert output["run_id"] == "trun_123"
 
-    def test_research_run_with_wait(self, runner):
+    def test_research_run_with_wait(self, runner, tmp_path, monkeypatch):
         """Should poll and return results without --no-wait."""
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
@@ -392,7 +478,7 @@ def test_research_run_with_previous_interaction_id_no_wait(self, runner):
             call_kwargs = mock_create.call_args
             assert call_kwargs.kwargs.get("previous_interaction_id") == "trun_123"
-    def test_research_run_with_previous_interaction_id_wait(self, runner):
+    def test_research_run_with_previous_interaction_id_wait(self, runner, tmp_path, monkeypatch):
         """Should pass previous_interaction_id to run_research."""
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
@@ -463,6 +549,73 @@ def test_research_run_json_includes_interaction_id(self, runner):
             output = json.loads("\n".join(json_lines))
             assert output["interaction_id"] == "trun_int_abc"
 
+    def test_research_run_text_flag(self, runner, tmp_path, monkeypatch):
+        """Should pass output_schema='text' when --text is used."""
+        with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
+            mock_run.return_value = {
+                "run_id": "trun_text",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_text",
+                "status": "completed",
+                "output": {
+                    "content": "# Markdown Report\n\nThis is a markdown report with enough text to be meaningful.\n\n## Section\n\nBody."
+                },
+            }
+
+            result = runner.invoke(main, ["research", "run", "What is AI?", "--text", "--poll-interval", "1"])
+
+            assert result.exit_code == 0
+            mock_run.assert_called_once()
+            call_kwargs = mock_run.call_args.kwargs
+            assert call_kwargs["output_schema"] == "text"
+
+    def test_research_run_default_auto_schema(self, runner, tmp_path, monkeypatch):
+        """Should pass output_schema='auto' by default (no --text)."""
+        with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
+            mock_run.return_value = {
+                "run_id": "trun_auto",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_auto",
+                "status": "completed",
+                "output": {"content": {"text": "Structured JSON result"}},
+            }
+
+            result = runner.invoke(main, ["research", "run", "What is AI?", "--poll-interval", "1"])
+
+            assert result.exit_code == 0
+            call_kwargs = mock_run.call_args.kwargs
+            assert call_kwargs["output_schema"] == "auto"
+
+    def test_research_run_text_no_wait(self, runner):
+        """Should pass output_schema when using --text with --no-wait."""
+        with mock.patch("parallel_web_tools.cli.commands.create_research_task") as mock_create:
+            mock_create.return_value = {
+                "run_id": "trun_text_nw",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_text_nw",
+                "status": "pending",
+            }
+
+            result = runner.invoke(main, ["research", "run", "What is AI?", "--text", "--no-wait"])
+
+            assert result.exit_code == 0
+            mock_create.assert_called_once()
+            call_kwargs = mock_create.call_args.kwargs
+            assert call_kwargs["output_schema"] == "text"
+
+    def test_research_run_text_in_help(self, runner):
+        """Should show --text flag in help."""
+        result = runner.invoke(main, ["research", "run", "--help"])
+        assert result.exit_code == 0
+        assert "--text" in result.output
+
+    def test_research_run_dry_run_shows_schema(self, runner):
+        """Should show output_schema in dry run output."""
+        result = runner.invoke(main, ["research", "run", "What is AI?", "--dry-run", "--text"])
+        assert result.exit_code == 0
+        assert "text" in result.output
+
+        result = runner.invoke(main, ["research", "run", "What is AI?", "--dry-run"])
+        assert result.exit_code == 0
+        assert "auto" in result.output
+
 
 class TestResearchStatusCommand:
     """Tests for the research status command."""
@@ -514,7 +667,7 @@ def test_research_poll_help(self, runner):
         assert "RUN_ID" in result.output
         assert "--timeout" in result.output
 
-    def test_research_poll(self, runner):
+    def test_research_poll(self, runner, tmp_path, monkeypatch):
         """Should poll and return results."""
         with mock.patch("parallel_web_tools.cli.commands.poll_research") as mock_poll:
             mock_poll.return_value = {
@@ -545,9 +698,8 @@ def test_research_processors(self, runner):
 class TestResearchOutputFile:
     """Tests for saving research results to files."""
 
-    def test_research_save_to_file_with_content(self, runner, tmp_path):
-        """Should save content to separate markdown file."""
-        output_base = tmp_path / "report"
+    def test_default_saves_json_only(self, runner, tmp_path):
+        """Default (auto schema) should save only .json."""
         json_file = tmp_path / "report.json"
         md_file = tmp_path / "report.md"
 
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
                 "run_id": "trun_123",
                 "result_url": "https://platform.parallel.ai/play/deep-research/trun_123",
                 "status": "completed",
-                "output": {"content": {"text": "# Research findings\n\nThis is the report."}, "basis": []},
+                "output": {"content": {"market_size": "10B"}, "basis": []},
             }
 
             result = runner.invoke(
                 main,
-                ["research", "run", "What is AI?", "-o", str(output_base), "--poll-interval", "1"],
+                ["research", "run", "What is AI?", "-o", str(tmp_path / "report"), "--poll-interval", "1"],
             )
 
             assert result.exit_code == 0
-
-            # Check JSON file has output with content_file reference
             assert json_file.exists()
+            assert not md_file.exists()
+
             data = json.loads(json_file.read_text())
             assert data["run_id"] == "trun_123"
-            assert data["status"] == "completed"
-            assert "output" in data
-            assert "content" not in data["output"]
-            assert data["output"]["content_file"] == "report.md"
-            assert data["output"]["basis"] == []
+            assert data["output"]["content"]["market_size"] == "10B"
 
-            # Check markdown file has content
-            assert md_file.exists()
-            assert md_file.read_text() == "# Research findings\n\nThis is the report."
-
-    def test_research_save_to_file_strips_extension(self, runner, tmp_path):
-        """Should strip extension from output path and create both files."""
-        output_with_ext = tmp_path / "report.json"
+    def test_text_saves_json_and_md(self, runner, tmp_path):
+        """--text should save both .json (with content_file ref) and .md."""
         json_file = tmp_path / "report.json"
         md_file = tmp_path / "report.md"
 
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
-                "run_id": "trun_ext",
-                "result_url": "https://platform.parallel.ai/play/deep-research/trun_ext",
+                "run_id": "trun_text",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_text",
                 "status": "completed",
-                "output": {"content": "Content here"},
+                "output": {"content": "# Report\n\nFindings here.", "basis": [{"field": "content"}]},
             }
 
             result = runner.invoke(
                 main,
-                ["research", "run", "Question?", "-o", str(output_with_ext), "--poll-interval", "1"],
+                ["research", "run", "Question?", "--text", "-o", str(tmp_path / "report"), "--poll-interval", "1"],
             )
 
             assert result.exit_code == 0
+
+            # Both files exist
             assert json_file.exists()
             assert md_file.exists()
 
-    def test_research_save_to_file_string_content(self, runner, tmp_path):
-        """Should handle string content directly."""
-        output_base = tmp_path / "report"
+            # .md has the content
+            assert md_file.read_text() == "# Report\n\nFindings here."
+
+            # .json references .md and doesn't duplicate content
+            data = json.loads(json_file.read_text())
+            assert data["output"]["content_file"] == "report.md"
+            assert "content" not in data["output"]
+            assert data["output"]["basis"] == [{"field": "content"}]
+
+    def test_output_strips_extension_from_path(self, runner, tmp_path):
+        """-o with extension should still produce correct files."""
         json_file = tmp_path / "report.json"
         md_file = tmp_path / "report.md"
 
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
-                "run_id": "trun_456",
-                "result_url": "https://platform.parallel.ai/play/deep-research/trun_456",
+                "run_id": "trun_ext",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_ext",
                 "status": "completed",
-                "output": {"content": "Plain string content"},
+                "output": {"content": "Content here"},
             }
 
             result = runner.invoke(
                 main,
-                ["research", "run", "Question?", "-o", str(output_base), "--poll-interval", "1"],
+                ["research", "run", "Question?", "--text", "-o", str(md_file), "--poll-interval", "1"],
             )
 
             assert result.exit_code == 0
-
-            # Check markdown file has content
+            assert json_file.exists()
             assert md_file.exists()
-            assert md_file.read_text() == "Plain string content"
 
-            # Check JSON references markdown file
-            data = json.loads(json_file.read_text())
-            assert data["output"]["content_file"] == "report.md"
+    def test_default_writes_to_parallel_research_subdir(self, runner, tmp_path, monkeypatch):
+        """Without -o, results go under ./parallel-research/<run_id>.json so cwd stays clean."""
+        with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
+            mock_run.return_value = {
+                "run_id": "trun_abc",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_abc",
+                "status": "completed",
+                "output_schema": "auto",
+                "output": {"content": {"text": "Result"}},
+            }
 
-    def test_research_save_to_file_no_content(self, runner, tmp_path):
-        """Should handle output without content field."""
-        output_base = tmp_path / "report"
-        json_file = tmp_path / "report.json"
-        md_file = tmp_path / "report.md"
+            result = runner.invoke(main, ["research", "run", "Question?", "--poll-interval", "1"])
+
+            assert result.exit_code == 0
+            # New default: subdirectory, not cwd directly
+            assert (tmp_path / "parallel-research" / "trun_abc.json").exists()
+            assert not (tmp_path / "parallel-research" / "trun_abc.md").exists()
+            # And we don't pollute cwd itself
+            assert not (tmp_path / "trun_abc.json").exists()
+
+    def test_default_text_writes_both_files_to_subdir(self, runner, tmp_path, monkeypatch):
+        """--text without -o writes both .json and .md under ./parallel-research/."""
         with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run:
             mock_run.return_value = {
-                "run_id": "trun_789",
-                "result_url": "https://platform.parallel.ai/play/deep-research/trun_789",
+                "run_id": "trun_xyz",
+                "result_url": "https://platform.parallel.ai/play/deep-research/trun_xyz",
                 "status": "completed",
-                "output": {"other_field": "some value"},
+                "output_schema": "text",
+                "output": {"content": "Markdown content here"},
+            }
+
+            result = runner.invoke(main, ["research", "run", "Question?", "--text", "--poll-interval", "1"])
+
+            assert result.exit_code == 0
+            assert (tmp_path / "parallel-research" / "trun_xyz.json").exists()
+            assert (tmp_path / "parallel-research" / "trun_xyz.md").exists()
+
+    def test_refuses_overwrite_without_force(self, runner, tmp_path):
+        """Existing output files should error out unless --force is passed."""
+        target = tmp_path / "report.json"
tmp_path / "report.json" + target.write_text('{"existing": true}') + + with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: + mock_run.return_value = { + "run_id": "trun_overwrite", + "result_url": "https://platform.parallel.ai/play/deep-research/trun_overwrite", + "status": "completed", + "output_schema": "auto", + "output": {"content": {"x": 1}}, } result = runner.invoke( main, - ["research", "run", "Question?", "-o", str(output_base), "--poll-interval", "1"], + ["research", "run", "Q?", "-o", str(tmp_path / "report"), "--poll-interval", "1"], + ) + + assert result.exit_code != 0 + assert "Refusing to overwrite" in result.output + # Existing file untouched + assert json.loads(target.read_text()) == {"existing": True} + + def test_force_overwrites(self, runner, tmp_path): + """--force should clobber existing files.""" + target = tmp_path / "report.json" + target.write_text('{"existing": true}') + + with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: + mock_run.return_value = { + "run_id": "trun_overwrite", + "result_url": "https://platform.parallel.ai/play/deep-research/trun_overwrite", + "status": "completed", + "output_schema": "auto", + "output": {"content": {"x": 1}}, + } + + result = runner.invoke( + main, + ["research", "run", "Q?", "-o", str(tmp_path / "report"), "--force", "--poll-interval", "1"], ) assert result.exit_code == 0 + assert json.loads(target.read_text())["run_id"] == "trun_overwrite" - # No markdown file should be created - assert not md_file.exists() + def test_creates_parent_directories(self, runner, tmp_path): + """-o pointing into a missing subdirectory should mkdir -p, not crash.""" + with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: + mock_run.return_value = { + "run_id": "trun_mkdir", + "result_url": "https://platform.parallel.ai/play/deep-research/trun_mkdir", + "status": "completed", + "output_schema": "auto", + "output": {"content": {"x": 1}}, + } - # JSON should have original output - data = json.loads(json_file.read_text()) - assert data["output"]["other_field"] == "some value" - assert "content_file" not in data["output"] + base = tmp_path / "missing" / "deeply" / "nested" / "report" + result = runner.invoke(main, ["research", "run", "Q?", "-o", str(base), "--poll-interval", "1"]) - def test_research_save_to_file_structured_content(self, runner, tmp_path): - """Should convert structured dict content to markdown.""" - output_base = tmp_path / "report" - json_file = tmp_path / "report.json" - md_file = tmp_path / "report.md" + assert result.exit_code == 0 + assert (tmp_path / "missing" / "deeply" / "nested" / "report.json").exists() + def test_only_strips_json_md_suffixes(self, runner, tmp_path): + """-o report.bak should preserve .bak; we only recognize .json/.md as our own.""" with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: mock_run.return_value = { - "run_id": "trun_structured", - "result_url": "https://platform.parallel.ai/play/deep-research/trun_structured", + "run_id": "trun_suffix", + "result_url": "https://platform.parallel.ai/play/deep-research/trun_suffix", "status": "completed", - "output": { - "content": { - "summary": "This is the summary.", - "key_findings": ["Finding 1", "Finding 2"], - "detailed_analysis": {"section_one": "Details here."}, - }, - "basis": [], - }, + "output_schema": "auto", + "output": {"content": {"x": 1}}, } result = runner.invoke( main, - ["research", "run", "Question?", "-o", str(output_base), "--poll-interval", "1"], 
+ ["research", "run", "Q?", "-o", str(tmp_path / "report.bak"), "--poll-interval", "1"], ) assert result.exit_code == 0 + # .bak is preserved as part of the base name; we append .json + assert (tmp_path / "report.bak.json").exists() + assert not (tmp_path / "report.json").exists() - # Markdown file should be created - assert md_file.exists() - md_content = md_file.read_text() + def test_strips_json_md_suffixes(self, runner, tmp_path): + """-o report.json and -o report should produce the same result.""" + with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: + mock_run.return_value = { + "run_id": "trun_strip", + "result_url": "https://platform.parallel.ai/play/deep-research/trun_strip", + "status": "completed", + "output_schema": "text", + "output": {"content": "# Report"}, + } - # Check markdown has sections - assert "# Summary" in md_content - assert "This is the summary." in md_content - assert "# Key Findings" in md_content - assert "- Finding 1" in md_content - assert "# Detailed Analysis" in md_content + # Passing .json + result = runner.invoke( + main, + [ + "research", + "run", + "Q?", + "-o", + str(tmp_path / "report.json"), + "--text", + "--poll-interval", + "1", + ], + ) - # JSON should reference markdown file - data = json.loads(json_file.read_text()) - assert data["output"]["content_file"] == "report.md" - assert "content" not in data["output"] + assert result.exit_code == 0 + # Both files exist — .json stripped from -o, then re-appended + assert (tmp_path / "report.json").exists() + assert (tmp_path / "report.md").exists() class TestSerializeOutput: @@ -858,9 +1084,9 @@ def test_non_string_non_dict(self): class TestResearchOutputExecutiveSummary: - """Tests that the executive summary is printed to console.""" + """Tests that the executive summary is always printed to console.""" - def test_research_run_prints_executive_summary(self, runner): + def test_research_run_prints_executive_summary(self, runner, tmp_path, monkeypatch): """Should print executive summary when research completes.""" with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: mock_run.return_value = { @@ -872,14 +1098,14 @@ def test_research_run_prints_executive_summary(self, runner): }, } - result = runner.invoke(main, ["research", "run", "What is AI?", "--poll-interval", "1"]) + result = runner.invoke(main, ["research", "run", "What is AI?", "--text", "--poll-interval", "1"]) assert result.exit_code == 0 assert "Research Complete" in result.output assert "Executive Summary" in result.output assert "executive summary of the research" in result.output - def test_research_poll_prints_executive_summary(self, runner): + def test_research_poll_prints_executive_summary(self, runner, tmp_path, monkeypatch): """Should print executive summary when polling completes.""" with mock.patch("parallel_web_tools.cli.commands.poll_research") as mock_poll: mock_poll.return_value = { @@ -897,7 +1123,7 @@ def test_research_poll_prints_executive_summary(self, runner): assert "Executive Summary" in result.output assert "substantial executive summary" in result.output - def test_no_summary_when_content_missing(self, runner): + def test_no_summary_when_content_missing(self, runner, tmp_path, monkeypatch): """Should not crash when content is missing.""" with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: mock_run.return_value = { @@ -913,19 +1139,19 @@ def test_no_summary_when_content_missing(self, runner): assert "Research Complete" in result.output assert 
"Executive Summary" not in result.output - def test_no_summary_for_json_output(self, runner): - """Should not print summary panel when --json is used.""" + def test_summary_shown_with_auto_schema(self, runner, tmp_path, monkeypatch): + """Should print summary for auto schema (structured content).""" with mock.patch("parallel_web_tools.cli.commands.run_research") as mock_run: mock_run.return_value = { "run_id": "trun_json", "result_url": "https://platform.parallel.ai/play/deep-research/trun_json", "status": "completed", "output": { - "content": "# Report\n\nThis is a long executive summary for testing.\n\n## Section\n\nBody." + "content": {"summary": "This is a structured summary for testing the executive summary display."} }, } - result = runner.invoke(main, ["research", "run", "What is AI?", "--poll-interval", "1", "--json"]) + result = runner.invoke(main, ["research", "run", "What is AI?", "--poll-interval", "1"]) assert result.exit_code == 0 - assert "Executive Summary" not in result.output + assert "Executive Summary" in result.output