From 474ebc0e7e6c919eb937a8d736ba9ed4b65ce609 Mon Sep 17 00:00:00 2001 From: Adam Hevenor Date: Fri, 13 Feb 2026 09:34:16 -0700 Subject: [PATCH] Add dual output mode (human/plain) with TTY auto-detection Adds a global --output/-o flag that switches between rich table output (human) and pipe-delimited data-only output (plain). Auto-detects based on TTY when not specified, so piping to other tools automatically gets clean, parseable output. Also removes 80-char content truncation from list and search commands. Co-Authored-By: Claude Opus 4.6 --- src/tpuff/cli.py | 11 +- src/tpuff/commands/get.py | 25 +++-- src/tpuff/commands/list.py | 195 ++++++++++++++++++++++------------- src/tpuff/commands/schema.py | 64 ++++++++---- src/tpuff/commands/search.py | 68 ++++++------ src/tpuff/utils/output.py | 60 +++++++++++ 6 files changed, 291 insertions(+), 132 deletions(-) create mode 100644 src/tpuff/utils/output.py diff --git a/src/tpuff/cli.py b/src/tpuff/cli.py index aebfd5f..531437b 100644 --- a/src/tpuff/cli.py +++ b/src/tpuff/cli.py @@ -10,6 +10,7 @@ from tpuff.commands.list import list_cmd from tpuff.commands.schema import schema from tpuff.commands.search import search +from tpuff.utils.output import resolve_output_mode # Context settings to enable -h as help alias for all commands CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]} @@ -18,11 +19,19 @@ @click.group(context_settings=CONTEXT_SETTINGS) @click.version_option(version=__version__, prog_name="tpuff") @click.option("--debug", is_flag=True, help="Enable debug output") +@click.option( + "-o", + "--output", + type=click.Choice(["human", "plain"]), + default=None, + help="Output format: human (rich tables) or plain (pipe-delimited). Auto-detects TTY if omitted.", +) @click.pass_context -def cli(ctx: click.Context, debug: bool) -> None: +def cli(ctx: click.Context, debug: bool, output: str | None) -> None: """tpuff - CLI tool for Turbopuffer vector database.""" ctx.ensure_object(dict) ctx.obj["debug"] = debug + ctx.obj["output_mode"] = resolve_output_mode(output) # Register commands diff --git a/src/tpuff/commands/get.py b/src/tpuff/commands/get.py index 3991576..98ad355 100644 --- a/src/tpuff/commands/get.py +++ b/src/tpuff/commands/get.py @@ -8,6 +8,7 @@ from tpuff.client import get_namespace from tpuff.utils.debug import debug_log +from tpuff.utils.output import is_plain, status_print console = Console() @@ -24,8 +25,14 @@ def get( region: str | None, ) -> None: """Get a document by ID from a namespace.""" + plain = is_plain(ctx) + try: - console.print(f"\n[bold]Querying document with ID: {id} from namespace: {namespace}[/bold]\n") + status_print( + ctx, + f"\n[bold]Querying document with ID: {id} from namespace: {namespace}[/bold]\n", + console, + ) # Get namespace reference ns = get_namespace(namespace, region) @@ -64,14 +71,20 @@ def get( else: doc_dict = {"id": getattr(doc, "id", "N/A")} - # Display document - console.print("[cyan]Document:[/cyan]") - console.print(json.dumps(doc_dict, indent=2, default=str)) + if plain: + # Plain mode: raw JSON only + click.echo(json.dumps(doc_dict, default=str)) + else: + # Display document + console.print("[cyan]Document:[/cyan]") + console.print(json.dumps(doc_dict, indent=2, default=str)) # Show performance info if hasattr(result, "performance") and result.performance: - console.print( - f"\n[dim]Query took {result.performance.query_execution_ms:.2f}ms[/dim]" + status_print( + ctx, + f"\n[dim]Query took {result.performance.query_execution_ms:.2f}ms[/dim]", + console, ) except Exception as e: diff --git a/src/tpuff/commands/list.py b/src/tpuff/commands/list.py index a8f3a00..bb8b027 100644 --- a/src/tpuff/commands/list.py +++ b/src/tpuff/commands/list.py @@ -16,6 +16,7 @@ get_index_status, get_unindexed_bytes, ) +from tpuff.utils.output import is_plain, print_table_plain, status_print console = Console() @@ -33,10 +34,10 @@ def format_bytes(bytes_count: int) -> str: return f"{bytes_count:.2f} {sizes[i]}" -def format_updated_at(timestamp: str | datetime | None) -> str: +def format_updated_at(timestamp: str | datetime | None, plain: bool = False) -> str: """Format timestamp smartly: time if today, date otherwise.""" if timestamp is None: - return "[dim]N/A[/dim]" + return "N/A" if plain else "[dim]N/A[/dim]" try: # Handle datetime objects directly @@ -58,17 +59,22 @@ def format_updated_at(timestamp: str | datetime | None) -> str: else: return date.strftime("%b %-d, %Y") except Exception: - return str(timestamp) if timestamp else "[dim]N/A[/dim]" + if timestamp: + return str(timestamp) + return "N/A" if plain else "[dim]N/A[/dim]" -def format_recall(recall_data) -> str: +def format_recall(recall_data, plain: bool = False) -> str: """Format recall as a color-coded percentage.""" if not recall_data: - return "[dim]N/A[/dim]" + return "N/A" if plain else "[dim]N/A[/dim]" percentage = recall_data.avg_recall * 100 display_value = f"{percentage:.1f}%" + if plain: + return display_value + if recall_data.avg_recall > 0.95: return f"[green]{display_value}[/green]" elif recall_data.avg_recall > 0.8: @@ -104,12 +110,17 @@ def extract_vector_info(schema: dict) -> dict | None: def display_namespace_documents( - namespace: str, top_k: int, region: str | None = None + ctx: click.Context, namespace: str, top_k: int, region: str | None = None ) -> None: """List documents in a specific namespace.""" + plain = is_plain(ctx) ns = get_namespace(namespace, region) - console.print(f"\n[bold]Querying namespace: {namespace} (top {top_k} results)[/bold]\n") + status_print( + ctx, + f"\n[bold]Querying namespace: {namespace} (top {top_k} results)[/bold]\n", + console, + ) # Get namespace metadata to extract schema metadata = ns.metadata() @@ -124,8 +135,10 @@ def display_namespace_documents( console.print("[red]Error: No vector attribute found in namespace schema[/red]") sys.exit(1) - console.print( - f"[dim]Using {vector_info['dimensions']}-dimensional zero vector for query[/dim]\n" + status_print( + ctx, + f"[dim]Using {vector_info['dimensions']}-dimensional zero vector for query[/dim]\n", + console, ) # Create zero vector @@ -153,14 +166,10 @@ def display_namespace_documents( console.print("No documents found in namespace") return - console.print(f"[bold]Found {len(rows)} document(s):[/bold]\n") + status_print(ctx, f"[bold]Found {len(rows)} document(s):[/bold]\n", console) - # Create table for results - table = Table(show_header=True, header_style="cyan") - table.add_column("ID") - table.add_column("Contents") - - # Add rows to table + # Collect row data + table_rows = [] for row in rows: # Get the row as a dict using model_dump() or fallback if hasattr(row, "model_dump"): @@ -179,30 +188,36 @@ def display_namespace_documents( if key not in exclude_keys and not key.startswith("_"): contents[key] = value - # Stringify and truncate contents contents_str = json.dumps(contents, default=str) - max_length = 80 - display_contents = ( - contents_str[:max_length] + "..." if len(contents_str) > max_length else contents_str - ) + table_rows.append([str(row_id), contents_str]) - table.add_row(str(row_id), display_contents) - - console.print(table) + if plain: + print_table_plain(["ID", "Contents"], table_rows) + else: + table = Table(show_header=True, header_style="cyan") + table.add_column("ID") + table.add_column("Contents") + for r in table_rows: + table.add_row(*r) + console.print(table) # Show performance info if available if hasattr(result, "performance") and result.performance: - console.print( - f"\n[dim]Query took {result.performance.query_execution_ms:.2f}ms[/dim]" + status_print( + ctx, + f"\n[dim]Query took {result.performance.query_execution_ms:.2f}ms[/dim]", + console, ) def display_namespaces( + ctx: click.Context, all_regions: bool = False, region: str | None = None, include_recall: bool = False, ) -> None: """List all namespaces.""" + plain = is_plain(ctx) namespaces_with_metadata = fetch_namespaces_with_metadata( all_regions=all_regions, region=region, @@ -213,7 +228,11 @@ def display_namespaces( console.print("No namespaces found") return - console.print(f"\n[bold]Found {len(namespaces_with_metadata)} namespace(s):[/bold]\n") + status_print( + ctx, + f"\n[bold]Found {len(namespaces_with_metadata)} namespace(s):[/bold]\n", + console, + ) # Sort by updated_at in descending order (most recent first) def sort_key(item: NamespaceWithMetadata): @@ -229,62 +248,90 @@ def sort_key(item: NamespaceWithMetadata): namespaces_with_metadata.sort(key=sort_key, reverse=True) - # Create table with conditional region and recall columns - table = Table(show_header=True, header_style="cyan") - table.add_column("Namespace") + # Build headers + headers = ["Namespace"] if all_regions: - table.add_column("Region") - table.add_column("Rows") - table.add_column("Logical Bytes") - table.add_column("Index Status") - table.add_column("Unindexed Bytes") + headers.append("Region") + headers.extend(["Rows", "Logical Bytes", "Index Status", "Unindexed Bytes"]) if include_recall: - table.add_column("Recall") - table.add_column("Updated") + headers.append("Recall") + headers.append("Updated") - # Add rows to table + # Collect row data + table_rows = [] for item in namespaces_with_metadata: if item.metadata: index_status = get_index_status(item.metadata) - index_status_display = ( - "[green]up-to-date[/green]" - if index_status == "up-to-date" - else "[red]updating[/red]" - ) - unindexed = get_unindexed_bytes(item.metadata) - unindexed_display = ( - f"[red]{format_bytes(unindexed)}[/red]" - if unindexed > 0 - else format_bytes(0) - ) - - row = [f"[bold]{item.namespace_id}[/bold]"] - if all_regions and item.region: - row.append(f"[dim]{item.region}[/dim]") - row.extend([ - f"{item.metadata.approx_row_count:,}", - format_bytes(item.metadata.approx_logical_bytes), - index_status_display, - unindexed_display, - ]) - if include_recall: - row.append(format_recall(item.recall)) - row.append(format_updated_at(item.metadata.updated_at)) - - table.add_row(*row) + + if plain: + row = [item.namespace_id] + if all_regions: + row.append(item.region or "") + row.extend([ + f"{item.metadata.approx_row_count:,}", + format_bytes(item.metadata.approx_logical_bytes), + index_status, + format_bytes(unindexed), + ]) + if include_recall: + row.append(format_recall(item.recall, plain=True)) + row.append(format_updated_at(item.metadata.updated_at, plain=True)) + else: + index_status_display = ( + "[green]up-to-date[/green]" + if index_status == "up-to-date" + else "[red]updating[/red]" + ) + unindexed_display = ( + f"[red]{format_bytes(unindexed)}[/red]" + if unindexed > 0 + else format_bytes(0) + ) + + row = [f"[bold]{item.namespace_id}[/bold]"] + if all_regions and item.region: + row.append(f"[dim]{item.region}[/dim]") + row.extend([ + f"{item.metadata.approx_row_count:,}", + format_bytes(item.metadata.approx_logical_bytes), + index_status_display, + unindexed_display, + ]) + if include_recall: + row.append(format_recall(item.recall)) + row.append(format_updated_at(item.metadata.updated_at)) + + table_rows.append(row) else: - row = [f"[bold]{item.namespace_id}[/bold]"] - if all_regions and item.region: - row.append(f"[dim]{item.region}[/dim]") - row.extend(["[dim]N/A[/dim]"] * 4) - if include_recall: + if plain: + row = [item.namespace_id] + if all_regions: + row.append(item.region or "") + row.extend(["N/A"] * 4) + if include_recall: + row.append("N/A") + row.append("N/A") + else: + row = [f"[bold]{item.namespace_id}[/bold]"] + if all_regions and item.region: + row.append(f"[dim]{item.region}[/dim]") + row.extend(["[dim]N/A[/dim]"] * 4) + if include_recall: + row.append("[dim]N/A[/dim]") row.append("[dim]N/A[/dim]") - row.append("[dim]N/A[/dim]") - table.add_row(*row) + table_rows.append(row) - console.print(table) + if plain: + print_table_plain(headers, table_rows) + else: + table = Table(show_header=True, header_style="cyan") + for h in headers: + table.add_column(h) + for r in table_rows: + table.add_row(*r) + console.print(table) @click.command("list", context_settings={"help_option_names": ["-h", "--help"]}) @@ -327,10 +374,10 @@ def list_cmd( ) sys.exit(1) - display_namespace_documents(namespace, top_k, region) + display_namespace_documents(ctx, namespace, top_k, region) else: # List all namespaces - display_namespaces(all_regions, region, include_recall) + display_namespaces(ctx, all_regions, region, include_recall) except Exception as e: console.print(f"[red]Error: {e}[/red]") sys.exit(1) diff --git a/src/tpuff/commands/schema.py b/src/tpuff/commands/schema.py index 28e77e9..4f3e54f 100644 --- a/src/tpuff/commands/schema.py +++ b/src/tpuff/commands/schema.py @@ -10,6 +10,7 @@ from rich.table import Table from tpuff.client import get_namespace, get_turbopuffer_client +from tpuff.utils.output import is_plain, print_table_plain console = Console() @@ -173,6 +174,10 @@ def schema_get( raw: bool, ) -> None: """Display the schema for a namespace.""" + # Plain output mode implies raw JSON + plain = is_plain(ctx) + use_raw = raw or plain + try: ns = get_namespace(namespace, region) metadata = ns.metadata() @@ -181,7 +186,7 @@ def schema_get( schema_data = metadata.schema if hasattr(metadata, "schema") else {} if not schema_data: - if raw: + if use_raw: print("{}") else: console.print(f"[yellow]No schema found for namespace: {namespace}[/yellow]") @@ -198,14 +203,14 @@ def schema_get( else: schema_dict[attr_name] = str(attr_type) - if raw: + if use_raw: print(json.dumps(schema_dict)) else: console.print(f"\n[bold]Schema for namespace: {namespace}[/bold]\n") console.print(json.dumps(schema_dict, indent=2)) except Exception as e: - if raw: + if use_raw: print(json.dumps({"error": str(e)}), file=sys.stderr) else: console.print(f"[red]Error: {e}[/red]") @@ -404,33 +409,56 @@ def display_batch_summary(results: list[BatchApplyResult], dry_run: bool = False results: List of BatchApplyResult objects dry_run: Whether this was a dry run """ - table = Table(show_header=True, header_style="cyan") - table.add_column("Namespace") - table.add_column("Changes") - table.add_column("Status") + # Check if plain mode is active + ctx = click.get_current_context(silent=True) + plain = is_plain(ctx) if ctx else False + + headers = ["Namespace", "Changes", "Status"] + table_rows = [] for result in results: if result.conflicts > 0: - changes = f"+{result.additions} attributes [red]({result.conflicts} conflict(s))[/red]" - status = "[red]blocked[/red]" + if plain: + changes = f"+{result.additions} attributes ({result.conflicts} conflict(s))" + status = "blocked" + else: + changes = f"+{result.additions} attributes [red]({result.conflicts} conflict(s))[/red]" + status = "[red]blocked[/red]" elif result.error: - changes = "[dim]N/A[/dim]" - status = f"[red]error: {result.error}[/red]" + if plain: + changes = "N/A" + status = f"error: {result.error}" + else: + changes = "[dim]N/A[/dim]" + status = f"[red]error: {result.error}[/red]" elif result.additions == 0: - changes = "[dim]no changes[/dim]" - status = "[green]up-to-date[/green]" if not dry_run else "[dim]would skip[/dim]" + if plain: + changes = "no changes" + status = "up-to-date" if not dry_run else "would skip" + else: + changes = "[dim]no changes[/dim]" + status = "[green]up-to-date[/green]" if not dry_run else "[dim]would skip[/dim]" else: changes = f"+{result.additions} attribute(s)" if dry_run: - status = "[yellow]would apply[/yellow]" + status = "would apply" if plain else "[yellow]would apply[/yellow]" elif result.success: - status = "[green]applied[/green]" + status = "applied" if plain else "[green]applied[/green]" else: - status = "[red]failed[/red]" + status = "failed" if plain else "[red]failed[/red]" - table.add_row(f"[bold]{result.namespace}[/bold]", changes, status) + ns_display = result.namespace if plain else f"[bold]{result.namespace}[/bold]" + table_rows.append([ns_display, changes, status]) - console.print(table) + if plain: + print_table_plain(headers, table_rows) + else: + table = Table(show_header=True, header_style="cyan") + for h in headers: + table.add_column(h) + for r in table_rows: + table.add_row(*r) + console.print(table) def apply_schema_to_single_namespace( diff --git a/src/tpuff/commands/search.py b/src/tpuff/commands/search.py index aa6b0d8..9269443 100644 --- a/src/tpuff/commands/search.py +++ b/src/tpuff/commands/search.py @@ -12,6 +12,7 @@ from tpuff.client import get_namespace from tpuff.utils.debug import debug_log from tpuff.utils.embeddings import embedding_generator +from tpuff.utils.output import is_plain, print_table_plain, status_print console = Console() @@ -70,6 +71,7 @@ def search( region: str | None, ) -> None: """Search for documents in a namespace using vector similarity or full-text search.""" + plain = is_plain(ctx) use_fts = bool(fts_field) # Validate options @@ -80,10 +82,10 @@ def search( sys.exit(1) if use_fts and model_id: - console.print("[yellow]Warning: Both --fts and --model specified. Using FTS mode.[/yellow]") + status_print(ctx, "[yellow]Warning: Both --fts and --model specified. Using FTS mode.[/yellow]", console) - console.print(f"\n[bold]Searching in namespace: {namespace}[/bold]") - console.print(f'[dim]Query: "{query}"[/dim]') + status_print(ctx, f"\n[bold]Searching in namespace: {namespace}[/bold]", console) + status_print(ctx, f'[dim]Query: "{query}"[/dim]', console) try: ns = get_namespace(namespace, region) @@ -95,7 +97,7 @@ def search( if use_fts: # Full-text search mode - console.print(f'[dim]Mode: Full-text search (BM25) on field "{fts_field}"[/dim]\n') + status_print(ctx, f'[dim]Mode: Full-text search (BM25) on field "{fts_field}"[/dim]\n', console) query_params = { "rank_by": [fts_field, "BM25", query], @@ -105,8 +107,8 @@ def search( query_params["exclude_attributes"] = [vector_info["attributeName"]] else: # Vector search mode - console.print("[dim]Mode: Vector similarity search[/dim]") - console.print(f"[dim]Model: {model_id}[/dim]\n") + status_print(ctx, "[dim]Mode: Vector similarity search[/dim]", console) + status_print(ctx, f"[dim]Model: {model_id}[/dim]\n", console) # Generate embedding for query try: @@ -130,7 +132,7 @@ def search( # Re-raise other errors raise - console.print(f"[dim]Generated {len(embedding)}-dimensional embedding[/dim]\n") + status_print(ctx, f"[dim]Generated {len(embedding)}-dimensional embedding[/dim]\n", console) # Verify vector configuration if not vector_info: @@ -149,7 +151,7 @@ def search( ) sys.exit(1) - console.print(f"[dim]Using distance metric: {distance_metric}[/dim]\n") + status_print(ctx, f"[dim]Using distance metric: {distance_metric}[/dim]\n", console) query_params = { "rank_by": [vector_info["attributeName"], "ANN", embedding], @@ -195,15 +197,11 @@ def search( if hasattr(first_row, "model_dump"): debug_log("First Row Structure", {"keys": list(first_row.model_dump().keys())}) - console.print(f"[bold]Found {len(rows)} result(s):[/bold]\n") + status_print(ctx, f"[bold]Found {len(rows)} result(s):[/bold]\n", console) - # Create table for results - table = Table(show_header=True, header_style="cyan") - table.add_column("ID") - table.add_column("Contents") - table.add_column("Score" if use_fts else "Distance") - - # Add rows to table + # Collect row data + score_header = "Score" if use_fts else "Distance" + table_rows = [] for row in rows: # Get the row as a dict if hasattr(row, "model_dump"): @@ -218,12 +216,7 @@ def search( if use_fts: # Show only the FTS field field_value = row_dict.get(fts_field, "N/A") - display_contents = str(field_value) if field_value is not None else "[dim]N/A[/dim]" - - # Truncate if too long - max_length = 80 - if len(display_contents) > max_length: - display_contents = display_contents[:max_length] + "..." + display_contents = str(field_value) if field_value is not None else "N/A" else: # Vector search: show all attributes except system fields contents = {} @@ -232,25 +225,34 @@ def search( if key not in exclude_keys and not key.startswith("_"): contents[key] = value - # Stringify and truncate contents - contents_str = json.dumps(contents, default=str) - max_length = 80 - display_contents = ( - contents_str[:max_length] + "..." if len(contents_str) > max_length else contents_str - ) + display_contents = json.dumps(contents, default=str) # Get distance/score value dist_value = row_dict.get("$dist") or row_dict.get("dist") - score_display = f"{dist_value:.4f}" if dist_value is not None else "[dim]N/A[/dim]" + if plain: + score_display = f"{dist_value:.4f}" if dist_value is not None else "N/A" + else: + score_display = f"{dist_value:.4f}" if dist_value is not None else "[dim]N/A[/dim]" - table.add_row(str(row_id), display_contents, score_display) + table_rows.append([str(row_id), display_contents, score_display]) - console.print(table) + if plain: + print_table_plain(["ID", "Contents", score_header], table_rows) + else: + table = Table(show_header=True, header_style="cyan") + table.add_column("ID") + table.add_column("Contents") + table.add_column(score_header) + for r in table_rows: + table.add_row(*r) + console.print(table) # Show performance info if hasattr(result, "performance") and result.performance: - console.print( - f"\n[dim]Search completed in {query_time:.0f}ms (query execution: {result.performance.query_execution_ms:.2f}ms)[/dim]" + status_print( + ctx, + f"\n[dim]Search completed in {query_time:.0f}ms (query execution: {result.performance.query_execution_ms:.2f}ms)[/dim]", + console, ) except Exception as e: diff --git a/src/tpuff/utils/output.py b/src/tpuff/utils/output.py new file mode 100644 index 0000000..e1d04ec --- /dev/null +++ b/src/tpuff/utils/output.py @@ -0,0 +1,60 @@ +"""Output mode utilities for tpuff CLI. + +Supports two output modes: +- human: Rich tables with colors, emojis, decorative messages (default in TTY) +- plain: Pipe-delimited, data-only rows for agent/script consumption (default when piped) +""" + +import sys + +import click + + +def resolve_output_mode(explicit: str | None) -> str: + """Resolve the output mode from explicit flag or TTY auto-detection. + + Args: + explicit: The user-specified mode ("human" or "plain"), or None for auto. + + Returns: + "human" or "plain" + """ + if explicit: + return explicit + return "human" if sys.stdout.isatty() else "plain" + + +def is_plain(ctx: click.Context) -> bool: + """Check if the current output mode is plain. + + Args: + ctx: Click context with output_mode in obj dict. + + Returns: + True if output mode is "plain". + """ + return ctx.obj.get("output_mode") == "plain" + + +def print_table_plain(headers: list[str], rows: list[list[str]]) -> None: + """Print a pipe-delimited table to stdout. + + Args: + headers: Column header names. + rows: List of row data (each row is a list of strings). + """ + click.echo("|".join(headers)) + for row in rows: + click.echo("|".join(str(v) for v in row)) + + +def status_print(ctx: click.Context, message: str, console) -> None: + """Print a decorative/status message only in human mode. + + Args: + ctx: Click context with output_mode in obj dict. + message: Rich-formatted message string. + console: Rich Console instance. + """ + if not is_plain(ctx): + console.print(message)