From 8a7cccc53594b475d31f719927b8b468c769a07b Mon Sep 17 00:00:00 2001 From: Vitor Zucher Date: Thu, 4 Dec 2025 12:38:43 -0300 Subject: [PATCH] Markdown Output formatter feature --- README.md | 66 ++++- src/brightdata/cli/commands/scrape.py | 4 +- src/brightdata/cli/commands/search.py | 4 +- src/brightdata/cli/utils.py | 45 ++-- src/brightdata/formatters/__init__.py | 24 ++ src/brightdata/formatters/base.py | 36 +++ src/brightdata/formatters/json_formatter.py | 35 +++ src/brightdata/formatters/markdown.py | 226 ++++++++++++++++++ .../formatters/minimal_formatter.py | 24 ++ src/brightdata/formatters/pretty_formatter.py | 43 ++++ src/brightdata/formatters/registry.py | 74 ++++++ src/brightdata/models.py | 57 ++++- tests/unit/test_markdown.py | 181 ++++++++++++++ 13 files changed, 784 insertions(+), 35 deletions(-) create mode 100644 src/brightdata/formatters/__init__.py create mode 100644 src/brightdata/formatters/base.py create mode 100644 src/brightdata/formatters/json_formatter.py create mode 100644 src/brightdata/formatters/markdown.py create mode 100644 src/brightdata/formatters/minimal_formatter.py create mode 100644 src/brightdata/formatters/pretty_formatter.py create mode 100644 src/brightdata/formatters/registry.py create mode 100644 tests/unit/test_markdown.py diff --git a/README.md b/README.md index 9d08843..5a8a547 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ Modern async-first Python SDK for [Bright Data](https://brightdata.com) APIs wit ### 🏗️ **Core Features** - 🚀 **Async-first architecture** with sync wrappers for compatibility - 🎨 **Dataclass Payloads** - Runtime validation, IDE autocomplete, helper methods +- 📝 **Markdown Output** - Export results as GitHub-flavored markdown tables - 🌐 **Web scraping** via Web Unlocker proxy service - 🔍 **SERP API** - Google, Bing, Yandex search results - 📦 **Platform scrapers** - LinkedIn, Amazon, ChatGPT, Facebook, Instagram @@ -460,10 +461,11 @@ asyncio.run(scrape_multiple()) ## 🆕 What's New in v2 
2.0.0 ### 🆕 **Latest Updates (December 2025)** +- ✅ **Markdown Output Format** - NEW! Export results as GitHub-flavored markdown - ✅ **Amazon Search API** - NEW parameter-based product discovery with correct dataset - ✅ **LinkedIn Job Search Fixed** - Now builds URLs from keywords internally - ✅ **Trigger Interface** - Manual trigger/poll/fetch control for all platforms -- ✅ **29 Sync Wrapper Fixes** - All sync methods work (scrapers + SERP API) +- ✅ **30 Sync Wrapper Fixes** - ALL sync methods work (scrapers + SERP + generic) - ✅ **Batch Operations Fixed** - Returns List[ScrapeResult] correctly - ✅ **Auto-Create Zones** - Now enabled by default (was opt-in) - ✅ **Improved Zone Names** - `sdk_unlocker`, `sdk_serp`, `sdk_browser` @@ -656,9 +658,11 @@ result.elapsed_ms() # Total time in milliseconds result.get_timing_breakdown() # Detailed timing dict # Serialization -result.to_dict() # Convert to dictionary -result.to_json(indent=2) # JSON string -result.save_to_file("result.json") # Save to file +result.to_dict() # Convert to dictionary +result.to_json(indent=2) # JSON string +result.to_markdown() # GitHub-flavored markdown (NEW!) +result.save_to_file("result.json") # Save as JSON +result.save_to_file("result.md", format="markdown") # Save as markdown (NEW!) ``` --- @@ -728,6 +732,9 @@ brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format p # Minimal format - Just the data, no metadata brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format minimal + +# Markdown format - GitHub-flavored tables (NEW!) +brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format markdown ``` #### Generic Scraper Response Format (`--response-format`) @@ -749,6 +756,57 @@ brightdata scrape generic "https://example.com" \ --output-format pretty ``` +#### Markdown Output Format (NEW!) 
+ +Export results as GitHub-flavored markdown tables - perfect for reports and documentation: + +```bash +# CLI: Markdown output +brightdata search google "python tutorial" --output-format markdown + +# Save to file +brightdata search google "python tutorial" \ + --output-format markdown \ + --output-file report.md +``` + +**SDK: Markdown methods** + +```python +from brightdata import BrightDataClient + +client = BrightDataClient() +result = client.search.google(query="python tutorial", num_results=5) + +# Generate markdown +md = result.to_markdown() +print(md) + +# Save as markdown +result.save_to_file("report.md", format="markdown") +``` + +**Example Output:** + +```markdown +# Result: ✅ Success + +## Metadata + +| Field | Value | +|-------|-------| +| Cost | $0.0010 USD | +| Time | 1234.56ms | + +## Data + +| title | url | position | +| --- | --- | --- | +| The Python Tutorial | https://docs.python.org/3/tutorial/ | 1 | +| Python Tutorial - W3Schools | https://www.w3schools.com/python/ | 2 | +| Learn Python | https://www.learnpython.org/ | 3 | +``` + --- ## 🐼 Pandas Integration diff --git a/src/brightdata/cli/commands/scrape.py b/src/brightdata/cli/commands/scrape.py index 0cab2f8..3439aeb 100644 --- a/src/brightdata/cli/commands/scrape.py +++ b/src/brightdata/cli/commands/scrape.py @@ -16,9 +16,9 @@ ) @click.option( "--output-format", - type=click.Choice(["json", "pretty", "minimal"], case_sensitive=False), + type=click.Choice(["json", "pretty", "minimal", "markdown"], case_sensitive=False), default="json", - help="Output format", + help="Output format (json, pretty, minimal, markdown)", ) @click.option("--output-file", type=click.Path(), help="Save output to file") @click.pass_context diff --git a/src/brightdata/cli/commands/search.py b/src/brightdata/cli/commands/search.py index 47666bf..7e9b22a 100644 --- a/src/brightdata/cli/commands/search.py +++ b/src/brightdata/cli/commands/search.py @@ -16,9 +16,9 @@ ) @click.option( "--output-format", - 
type=click.Choice(["json", "pretty", "minimal"], case_sensitive=False), + type=click.Choice(["json", "pretty", "minimal", "markdown"], case_sensitive=False), default="json", - help="Output format", + help="Output format (json, pretty, minimal, markdown)", ) @click.option("--output-file", type=click.Path(), help="Save output to file") @click.pass_context diff --git a/src/brightdata/cli/utils.py b/src/brightdata/cli/utils.py index f167adc..1432896 100644 --- a/src/brightdata/cli/utils.py +++ b/src/brightdata/cli/utils.py @@ -67,34 +67,41 @@ def create_client(api_key: Optional[str] = None, **kwargs) -> BrightDataClient: def format_result(result: Any, output_format: str = "json") -> str: """ - Format result for output. + Format result for output using formatter registry. Args: result: Result object (ScrapeResult, SearchResult, etc.) - output_format: Output format ("json", "pretty", "minimal") + output_format: Output format ("json", "pretty", "minimal", "markdown") Returns: Formatted string """ - if output_format == "json": - if hasattr(result, "to_dict"): - data = result.to_dict() - elif hasattr(result, "__dict__"): - from dataclasses import asdict, is_dataclass - - if is_dataclass(result): - data = asdict(result) + try: + from ..formatters import FormatterRegistry + + formatter = FormatterRegistry.get_formatter(output_format) + return formatter.format(result) + except (ValueError, ImportError): + # Fallback to legacy formatting for backward compatibility + if output_format == "json": + if hasattr(result, "to_dict"): + data = result.to_dict() + elif hasattr(result, "__dict__"): + from dataclasses import asdict, is_dataclass + + if is_dataclass(result): + data = asdict(result) + else: + data = result.__dict__ else: - data = result.__dict__ + data = result + return json.dumps(data, indent=2, default=str) + elif output_format == "pretty": + return format_result_pretty(result) + elif output_format == "minimal": + return format_result_minimal(result) else: - data = result - 
return json.dumps(data, indent=2, default=str) - elif output_format == "pretty": - return format_result_pretty(result) - elif output_format == "minimal": - return format_result_minimal(result) - else: - return str(result) + return str(result) def format_result_pretty(result: Any) -> str: diff --git a/src/brightdata/formatters/__init__.py b/src/brightdata/formatters/__init__.py new file mode 100644 index 0000000..c6524df --- /dev/null +++ b/src/brightdata/formatters/__init__.py @@ -0,0 +1,24 @@ +"""Output formatters for results.""" + +from .registry import FormatterRegistry +from .base import BaseFormatter +from .json_formatter import JSONFormatter +from .pretty_formatter import PrettyFormatter +from .minimal_formatter import MinimalFormatter +from .markdown import MarkdownFormatter + +# Auto-register formatters +FormatterRegistry.register("json", JSONFormatter) +FormatterRegistry.register("pretty", PrettyFormatter) +FormatterRegistry.register("minimal", MinimalFormatter) +FormatterRegistry.register("markdown", MarkdownFormatter) +FormatterRegistry.register("md", MarkdownFormatter) # Alias + +__all__ = [ + "FormatterRegistry", + "BaseFormatter", + "JSONFormatter", + "PrettyFormatter", + "MinimalFormatter", + "MarkdownFormatter", +] diff --git a/src/brightdata/formatters/base.py b/src/brightdata/formatters/base.py new file mode 100644 index 0000000..59a20df --- /dev/null +++ b/src/brightdata/formatters/base.py @@ -0,0 +1,36 @@ +"""Base formatter interface.""" + +from abc import ABC, abstractmethod +from typing import Any + + +class BaseFormatter(ABC): + """ + Base formatter interface using Strategy Pattern. + + All formatters must implement this interface to ensure + consistent behavior across different output formats. + """ + + @abstractmethod + def format(self, result: Any) -> str: + """ + Format result to string representation. + + Args: + result: Result object (ScrapeResult, SearchResult, etc.) 
+ + Returns: + Formatted string representation + """ + pass + + @abstractmethod + def get_extension(self) -> str: + """ + Get file extension for this format. + + Returns: + File extension including dot (e.g., ".json", ".md") + """ + pass diff --git a/src/brightdata/formatters/json_formatter.py b/src/brightdata/formatters/json_formatter.py new file mode 100644 index 0000000..3b6fea5 --- /dev/null +++ b/src/brightdata/formatters/json_formatter.py @@ -0,0 +1,35 @@ +"""JSON output formatter.""" + +import json +from typing import Any +from dataclasses import asdict, is_dataclass +from .base import BaseFormatter + + +class JSONFormatter(BaseFormatter): + """ + Format results as JSON. + + Provides clean, structured JSON output suitable for: + - API consumption + - Data processing + - Automation + """ + + def format(self, result: Any) -> str: + """Format result as JSON string.""" + if hasattr(result, "to_dict"): + data = result.to_dict() + elif hasattr(result, "__dict__"): + if is_dataclass(result): + data = asdict(result) + else: + data = result.__dict__ + else: + data = result + + return json.dumps(data, indent=2, default=str) + + def get_extension(self) -> str: + """Get file extension.""" + return ".json" diff --git a/src/brightdata/formatters/markdown.py b/src/brightdata/formatters/markdown.py new file mode 100644 index 0000000..9e8375f --- /dev/null +++ b/src/brightdata/formatters/markdown.py @@ -0,0 +1,226 @@ +"""Markdown output formatter.""" + +import json +from typing import Any, Dict, List +from .base import BaseFormatter + + +class MarkdownFormatter(BaseFormatter): + """ + Format results as GitHub-flavored Markdown. + + Features: + - Tables for structured data + - Code blocks for complex data + - Status badges (✅/❌) + - Metadata section with timing/cost + - Smart column limiting for readability + """ + + def format(self, result: Any) -> str: + """ + Format result as markdown. 
+ + Args: + result: Result object to format + + Returns: + GitHub-flavored markdown string + """ + lines = [] + + # Header with status badge + if hasattr(result, "success"): + status_badge = "✅ Success" if result.success else "❌ Failed" + lines.append(f"# Result: {status_badge}") + lines.append("") + else: + lines.append("# Result") + lines.append("") + + # Metadata table + metadata_lines = self._format_metadata(result) + if metadata_lines: + lines.extend(metadata_lines) + lines.append("") + + # Data section + if hasattr(result, "data") and result.data is not None: + lines.append("## Data") + lines.append("") + + if isinstance(result.data, list): + lines.append(self._format_list_as_table(result.data)) + elif isinstance(result.data, dict): + lines.append(self._format_dict_as_table(result.data)) + elif isinstance(result.data, str): + lines.append("```") + lines.append(result.data[:1000]) # Limit to 1000 chars + if len(result.data) > 1000: + lines.append(f"... ({len(result.data) - 1000} more characters)") + lines.append("```") + else: + lines.append(f"```json\n{json.dumps(result.data, indent=2)}\n```") + + lines.append("") + + # Error section + if hasattr(result, "error") and result.error: + lines.append("## Error") + lines.append("") + lines.append(f"> ⚠️ {result.error}") + lines.append("") + + return "\n".join(lines) + + def _format_metadata(self, result: Any) -> List[str]: + """Format metadata as markdown table.""" + lines = [] + metadata = [] + + # Collect metadata fields + if hasattr(result, "platform") and result.platform: + metadata.append(("Platform", f"`{result.platform}`")) + + if hasattr(result, "method") and result.method: + metadata.append(("Method", f"`{result.method}`")) + + if hasattr(result, "cost") and result.cost is not None: + metadata.append(("Cost", f"${result.cost:.4f} USD")) + + if hasattr(result, "elapsed_ms"): + try: + elapsed = result.elapsed_ms() + metadata.append(("Time", f"{elapsed:.2f}ms")) + except Exception: + pass + + if 
hasattr(result, "snapshot_id") and result.snapshot_id: + metadata.append(("Snapshot ID", f"`{result.snapshot_id}`")) + + if hasattr(result, "url") and result.url: + url_display = result.url[:60] + "..." if len(result.url) > 60 else result.url + metadata.append(("URL", url_display)) + + # Build table if we have metadata + if metadata: + lines.append("## Metadata") + lines.append("") + lines.append("| Field | Value |") + lines.append("|-------|-------|") + for key, value in metadata: + lines.append(f"| {key} | {value} |") + + return lines + + def _format_list_as_table(self, data: List) -> str: + """Format list as markdown table.""" + if not data: + return "_No data_" + + # Handle list of dicts (most common case) + if isinstance(data[0], dict): + return self._format_list_of_dicts_as_table(data) + + # Handle simple list + lines = [] + lines.append("| Index | Value |") + lines.append("|-------|-------|") + + for i, item in enumerate(data[:20]): # Limit to 20 items + value = str(item)[:100] # Limit value length + lines.append(f"| {i} | {value} |") + + if len(data) > 20: + lines.append("") + lines.append(f"_... 
and {len(data) - 20} more items_") + + return "\n".join(lines) + + def _format_list_of_dicts_as_table(self, data: List[Dict]) -> str: + """Format list of dictionaries as markdown table.""" + if not data: + return "_No data_" + + # Get all unique keys from first 10 items + keys = set() + for item in data[:10]: + if isinstance(item, dict): + keys.update(item.keys()) + + # Select most important columns (limit to 5 for readability) + priority_keys = ["name", "title", "url", "price", "final_price", "rating"] + selected_keys = [] + + # Add priority keys first + for key in priority_keys: + if key in keys: + selected_keys.append(key) + if len(selected_keys) >= 5: + break + + # Fill remaining slots with other keys + if len(selected_keys) < 5: + for key in sorted(keys): + if key not in selected_keys: + selected_keys.append(key) + if len(selected_keys) >= 5: + break + + if not selected_keys: + # Fallback to JSON if no keys + return f"```json\n{json.dumps(data[:10], indent=2)}\n```" + + # Build table + lines = [] + lines.append("| " + " | ".join(selected_keys) + " |") + lines.append("| " + " | ".join(["---"] * len(selected_keys)) + " |") + + for item in data[:10]: # Limit to 10 rows for readability + if isinstance(item, dict): + values = [] + for key in selected_keys: + value = item.get(key, "") + # Truncate long values + value_str = str(value)[:50] + if len(str(value)) > 50: + value_str += "..." + values.append(value_str) + lines.append("| " + " | ".join(values) + " |") + + if len(data) > 10: + lines.append("") + lines.append(f"_... 
and {len(data) - 10} more items_") + + return "\n".join(lines) + + def _format_dict_as_table(self, data: Dict) -> str: + """Format dictionary as markdown table.""" + if not data: + return "_No data_" + + lines = [] + lines.append("| Key | Value |") + lines.append("|-----|-------|") + + # Show top 20 fields + for key, value in list(data.items())[:20]: + # Truncate long values + value_str = str(value)[:100] + if len(str(value)) > 100: + value_str += "..." + + # Escape pipes in values + value_str = value_str.replace("|", "\\|") + + lines.append(f"| `{key}` | {value_str} |") + + if len(data) > 20: + lines.append("") + lines.append(f"_... and {len(data) - 20} more fields_") + + return "\n".join(lines) + + def get_extension(self) -> str: + """Get file extension.""" + return ".md" diff --git a/src/brightdata/formatters/minimal_formatter.py b/src/brightdata/formatters/minimal_formatter.py new file mode 100644 index 0000000..ef9b426 --- /dev/null +++ b/src/brightdata/formatters/minimal_formatter.py @@ -0,0 +1,24 @@ +"""Minimal output formatter.""" + +import json +from typing import Any +from .base import BaseFormatter + + +class MinimalFormatter(BaseFormatter): + """ + Format results in minimal format (just the data). + + Provides clean data output without metadata, + ideal for piping to other commands or data processing. 
+ """ + + def format(self, result: Any) -> str: + """Format result in minimal format (data only).""" + if hasattr(result, "data"): + return json.dumps(result.data, indent=2, default=str) + return json.dumps(result, indent=2, default=str) + + def get_extension(self) -> str: + """Get file extension.""" + return ".json" diff --git a/src/brightdata/formatters/pretty_formatter.py b/src/brightdata/formatters/pretty_formatter.py new file mode 100644 index 0000000..111999d --- /dev/null +++ b/src/brightdata/formatters/pretty_formatter.py @@ -0,0 +1,43 @@ +"""Pretty output formatter.""" + +import json +from typing import Any +from .base import BaseFormatter + + +class PrettyFormatter(BaseFormatter): + """ + Format results in human-readable pretty format. + + Provides formatted output with: + - Success/failure indicators + - Cost and timing information + - Readable data display + """ + + def format(self, result: Any) -> str: + """Format result in pretty, human-readable way.""" + lines = [] + + if hasattr(result, "success"): + status = "✓ Success" if result.success else "✗ Failed" + lines.append(f"Status: {status}") + + if hasattr(result, "cost") and result.cost: + lines.append(f"Cost: ${result.cost:.4f} USD") + + if hasattr(result, "elapsed_ms"): + elapsed = result.elapsed_ms() + lines.append(f"Elapsed: {elapsed:.2f}ms") + + if hasattr(result, "data") and result.data: + lines.append("\nData:") + lines.append(json.dumps(result.data, indent=2)) + else: + lines.append(json.dumps(result, indent=2)) + + return "\n".join(lines) + + def get_extension(self) -> str: + """Get file extension.""" + return ".txt" diff --git a/src/brightdata/formatters/registry.py b/src/brightdata/formatters/registry.py new file mode 100644 index 0000000..0471edb --- /dev/null +++ b/src/brightdata/formatters/registry.py @@ -0,0 +1,74 @@ +"""Formatter registry for managing output formatters.""" + +from typing import Dict, Type, List +from .base import BaseFormatter + + +class FormatterRegistry: + """ + 
Registry for output formatters using Registry Pattern. + + Provides centralized formatter management and makes it easy + to add new output formats without modifying existing code. + + Example: + >>> FormatterRegistry.register("csv", CSVFormatter) + >>> formatter = FormatterRegistry.get_formatter("csv") + >>> output = formatter.format(result) + """ + + _formatters: Dict[str, Type[BaseFormatter]] = {} + + @classmethod + def register(cls, name: str, formatter: Type[BaseFormatter]) -> None: + """ + Register a formatter. + + Args: + name: Format name (e.g., "json", "markdown") + formatter: Formatter class implementing BaseFormatter + """ + cls._formatters[name.lower()] = formatter + + @classmethod + def get_formatter(cls, name: str) -> BaseFormatter: + """ + Get formatter instance by name. + + Args: + name: Format name (case-insensitive) + + Returns: + Formatter instance + + Raises: + ValueError: If format name not registered + """ + name_lower = name.lower() + if name_lower not in cls._formatters: + available = ", ".join(cls.list_formats()) + raise ValueError(f"Unknown format: '{name}'. Available formats: {available}") + return cls._formatters[name_lower]() + + @classmethod + def list_formats(cls) -> List[str]: + """ + List all registered format names. + + Returns: + List of format names + """ + return sorted(list(cls._formatters.keys())) + + @classmethod + def is_registered(cls, name: str) -> bool: + """ + Check if a format is registered. + + Args: + name: Format name to check + + Returns: + True if registered, False otherwise + """ + return name.lower() in cls._formatters diff --git a/src/brightdata/models.py b/src/brightdata/models.py index 2fd1233..a070ebd 100644 --- a/src/brightdata/models.py +++ b/src/brightdata/models.py @@ -101,31 +101,72 @@ def to_json(self, indent: Optional[int] = None) -> str: """ return json.dumps(self.to_dict(), indent=indent, default=str) + def to_markdown(self) -> str: + """ + Convert result to GitHub-flavored Markdown. 
+ + Returns: + Markdown-formatted string with tables, badges, and metadata. + + Example: + >>> result = client.search.google(query="python") + >>> md = result.to_markdown() + >>> print(md) + # Result: ✅ Success + + ## Metadata + | Field | Value | + |-------|-------| + | Cost | $0.0010 USD | + | Time | 1234.56ms | + + ## Data + | position | title | url | + |----------|-------|-----| + | 1 | Python Tutorial | https://... | + """ + from .formatters import FormatterRegistry + + formatter = FormatterRegistry.get_formatter("markdown") + return formatter.format(self) + def save_to_file(self, filepath: Union[str, Path], format: str = "json") -> None: """ Save result data to file. Args: filepath: Path where to save the file. Must be a valid file path. - format: File format. Currently only "json" is supported. + format: File format - "json", "markdown", "md", "pretty", "minimal". Raises: ValueError: If format is not supported. OSError: If file cannot be written (permissions, disk full, etc.). IOError: If file I/O operation fails. + + Example: + >>> result.save_to_file("output.md", format="markdown") + >>> result.save_to_file("data.json", format="json") """ + from .formatters import FormatterRegistry + path = Path(filepath).resolve() if not path.parent.exists(): raise OSError(f"Parent directory does not exist: {path.parent}") - if format.lower() == "json": - try: - path.write_text(self.to_json(indent=2), encoding="utf-8") - except OSError as e: - raise OSError(f"Failed to write file {path}: {e}") from e - else: - raise ValueError(f"Unsupported format: {format}. Use 'json'.") + try: + # Use formatter registry for all formats + formatter = FormatterRegistry.get_formatter(format) + content = formatter.format(self) + path.write_text(content, encoding="utf-8") + except ValueError as e: + # Unknown format + raise ValueError( + f"Unsupported format: {format}. 
" + f"Supported formats: {', '.join(FormatterRegistry.list_formats())}" + ) from e + except OSError as e: + raise OSError(f"Failed to write file {path}: {e}") from e def __repr__(self) -> str: """String representation for debugging.""" diff --git a/tests/unit/test_markdown.py b/tests/unit/test_markdown.py new file mode 100644 index 0000000..4433225 --- /dev/null +++ b/tests/unit/test_markdown.py @@ -0,0 +1,181 @@ +"""Tests for markdown output formatter.""" + +import pytest +from brightdata.models import ScrapeResult, SearchResult +from brightdata.formatters import MarkdownFormatter, FormatterRegistry + + +class TestMarkdownFormatter: + """Test markdown formatting functionality.""" + + def test_formatter_registered(self): + """Test markdown formatter is registered.""" + assert FormatterRegistry.is_registered("markdown") + assert FormatterRegistry.is_registered("md") # Alias + + def test_format_success_result(self): + """Test markdown formatting for successful result.""" + result = ScrapeResult( + success=True, + url="https://example.com", + data={"title": "Test", "price": 99.99}, + cost=0.001, + platform="test", + ) + + md = result.to_markdown() + + assert "# Result: ✅ Success" in md + assert "## Metadata" in md + assert "## Data" in md + assert "| Field | Value |" in md + assert "$0.0010 USD" in md + + def test_format_failed_result(self): + """Test markdown formatting for failed result.""" + result = ScrapeResult(success=False, url="", error="API Error: 404 Not Found", data=None) + + md = result.to_markdown() + + assert "# Result: ❌ Failed" in md + assert "## Error" in md + assert "⚠️ API Error: 404 Not Found" in md + + def test_format_list_data_as_table(self): + """Test list data is formatted as markdown table.""" + result = ScrapeResult( + success=True, + url="", + data=[ + {"name": "Product 1", "price": 10.99, "rating": 4.5}, + {"name": "Product 2", "price": 20.99, "rating": 4.8}, + {"name": "Product 3", "price": 15.99, "rating": 4.2}, + ], + ) + + md = 
result.to_markdown() + + # Should have markdown table + assert "|" in md + assert "| --- |" in md or "|---|" in md + assert "Product 1" in md + assert "Product 2" in md + + def test_format_dict_data_as_table(self): + """Test dict data is formatted as markdown table.""" + result = ScrapeResult( + success=True, + url="", + data={ + "title": "Test Product", + "price": 99.99, + "rating": 4.5, + "reviews": 100, + }, + ) + + md = result.to_markdown() + + assert "| Key | Value |" in md + assert "`title`" in md + assert "Test Product" in md + + def test_save_to_file_markdown(self, tmp_path): + """Test saving result as markdown file.""" + result = ScrapeResult(success=True, url="", data={"test": "data"}, cost=0.001) + + output_file = tmp_path / "result.md" + result.save_to_file(output_file, format="markdown") + + assert output_file.exists() + + content = output_file.read_text() + assert "# Result:" in content + assert "## Metadata" in content + + def test_markdown_handles_large_data(self): + """Test markdown formatter limits data for readability.""" + # Create 50 items + large_data = [{"id": i, "name": f"Item {i}"} for i in range(50)] + + result = ScrapeResult(success=True, url="", data=large_data) + + md = result.to_markdown() + + # Should limit to 10 rows + assert "Item 0" in md + assert "Item 9" in md or "Item 10" in md + assert "... 
and" in md # Indicates truncation + + def test_markdown_metadata_fields(self): + """Test markdown includes all metadata fields.""" + result = ScrapeResult( + success=True, + url="https://example.com", + data=[], + cost=0.005, + platform="amazon", + method="web_scraper", + snapshot_id="s_abc123", + ) + + md = result.to_markdown() + + assert "Platform" in md + assert "amazon" in md + assert "Method" in md + assert "web_scraper" in md + assert "Snapshot ID" in md + assert "s_abc123" in md + assert "Cost" in md + assert "$0.0050 USD" in md + + def test_search_result_markdown(self): + """Test Search results format correctly.""" + result = SearchResult( + success=True, + query="python tutorial", + data=[{"position": 1, "title": "Python Docs", "url": "https://python.org"}], + ) + + md = result.to_markdown() + + assert "# Result: ✅ Success" in md + assert "Python Docs" in md + + +class TestFormatterRegistry: + """Test formatter registry functionality.""" + + def test_list_all_formats(self): + """Test listing all available formats.""" + formats = FormatterRegistry.list_formats() + + assert "json" in formats + assert "pretty" in formats + assert "minimal" in formats + assert "markdown" in formats + assert "md" in formats + + def test_get_formatter_by_name(self): + """Test getting formatter by name.""" + formatter = FormatterRegistry.get_formatter("markdown") + assert isinstance(formatter, MarkdownFormatter) + + def test_unknown_format_raises_error(self): + """Test unknown format raises helpful error.""" + with pytest.raises(ValueError) as exc_info: + FormatterRegistry.get_formatter("unknown") + + assert "Unknown format" in str(exc_info.value) + assert "Available formats:" in str(exc_info.value) + + def test_format_names_case_insensitive(self): + """Test format names are case-insensitive.""" + formatter1 = FormatterRegistry.get_formatter("MARKDOWN") + formatter2 = FormatterRegistry.get_formatter("markdown") + formatter3 = FormatterRegistry.get_formatter("MarkDown") + + assert 
isinstance(formatter1, MarkdownFormatter) + assert isinstance(formatter2, MarkdownFormatter) + assert isinstance(formatter3, MarkdownFormatter)