Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 62 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Modern async-first Python SDK for [Bright Data](https://brightdata.com) APIs wit
### 🏗️ **Core Features**
- 🚀 **Async-first architecture** with sync wrappers for compatibility
- 🎨 **Dataclass Payloads** - Runtime validation, IDE autocomplete, helper methods
- 📝 **Markdown Output** - Export results as GitHub-flavored markdown tables
- 🌐 **Web scraping** via Web Unlocker proxy service
- 🔍 **SERP API** - Google, Bing, Yandex search results
- 📦 **Platform scrapers** - LinkedIn, Amazon, ChatGPT, Facebook, Instagram
Expand Down Expand Up @@ -460,10 +461,11 @@ asyncio.run(scrape_multiple())
## 🆕 What's New in v2.0.0

### 🆕 **Latest Updates (December 2025)**
- ✅ **Markdown Output Format** - NEW! Export results as GitHub-flavored markdown
- ✅ **Amazon Search API** - NEW parameter-based product discovery with correct dataset
- ✅ **LinkedIn Job Search Fixed** - Now builds URLs from keywords internally
- ✅ **Trigger Interface** - Manual trigger/poll/fetch control for all platforms
- ✅ **29 Sync Wrapper Fixes** - All sync methods work (scrapers + SERP API)
- ✅ **30 Sync Wrapper Fixes** - ALL sync methods work (scrapers + SERP + generic)
- ✅ **Batch Operations Fixed** - Returns List[ScrapeResult] correctly
- ✅ **Auto-Create Zones** - Now enabled by default (was opt-in)
- ✅ **Improved Zone Names** - `sdk_unlocker`, `sdk_serp`, `sdk_browser`
Expand Down Expand Up @@ -656,9 +658,11 @@ result.elapsed_ms() # Total time in milliseconds
result.get_timing_breakdown() # Detailed timing dict

# Serialization
result.to_dict() # Convert to dictionary
result.to_json(indent=2) # JSON string
result.save_to_file("result.json") # Save to file
result.to_dict() # Convert to dictionary
result.to_json(indent=2) # JSON string
result.to_markdown() # GitHub-flavored markdown (NEW!)
result.save_to_file("result.json") # Save as JSON
result.save_to_file("result.md", format="markdown") # Save as markdown (NEW!)
```

---
Expand Down Expand Up @@ -728,6 +732,9 @@ brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format p

# Minimal format - Just the data, no metadata
brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format minimal

# Markdown format - GitHub-flavored tables (NEW!)
brightdata scrape amazon products "https://amazon.com/dp/B123" --output-format markdown
```

#### Generic Scraper Response Format (`--response-format`)
Expand All @@ -749,6 +756,57 @@ brightdata scrape generic "https://example.com" \
--output-format pretty
```

#### Markdown Output Format (NEW!)

Export results as GitHub-flavored markdown tables - perfect for reports and documentation:

```bash
# CLI: Markdown output
brightdata search google "python tutorial" --output-format markdown

# Save to file
brightdata search google "python tutorial" \
--output-format markdown \
--output-file report.md
```

**SDK: Markdown methods**

```python
from brightdata import BrightDataClient

client = BrightDataClient()
result = client.search.google(query="python tutorial", num_results=5)

# Generate markdown
md = result.to_markdown()
print(md)

# Save as markdown
result.save_to_file("report.md", format="markdown")
```

**Example Output:**

```markdown
# Result: ✅ Success

## Metadata

| Field | Value |
|-------|-------|
| Cost | $0.0010 USD |
| Time | 1234.56ms |

## Data

| position | title | url |
|----------|-------|-----|
| 1 | The Python Tutorial | https://docs.python.org/3/tutorial/ |
| 2 | Python Tutorial - W3Schools | https://www.w3schools.com/python/ |
| 3 | Learn Python | https://www.learnpython.org/ |
```

---

## 🐼 Pandas Integration
Expand Down
4 changes: 2 additions & 2 deletions src/brightdata/cli/commands/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
)
@click.option(
"--output-format",
type=click.Choice(["json", "pretty", "minimal"], case_sensitive=False),
type=click.Choice(["json", "pretty", "minimal", "markdown"], case_sensitive=False),
default="json",
help="Output format",
help="Output format (json, pretty, minimal, markdown)",
)
@click.option("--output-file", type=click.Path(), help="Save output to file")
@click.pass_context
Expand Down
4 changes: 2 additions & 2 deletions src/brightdata/cli/commands/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
)
@click.option(
"--output-format",
type=click.Choice(["json", "pretty", "minimal"], case_sensitive=False),
type=click.Choice(["json", "pretty", "minimal", "markdown"], case_sensitive=False),
default="json",
help="Output format",
help="Output format (json, pretty, minimal, markdown)",
)
@click.option("--output-file", type=click.Path(), help="Save output to file")
@click.pass_context
Expand Down
45 changes: 26 additions & 19 deletions src/brightdata/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,34 +67,41 @@ def create_client(api_key: Optional[str] = None, **kwargs) -> BrightDataClient:

def format_result(result: Any, output_format: str = "json") -> str:
"""
Format result for output.
Format result for output using formatter registry.

Args:
result: Result object (ScrapeResult, SearchResult, etc.)
output_format: Output format ("json", "pretty", "minimal")
output_format: Output format ("json", "pretty", "minimal", "markdown")

Returns:
Formatted string
"""
if output_format == "json":
if hasattr(result, "to_dict"):
data = result.to_dict()
elif hasattr(result, "__dict__"):
from dataclasses import asdict, is_dataclass

if is_dataclass(result):
data = asdict(result)
try:
from ..formatters import FormatterRegistry

formatter = FormatterRegistry.get_formatter(output_format)
return formatter.format(result)
except (ValueError, ImportError):
# Fallback to legacy formatting for backward compatibility
if output_format == "json":
if hasattr(result, "to_dict"):
data = result.to_dict()
elif hasattr(result, "__dict__"):
from dataclasses import asdict, is_dataclass

if is_dataclass(result):
data = asdict(result)
else:
data = result.__dict__
else:
data = result.__dict__
data = result
return json.dumps(data, indent=2, default=str)
elif output_format == "pretty":
return format_result_pretty(result)
elif output_format == "minimal":
return format_result_minimal(result)
else:
data = result
return json.dumps(data, indent=2, default=str)
elif output_format == "pretty":
return format_result_pretty(result)
elif output_format == "minimal":
return format_result_minimal(result)
else:
return str(result)
return str(result)


def format_result_pretty(result: Any) -> str:
Expand Down
24 changes: 24 additions & 0 deletions src/brightdata/formatters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Output formatters for results."""

from .registry import FormatterRegistry
from .base import BaseFormatter
from .json_formatter import JSONFormatter
from .pretty_formatter import PrettyFormatter
from .minimal_formatter import MinimalFormatter
from .markdown import MarkdownFormatter

# Auto-register the built-in formatters when this package is imported.
# Each call hands FormatterRegistry.register a format-name string together
# with the formatter class itself (not an instance).
FormatterRegistry.register("json", JSONFormatter)
FormatterRegistry.register("pretty", PrettyFormatter)
FormatterRegistry.register("minimal", MinimalFormatter)
FormatterRegistry.register("markdown", MarkdownFormatter)
FormatterRegistry.register("md", MarkdownFormatter) # Alias: same class as "markdown"

# Public names exported by ``from <this package> import *``.
__all__ = [
"FormatterRegistry",
"BaseFormatter",
"JSONFormatter",
"PrettyFormatter",
"MinimalFormatter",
"MarkdownFormatter",
]
36 changes: 36 additions & 0 deletions src/brightdata/formatters/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Base formatter interface."""

from abc import ABC, abstractmethod
from typing import Any


class BaseFormatter(ABC):
    """
    Abstract contract shared by every output formatter (Strategy Pattern).

    Concrete formatters subclass this and implement both methods below,
    which keeps behavior consistent across the different output formats.
    """

    @abstractmethod
    def format(self, result: Any) -> str:
        """
        Render ``result`` as a string in this formatter's output format.

        Args:
            result: Result object (ScrapeResult, SearchResult, etc.)

        Returns:
            Formatted string representation
        """

    @abstractmethod
    def get_extension(self) -> str:
        """
        Report the file extension associated with this format.

        Returns:
            File extension including the leading dot (e.g., ".json", ".md")
        """
35 changes: 35 additions & 0 deletions src/brightdata/formatters/json_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""JSON output formatter."""

import json
from typing import Any
from dataclasses import asdict, is_dataclass
from .base import BaseFormatter


class JSONFormatter(BaseFormatter):
    """
    Format results as JSON.

    Provides clean, structured JSON output suitable for:
    - API consumption
    - Data processing
    - Automation
    """

    def format(self, result: Any) -> str:
        """
        Format result as a 2-space-indented JSON string.

        The payload is chosen in this order:
        1. ``result.to_dict()`` when the object exposes that method;
        2. ``dataclasses.asdict(result)`` for dataclasses;
        3. ``result.__dict__`` for other objects that have one;
        4. ``result`` itself (e.g. a dict, list or scalar).

        Args:
            result: Result object (ScrapeResult, SearchResult, etc.)

        Returns:
            JSON string representation of the chosen payload.
        """
        if hasattr(result, "to_dict"):
            data = result.to_dict()
        elif is_dataclass(result):
            # Checked independently of ``__dict__``: a dataclass that defines
            # ``__slots__`` has no instance ``__dict__`` and would otherwise
            # fall through and be emitted as its str() repr.
            data = asdict(result)
        elif hasattr(result, "__dict__"):
            data = result.__dict__
        else:
            data = result

        # default=str stringifies values json cannot encode instead of raising.
        return json.dumps(data, indent=2, default=str)

    def get_extension(self) -> str:
        """Get file extension (including the leading dot)."""
        return ".json"
Loading