# markets-examples/cli.py (apistemic/markets-examples, main branch)
import io
import json
import logging
from enum import Enum

import pandas as pd
import typer

from apistemic.markets.api import create_markets_api_from_environment
from apistemic.markets.models import CompetitorItem

# Typer application object; the commands defined below register themselves
# on it through the @app.command() decorator, and the __main__ guard runs it.
app = typer.Typer()


class OutputFormat(str, Enum):
    """Output formats selectable through the ``format`` option of ``fetch``.

    The ``str`` mixin makes every member compare equal to its plain string
    value.
    """

    # plain-text table; this is the default of the `format` option
    table = "table"
    # indented JSON array of the dumped items
    json = "json"
    # CSV text without the index column
    csv = "csv"
    # Parquet file content, written as raw bytes
    parquet = "parquet"


class Endpoint(str, Enum):
    """Kinds of data the ``fetch`` command can request.

    The ``str`` mixin makes every member compare equal to its plain string
    value; ``fetch`` passes ``.value`` on to ``_fetch_items``.
    """

    # served by api.get_competitors_with_original(...)
    competitors = "competitors"
    # served by api.get_lookalikes_with_original(...)
    lookalikes = "lookalikes"


@app.command()
def leadgen():
    # Placeholder command: it is registered on the CLI but does nothing yet.
    # NOTE(review): possibly kept on purpose so the app has more than one
    # command and `fetch` stays an explicit subcommand (Typer runs a
    # single-command app without a subcommand name) — confirm before removing.
    pass


@app.command()
def fetch(
    endpoint: Endpoint = typer.Argument(..., help="Type of data to fetch"),
    slug: str = typer.Argument(
        ...,
        help=(
            "company identifier, can be company ID, Linkedin slug or domain name."
            " For example, to get Uber"
            " both `linkedin:uber-com` and `domain:uber.com` work"
        ),
    ),
    format: OutputFormat = typer.Option(OutputFormat.table, help="Output format"),
):
    """
    Fetch competitors or lookalikes for a given company.
    """
    # NOTE: the docstring above is user-facing; Typer shows it as the help
    # text of this command, so keep it short and free of developer detail.

    # Retrieve first; the helper expects the endpoint as a plain string,
    # hence `.value` rather than the enum member itself.
    fetched = _fetch_items(slug=slug, endpoint=endpoint.value)

    # Then render the result in whichever format was asked for.
    _display_items(items=fetched, format=format)


def _fetch_items(
    slug: str,
    endpoint: str,
) -> list[CompetitorItem]:
    """Fetch the items for ``slug`` from the requested markets API endpoint.

    Args:
        slug: Company identifier, forwarded unchanged to the API call.
        endpoint: Either ``"competitors"`` or ``"lookalikes"``.

    Returns:
        The result of the selected ``get_*_with_original`` API call,
        unmodified. No limit or filtering is applied here.

    Raises:
        ValueError: If ``endpoint`` is not one of the supported names.
    """
    # Validate before building the API client, so that a misspelled endpoint
    # fails fast instead of silently being treated as "lookalikes".
    if endpoint not in ("competitors", "lookalikes"):
        raise ValueError(
            f"Unsupported endpoint: {endpoint}."
            " Supported endpoints are: competitors, lookalikes"
        )

    # The client is configured from the process environment.
    api = create_markets_api_from_environment()

    if endpoint == "competitors":
        return api.get_competitors_with_original(slug)
    return api.get_lookalikes_with_original(slug)


def _display_items(items: list[CompetitorItem], format: OutputFormat):
    """Write ``items`` to standard output in the requested ``format``.

    JSON is produced directly from the dumped items. Every other format is
    rendered from a DataFrame built with ``pd.json_normalize``, so nested
    fields show up as dotted column names such as ``organization.id``.

    Args:
        items: Objects exposing ``model_dump()``.
        format: The output format to emit.

    Raises:
        ValueError: If ``format`` is none of the handled formats.
    """
    # JSON does not need the tabular representation, so handle it up front.
    if format == OutputFormat.json:
        records = [entry.model_dump() for entry in items]
        typer.echo(json.dumps(records, indent=2))
        return

    # Everything below works on a flattened, tabular view of the items.
    frame = pd.json_normalize([entry.model_dump() for entry in items])

    # Integer-valued columns that may contain missing values are cast to the
    # nullable Int64 dtype; columns absent from the data are skipped.
    nullable_int_columns = (
        "organization.id",
        "organization.employee_count",
        "organization.founded_year",
    )
    for name in nullable_int_columns:
        if name in frame.columns:
            frame[name] = frame[name].astype("Int64")

    if format == OutputFormat.csv:
        # to_csv() output already ends with a newline, hence nl=False.
        typer.echo(frame.to_csv(index=False), nl=False)
        return

    if format == OutputFormat.parquet:
        # Serialize into memory and emit the raw bytes as they are.
        sink = io.BytesIO()
        frame.to_parquet(sink)
        typer.echo(sink.getvalue(), nl=False)
        return

    if format == OutputFormat.table:

        def _render_float(value):
            # three decimals for real numbers, blank for missing values
            return f"{value:.3f}" if pd.notna(value) else ""

        rendered = frame.to_string(
            index=False,
            max_colwidth=20,
            float_format=_render_float,
        )
        typer.echo(rendered)
        return

    # Defensive fallback for a format that has no renderer above.
    supported = ", ".join(OutputFormat.__members__)
    raise ValueError(
        f"Unsupported format: {format}. Supported formats are: {supported}"
    )


if __name__ == "__main__":
    # Logging is configured only when the file is run as a script, so that
    # importing this module does not touch the importer's logging setup.
    logging.basicConfig(level=logging.INFO)
    # Hand over to Typer, which parses the command line and runs the command.
    app()