diff --git a/docs/configuration/datasources.md b/docs/configuration/datasources.md index b0d59a5..6308b48 100644 --- a/docs/configuration/datasources.md +++ b/docs/configuration/datasources.md @@ -42,30 +42,36 @@ password: ${DB_PASSWORD} SLayer uses [sqlglot](https://github.com/tobymao/sqlglot) for dialect-aware SQL generation. Databases are supported at two tiers: -### First-class support +### Database Drivers + +#### First-class support These databases are verified by integration tests and runnable Docker examples. Regressions are caught in CI. -| Type | Install Extra | Connection Driver | Example | -|------|---------------|-------------------|---------| -| `postgres` / `postgresql` | `pip install motley-slayer[postgres]` | `postgresql://` | `postgresql://user:pass@localhost:5432/db` | -| `mysql` / `mariadb` | `pip install motley-slayer[mysql]` | `mysql+pymysql://` | `mysql+pymysql://user:pass@localhost:3306/db` | -| `clickhouse` | `pip install motley-slayer[clickhouse]` | `clickhouse+http://` | `clickhouse+http://user:pass@localhost:8123/db` | -| `sqlite` | (built-in) | `sqlite:///` | `sqlite:///path/to/db.sqlite` | -| `duckdb` | `pip install motley-slayer[duckdb]` | `duckdb:///` | `duckdb:///path/to/db.duckdb` | - -### Additional support - -These databases have SQL generation covered by unit tests, but are not verified against live instances yet. - -| Type | Notes | -|------|-------| -| `snowflake` | Analytical/cloud warehouse; no foreign keys (like ClickHouse), so auto-ingestion won't discover joins | -| `bigquery` | Analytical/cloud warehouse; no foreign keys, same caveat as Snowflake | -| `redshift` | Postgres-based cloud warehouse; FKs are informational only (not enforced) | -| `trino` / `presto` / `athena` | Federated query engines; no FKs, schema depends on the underlying connector | -| `databricks` / `spark` | Spark SQL-based; no FKs | -| `oracle` / `mssql` / `sqlserver` / `tsql` | Broadly compatible with Postgres feature set | +| Type | Install Extra | Connection String | +|------|---------------|-------------------| +| `sqlite` | (built-in, no extra needed) | `sqlite:///path/to/db.sqlite` | +| `postgres` / `postgresql` | `motley-slayer[postgres]` | `postgresql://user:pass@localhost:5432/db` | +| `mysql` / `mariadb` | `motley-slayer[mysql]` | `mysql+pymysql://user:pass@localhost:3306/db` | +| `clickhouse` | `motley-slayer[clickhouse]` | `clickhouse+http://user:pass@localhost:8123/db` | +| `duckdb` | `motley-slayer[duckdb]` | `duckdb:///path/to/db.duckdb` | + +#### Additional support + +SQL generation is covered by unit tests, but not verified against live instances. Install the appropriate SQLAlchemy driver manually. + +| Type | SQLAlchemy Driver | Install | +|------|-------------------|---------| +| `snowflake` | `snowflake-sqlalchemy` | `pip install snowflake-sqlalchemy` | +| `bigquery` | `sqlalchemy-bigquery` | `pip install sqlalchemy-bigquery` | +| `redshift` | `sqlalchemy-redshift` + `redshift_connector` | `pip install sqlalchemy-redshift redshift-connector` | +| `trino` / `presto` / `athena` | `trino` or `PyAthena` | `pip install trino` or `pip install PyAthena` | +| `databricks` / `spark` | `databricks-sql-connector` | `pip install databricks-sql-connector` | +| `oracle` | `oracledb` | `pip install oracledb` | +| `mssql` / `sqlserver` / `tsql` | `pyodbc` or `pymssql` | `pip install pyodbc` or `pip install pymssql` | + +!!! note + Snowflake, BigQuery, ClickHouse, and similar analytical warehouses typically don't have foreign keys, so auto-ingestion won't discover joins. 
Define joins manually in your model YAML. !!! tip If your database isn't listed but is supported by sqlglot, it may already work — SLayer falls back to Postgres-style SQL by default. Try it and [open an issue](https://github.com/MotleyAI/slayer/issues) if you hit a problem. diff --git a/docs/configuration/storage.md b/docs/configuration/storage.md index d968fce..a89ffc2 100644 --- a/docs/configuration/storage.md +++ b/docs/configuration/storage.md @@ -35,6 +35,26 @@ from slayer.storage.sqlite_storage import SQLiteStorage storage = SQLiteStorage(db_path="./slayer.db") ``` +## Storage Resolution + +The `resolve_storage()` factory creates a backend from a path or URI: + +```python +from slayer.storage.base import resolve_storage + +storage = resolve_storage("./slayer_data") # YAMLStorage (directory) +storage = resolve_storage("slayer.db") # SQLiteStorage (.db extension) +storage = resolve_storage("sqlite:///slayer.db") # SQLiteStorage (explicit scheme) +storage = resolve_storage("yaml://./data") # YAMLStorage (explicit scheme) +``` + +The CLI uses this via the `--storage` flag: + +```bash +slayer serve --storage ./slayer_data # YAML +slayer serve --storage slayer.db # SQLite +``` + ## Custom Backends Both backends implement the `StorageBackend` protocol. You can write your own: @@ -55,6 +75,19 @@ class MyCustomStorage(StorageBackend): def delete_datasource(self, name: str) -> bool: ... ``` +Register it for URI-based resolution: + +```python +from slayer.storage.base import register_storage, resolve_storage +from my_package import RedisStorage + +register_storage("redis", lambda path: RedisStorage(url=f"redis://{path}")) + +# Now works everywhere: +storage = resolve_storage("redis://localhost:6379/0") +# slayer serve --storage redis://localhost:6379/0 +``` + Pass any backend to the server, MCP, or client: ```python diff --git a/docs/getting-started/cli.md b/docs/getting-started/cli.md new file mode 100644 index 0000000..f3094d3 --- /dev/null +++ b/docs/getting-started/cli.md @@ -0,0 +1,128 @@ +# CLI Setup — Terminal Users + +Query your database from the command line. No Python code needed — just install and go. + +## Install + +```bash +uv tool install motley-slayer +``` + +For databases other than SQLite, add the driver extra (see [full list](../configuration/datasources.md#database-drivers)): + +```bash +uv tool install 'motley-slayer[postgres]' +``` + +## Connect a database + +Create a datasource — either from a YAML file or inline: + +```bash +# Inline (quick setup — use ${ENV_VAR} for secrets) +slayer datasources create-inline my_pg \ + --type postgres \ + --host localhost \ + --database myapp \ + --username analyst \ + --password-stdin + +# Or from a YAML file +slayer datasources create datasource.yaml +``` + +YAML file format: + +```yaml +# datasource.yaml +name: my_pg +type: postgres +host: localhost +port: 5432 +database: myapp +username: analyst +password: ${DB_PASSWORD} +``` + +Test the connection: + +```bash +slayer datasources test my_pg +# OK — connected to 'my_pg' (postgres). 
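+# If the test fails with an authentication error, make sure any ${ENV_VAR}
+# referenced in the datasource config (e.g. DB_PASSWORD in the YAML above)
+# is exported in the shell where you run slayer.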
+``` + +## Ingest models + +Auto-generate models from your database schema: + +```bash +slayer ingest --datasource my_pg +# Ingested: orders (6 dims, 12 measures) +# Ingested: customers (4 dims, 5 measures) +# Ingested: regions (3 dims, 2 measures) +``` + +Optionally filter tables: + +```bash +slayer ingest --datasource my_pg --schema public --include orders,customers +slayer ingest --datasource my_pg --exclude migrations,django_session +``` + +## Query + +```bash +# Count orders by status +slayer query '{"source_model": "orders", "fields": [{"formula": "count"}], "dimensions": ["status"]}' + +# From a file +slayer query @query.json + +# Output as JSON (pipe-friendly) +slayer query @query.json --format json + +# Preview the generated SQL without running it +slayer query @query.json --dry-run + +# Show execution plan +slayer query @query.json --explain +``` + +## Explore models + +```bash +slayer models list +slayer models show orders +slayer datasources list +``` + +## Verify it works + +After install + ingest, this should return data: + +```bash +slayer query '{"source_model": "orders", "fields": [{"formula": "count"}]}' +``` + +Expected output: + +``` +orders.count +------------ +42 + +1 row(s) +``` + +If you see "Model 'orders' not found", check that `slayer ingest` ran successfully and that `--storage` points to the right location. + +## Start a server (optional) + +If you also want a REST API or MCP endpoint: + +```bash +slayer serve # REST API at http://localhost:5143 +slayer serve --storage slayer.db # Using SQLite storage +``` + +See the [CLI Reference](../reference/cli.md) for all commands and flags. diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md new file mode 100644 index 0000000..abed387 --- /dev/null +++ b/docs/getting-started/index.md @@ -0,0 +1,37 @@ +# Getting Started + +SLayer is a semantic layer that sits between your database and whatever consumes the data — AI agents, apps, scripts, or dashboards. You define your data model once (or let SLayer auto-generate it), and consumers query using measures, dimensions, and filters instead of writing SQL. + +## Which interface is right for you? + +| I want to... | Use | Guide | +|---|---|---| +| Connect an AI agent (Claude, Cursor) to my database | **MCP Server** | [MCP Setup](mcp.md) | +| Query from the terminal or scripts | **CLI** | [CLI Setup](cli.md) | +| Build an app that queries data (any language) | **REST API** | [REST API Setup](rest-api.md) | +| Use SLayer as a Python library | **Python SDK** | [Python Setup](python.md) | + +All four interfaces use the same query language and the same models — pick the one that fits your workflow. You can use multiple interfaces simultaneously (e.g., MCP for your agent + REST API for your dashboard). + +## Supported Databases + +SLayer works with most SQL databases. The base install includes SQLite support (no extras needed). 
+ +| Database | Install | Status | +|---|---|---| +| SQLite | included | Fully tested | +| PostgreSQL | `motley-slayer[postgres]` | Fully tested | +| MySQL / MariaDB | `motley-slayer[mysql]` | Fully tested | +| ClickHouse | `motley-slayer[clickhouse]` | Fully tested | +| DuckDB | `motley-slayer[duckdb]` | Fully tested | +| Snowflake, BigQuery, Redshift, Trino, Databricks, MS SQL, Oracle | Covered by sqlglot | SQL generation tested | + +## Next Steps + +After setting up your interface, explore: + +- [Terminology](../concepts/terminology.md) — key terms and concepts +- [Models](../concepts/models.md) — define custom dimensions and measures +- [Queries](../concepts/queries.md) — query structure and parameters +- [Formulas](../concepts/formulas.md) — transforms, arithmetic, filters +- [Examples](../examples/01_dynamic/dynamic.md) — interactive notebooks diff --git a/docs/getting-started/mcp.md b/docs/getting-started/mcp.md new file mode 100644 index 0000000..c29eafd --- /dev/null +++ b/docs/getting-started/mcp.md @@ -0,0 +1,75 @@ +# MCP Setup — AI Agents + +Connect your AI agent (Claude Code, Cursor, etc.) to your database through SLayer's MCP server. No Python knowledge required. + +## Install + +```bash +uv tool install motley-slayer +``` + +For databases other than SQLite, add the driver extra (see [full list](../configuration/datasources.md#database-drivers)): + +```bash +uv tool install 'motley-slayer[postgres]' +``` + +## Connect to your agent + +### Claude Code (stdio — recommended) + +```bash +claude mcp add slayer -- slayer mcp --storage ./slayer_data +``` + +If SLayer is in a virtualenv, use the full path to the executable: + +```bash +claude mcp add slayer -- $(which slayer) mcp --storage /absolute/path/to/slayer_data +``` + +### Remote agents (HTTP/SSE) + +Start the server, then point your agent at the SSE endpoint: + +```bash +slayer serve --storage ./slayer_data + +# In another terminal / agent config: +claude mcp add slayer-remote --transport sse --url http://localhost:5143/mcp/sse +``` + +## Connect a database + +Once the agent is connected, it handles everything conversationally. A typical exchange: + +> **You:** Connect to my Postgres database at localhost, database "myapp", user "analyst" +> +> **Agent:** *calls `create_datasource` → auto-ingests models → calls `datasource_summary`* +> +> "Connected! I found 4 tables: orders (12 dims, 8 measures), customers (5 dims, 3 measures), ..." +> +> **You:** How many orders per status? +> +> **Agent:** *calls `query(source_model="orders", fields=[{"formula": "count"}], dimensions=["status"])`* + +The agent uses these MCP tools in order: + +1. `create_datasource` — connect to DB (auto-ingests models by default) +2. `datasource_summary` — discover available models and their schemas +3. `inspect_model` — see dimensions, measures, and sample data for a model +4. `query` — run queries + +See the [MCP Reference](../reference/mcp.md) for the full tools list. + +## Verify it works + +Ask your agent: + +> "List the available SLayer models" + +The agent should call `datasource_summary` and return a list of your tables/models. If it says "no models found", check that: + +1. The `--storage` path is correct +2. You've connected a datasource (or the agent has via `create_datasource`) +3. 
Models were ingested (auto-ingest runs by default with `create_datasource`) diff --git a/docs/getting-started/python.md b/docs/getting-started/python.md new file mode 100644 index 0000000..0ce44c1 --- /dev/null +++ b/docs/getting-started/python.md @@ -0,0 +1,147 @@ +# Python Setup — SDK & Embedded Use + +Use SLayer as a Python library — either as a client to a running server, or embedded directly in your application with no server at all. + +## Install + +```bash +pip install motley-slayer # Base (SQLite works out of the box) +pip install motley-slayer[all] # Everything +``` + +For specific database drivers or optional extras, see the [full list](../configuration/datasources.md#database-drivers): + +```bash +pip install motley-slayer[postgres] # PostgreSQL driver +pip install motley-slayer[client] # httpx + pandas for remote mode +``` + +## Embedded mode (no server) + +Use SLayer directly in your Python code — no HTTP, no server process: + +```python +from slayer.core.models import DatasourceConfig +from slayer.engine.ingestion import ingest_datasource +from slayer.engine.query_engine import SlayerQueryEngine +from slayer.storage.yaml_storage import YAMLStorage + +# Set up storage +storage = YAMLStorage(base_dir="./slayer_data") + +# Connect a database +ds = DatasourceConfig( + name="my_pg", + type="postgres", + host="localhost", + database="myapp", + username="analyst", + password="${DB_PASSWORD}", # resolved from env vars +) +storage.save_datasource(ds) + +# Auto-generate models from schema +models = ingest_datasource(datasource=ds, schema="public") +for model in models: + storage.save_model(model) + print(f" {model.name}: {len(model.dimensions)} dims, {len(model.measures)} measures") +``` + +## Query + +```python +from slayer.core.query import SlayerQuery + +engine = SlayerQueryEngine(storage=storage) + +result = engine.execute(query=SlayerQuery( + source_model="orders", + fields=[{"formula": "count"}, {"formula": "revenue_sum"}], + dimensions=[{"name": "status"}], +)) + +for row in result.data: + print(row) +# {"orders.status": "completed", "orders.count": 42, "orders.revenue_sum": 12345.67} +``` + +The response object: + +```python +result.data # list of row dicts +result.columns # list of column names +result.row_count # number of rows +result.sql # generated SQL (when dry_run or explain is set) +result.meta # dict of column name → FieldMetadata (labels, etc.) 
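+result.meta.get("orders.count")  # FieldMetadata for a single column; keys match the row dicts above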
+``` + +## Remote mode (client → server) + +Connect to a running SLayer server: + +```python +from slayer.client.slayer_client import SlayerClient +from slayer.core.query import SlayerQuery + +# Connect to remote server +client = SlayerClient(url="http://localhost:5143") + +# Query — returns SlayerResponse (same as embedded mode) +result = client.query(SlayerQuery( + source_model="orders", + fields=[{"formula": "count"}], + dimensions=[{"name": "status"}], +)) +print(result.data) +``` + +## DataFrame integration + +```python +# With pandas (requires motley-slayer[client] extra) +df = client.query_df(SlayerQuery( + source_model="orders", + fields=[{"formula": "count"}, {"formula": "revenue_sum"}], + dimensions=[{"name": "status"}], +)) +print(df) +``` + +## SQLite storage + +For single-file storage instead of YAML directories: + +```python +from slayer.storage.sqlite_storage import SQLiteStorage + +storage = SQLiteStorage(db_path="slayer.db") +# Use exactly like YAMLStorage +``` + +Or use the factory: + +```python +from slayer.storage.base import resolve_storage + +storage = resolve_storage("./slayer_data") # YAML +storage = resolve_storage("slayer.db") # SQLite +``` + +## Verify it works + +```python +from slayer.storage.base import resolve_storage +from slayer.engine.query_engine import SlayerQueryEngine + +storage = resolve_storage("./slayer_data") +engine = SlayerQueryEngine(storage=storage) + +# Should list your ingested models +print(storage.list_models()) + +# Should return data +result = engine.execute(query={"source_model": "orders", "fields": [{"formula": "count"}]}) +print(f"{result.row_count} row(s), columns: {result.columns}") +``` + +See the [Python Client Reference](../reference/python-client.md) for the full API. diff --git a/docs/getting-started/rest-api.md b/docs/getting-started/rest-api.md new file mode 100644 index 0000000..dc8f14d --- /dev/null +++ b/docs/getting-started/rest-api.md @@ -0,0 +1,148 @@ +# REST API Setup — App Developers + +Use SLayer from any language via HTTP. No Python needed in your application — just start the server and call the API. + +## Install and start + +```bash +uv tool install motley-slayer +slayer serve +``` + +For databases other than SQLite, add the driver extra (see [full list](../configuration/datasources.md#database-drivers)): + +```bash +uv tool install 'motley-slayer[postgres]' +``` + +The API runs at `http://localhost:5143`. An MCP SSE endpoint is also available at `/mcp/sse`. 
+ +## Connect a database + +Create a datasource config file: + +```yaml +# slayer_data/datasources/my_pg.yaml +name: my_pg +type: postgres +host: localhost +port: 5432 +database: myapp +username: analyst +password: ${DB_PASSWORD} +``` + +Ingest models from the schema: + +```bash +slayer ingest --datasource my_pg +``` + +Or do everything via the API: + +```bash +# Create datasource +curl -X POST http://localhost:5143/datasources \ + -H "Content-Type: application/json" \ + -d '{"name": "my_pg", "type": "postgres", "host": "localhost", "database": "myapp", "username": "analyst", "password": "secret"}' + +# Ingest models +curl -X POST http://localhost:5143/ingest \ + -H "Content-Type: application/json" \ + -d '{"datasource": "my_pg"}' +``` + +## Query + +```bash +# Count orders by status +curl -X POST http://localhost:5143/query \ + -H "Content-Type: application/json" \ + -d '{ + "source_model": "orders", + "fields": [{"formula": "count"}], + "dimensions": [{"name": "status"}] + }' +``` + +Response: + +```json +{ + "data": [ + {"orders.status": "completed", "orders.count": 42}, + {"orders.status": "pending", "orders.count": 15} + ], + "columns": ["orders.status", "orders.count"], + "row_count": 2 +} +``` + +## More examples + +```bash +# Monthly revenue with date range +curl -X POST http://localhost:5143/query \ + -H "Content-Type: application/json" \ + -d '{ + "source_model": "orders", + "fields": [{"formula": "revenue_sum"}], + "time_dimensions": [{"dimension": {"name": "created_at"}, "granularity": "month", "date_range": ["2024-01-01", "2024-12-31"]}] + }' + +# Top 5 customers +curl -X POST http://localhost:5143/query \ + -H "Content-Type: application/json" \ + -d '{ + "source_model": "orders", + "fields": [{"formula": "revenue_sum"}], + "dimensions": [{"name": "customers.name"}], + "order": [{"column": {"name": "revenue_sum"}, "direction": "desc"}], + "limit": 5 + }' + +# List models +curl http://localhost:5143/models + +# Get model details +curl http://localhost:5143/models/orders +``` + +## Verify it works + +```bash +# Health check +curl http://localhost:5143/health +# {"status": "ok"} + +# List models (should return your ingested models) +curl http://localhost:5143/models +``` + +If `/models` returns an empty list, run `slayer ingest --datasource my_pg` first. + +## Using from other languages + +SLayer is just HTTP + JSON — use any HTTP client: + +**JavaScript:** +```javascript +const res = await fetch("http://localhost:5143/query", { + method: "POST", + headers: {"Content-Type": "application/json"}, + body: JSON.stringify({ + source_model: "orders", + fields: [{formula: "count"}], + dimensions: [{name: "status"}], + }), +}); +const {data} = await res.json(); +``` + +**Go:** +```go +body := `{"source_model": "orders", "fields": [{"formula": "count"}]}` +resp, _ := http.Post("http://localhost:5143/query", "application/json", strings.NewReader(body)) +``` + +See the [REST API Reference](../reference/rest-api.md) for all endpoints. 
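+
+**Python (without the SDK):** a minimal sketch using only the standard library; the endpoint and payload mirror the curl examples above:
+
+```python
+import json
+import urllib.request
+
+payload = json.dumps({
+    "source_model": "orders",
+    "fields": [{"formula": "count"}],
+    "dimensions": [{"name": "status"}],
+}).encode()
+
+req = urllib.request.Request(
+    "http://localhost:5143/query",
+    data=payload,
+    headers={"Content-Type": "application/json"},
+)
+with urllib.request.urlopen(req) as resp:
+    body = json.loads(resp.read())
+
+print(body["data"])  # list of row dicts, e.g. {"orders.status": "completed", "orders.count": 42}
+```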
diff --git a/docs/index.md b/docs/index.md index 3a77129..cdc5a56 100644 --- a/docs/index.md +++ b/docs/index.md @@ -42,7 +42,9 @@ Agent ─→ MCP / REST API / Python SDK ## Next Steps -- [Getting Started](getting-started.md) — install, connect a database, run your first query -- [MCP Server](interfaces/mcp.md) — set up SLayer as an MCP tool server for AI agents +- [Getting Started](getting-started/index.md) — pick your interface and get running in minutes +- [MCP Setup](getting-started/mcp.md) — connect AI agents to your database +- [CLI Setup](getting-started/cli.md) — query from the terminal +- [REST API Setup](getting-started/rest-api.md) — build apps in any language - [Models](concepts/models.md) — understand dimensions and measures - [Queries](concepts/queries.md) — query format reference with examples diff --git a/docs/interfaces/cli.md b/docs/interfaces/cli.md index 60ce8b5..34c5159 100644 --- a/docs/interfaces/cli.md +++ b/docs/interfaces/cli.md @@ -2,6 +2,22 @@ SLayer provides a command-line interface for server management, querying, and model operations. +## Storage + +All commands accept a `--storage` flag to specify where models and datasources are stored: + +```bash +# YAML files in a directory (default) +slayer serve --storage ./slayer_data + +# SQLite database file (auto-detected by .db/.sqlite/.sqlite3 extension) +slayer serve --storage slayer.db +``` + +The default is `./slayer_data` (YAML). Override with `$SLAYER_STORAGE` or `$SLAYER_MODELS_DIR` env vars. + +The legacy `--models-dir` flag still works but is deprecated in favor of `--storage`. + ## Commands ### `slayer serve` @@ -9,15 +25,16 @@ SLayer provides a command-line interface for server management, querying, and mo Start the HTTP server (REST API + MCP SSE endpoint at `/mcp/sse`). ```bash -slayer serve --models-dir ./slayer_data -slayer serve --host 0.0.0.0 --port 8080 --models-dir ./slayer_data +slayer serve +slayer serve --host 0.0.0.0 --port 8080 +slayer serve --storage slayer.db ``` | Flag | Default | Description | |------|---------|-------------| | `--host` | `0.0.0.0` | Bind address | | `--port` | `5143` | Port number | -| `--models-dir` | `./slayer_data` | Storage directory | +| `--storage` | `./slayer_data` | Storage path (directory for YAML, .db file for SQLite) | ### `slayer mcp` @@ -25,17 +42,17 @@ Run SLayer as an MCP server using stdio transport. This command is **not meant t ```bash # Register with Claude Code (the agent will spawn the process) -claude mcp add slayer -- slayer mcp --models-dir ./slayer_data +claude mcp add slayer -- slayer mcp --storage ./slayer_data # If slayer is in a virtualenv, use the full executable path: -# claude mcp add slayer -- $(poetry env info -p)/bin/slayer mcp --models-dir /abs/path/to/slayer_data +# claude mcp add slayer -- $(poetry env info -p)/bin/slayer mcp --storage /abs/path/to/slayer_data ``` For MCP over HTTP (SSE), use `slayer serve` instead — it exposes MCP at `/mcp/sse` alongside the REST API. 
| Flag | Default | Description | |------|---------|-------------| -| `--models-dir` | `./slayer_data` | Storage directory | +| `--storage` | `./slayer_data` | Storage path | ### `slayer query` @@ -50,33 +67,46 @@ slayer query @query.json # JSON output slayer query '{"source_model": "orders", "fields": ["count"]}' --format json + +# Preview SQL without executing +slayer query '{"source_model": "orders", "fields": ["count"]}' --dry-run + +# Show execution plan +slayer query @query.json --explain ``` | Flag | Default | Description | |------|---------|-------------| -| `--models-dir` | `./slayer_data` | Storage directory | +| `--storage` | `./slayer_data` | Storage path | | `--format` | `table` | Output format: `table` or `json` | +| `--dry-run` | | Generate SQL without executing | +| `--explain` | | Run EXPLAIN ANALYZE on the query | ### `slayer ingest` Auto-generate models from a datasource. ```bash -slayer ingest --datasource my_postgres --schema public --models-dir ./slayer_data +slayer ingest --datasource my_postgres +slayer ingest --datasource my_postgres --schema public +slayer ingest --datasource my_postgres --include orders,customers +slayer ingest --datasource my_postgres --exclude migrations,django_session ``` | Flag | Required | Description | |------|----------|-------------| | `--datasource` | Yes | Datasource name | | `--schema` | No | Database schema to inspect | -| `--models-dir` | No | Storage directory | +| `--include` | No | Comma-separated tables to include | +| `--exclude` | No | Comma-separated tables to exclude | +| `--storage` | No | Storage path | ### `slayer models` Manage models. ```bash -slayer models list --models-dir ./slayer_data +slayer models list slayer models show orders slayer models create model.yaml slayer models delete orders @@ -87,6 +117,6 @@ slayer models delete orders Manage datasources. ```bash -slayer datasources list --models-dir ./slayer_data +slayer datasources list slayer datasources show my_postgres # credentials masked ``` diff --git a/docs/reference/cli.md b/docs/reference/cli.md new file mode 100644 index 0000000..34c5159 --- /dev/null +++ b/docs/reference/cli.md @@ -0,0 +1,122 @@ +# CLI + +SLayer provides a command-line interface for server management, querying, and model operations. + +## Storage + +All commands accept a `--storage` flag to specify where models and datasources are stored: + +```bash +# YAML files in a directory (default) +slayer serve --storage ./slayer_data + +# SQLite database file (auto-detected by .db/.sqlite/.sqlite3 extension) +slayer serve --storage slayer.db +``` + +The default is `./slayer_data` (YAML). Override with `$SLAYER_STORAGE` or `$SLAYER_MODELS_DIR` env vars. + +The legacy `--models-dir` flag still works but is deprecated in favor of `--storage`. + +## Commands + +### `slayer serve` + +Start the HTTP server (REST API + MCP SSE endpoint at `/mcp/sse`). + +```bash +slayer serve +slayer serve --host 0.0.0.0 --port 8080 +slayer serve --storage slayer.db +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--host` | `0.0.0.0` | Bind address | +| `--port` | `5143` | Port number | +| `--storage` | `./slayer_data` | Storage path (directory for YAML, .db file for SQLite) | + +### `slayer mcp` + +Run SLayer as an MCP server using stdio transport. This command is **not meant to be run manually** — it is spawned by an AI agent (Claude Code, Cursor, etc.) as a subprocess. 
To set it up, register the command with your agent: + +```bash +# Register with Claude Code (the agent will spawn the process) +claude mcp add slayer -- slayer mcp --storage ./slayer_data + +# If slayer is in a virtualenv, use the full executable path: +# claude mcp add slayer -- $(poetry env info -p)/bin/slayer mcp --storage /abs/path/to/slayer_data +``` + +For MCP over HTTP (SSE), use `slayer serve` instead — it exposes MCP at `/mcp/sse` alongside the REST API. + +| Flag | Default | Description | +|------|---------|-------------| +| `--storage` | `./slayer_data` | Storage path | + +### `slayer query` + +Execute a query from the terminal. + +```bash +# Inline JSON +slayer query '{"source_model": "orders", "fields": ["count"], "dimensions": ["status"]}' + +# From a file +slayer query @query.json + +# JSON output +slayer query '{"source_model": "orders", "fields": ["count"]}' --format json + +# Preview SQL without executing +slayer query '{"source_model": "orders", "fields": ["count"]}' --dry-run + +# Show execution plan +slayer query @query.json --explain +``` + +| Flag | Default | Description | +|------|---------|-------------| +| `--storage` | `./slayer_data` | Storage path | +| `--format` | `table` | Output format: `table` or `json` | +| `--dry-run` | | Generate SQL without executing | +| `--explain` | | Run EXPLAIN ANALYZE on the query | + +### `slayer ingest` + +Auto-generate models from a datasource. + +```bash +slayer ingest --datasource my_postgres +slayer ingest --datasource my_postgres --schema public +slayer ingest --datasource my_postgres --include orders,customers +slayer ingest --datasource my_postgres --exclude migrations,django_session +``` + +| Flag | Required | Description | +|------|----------|-------------| +| `--datasource` | Yes | Datasource name | +| `--schema` | No | Database schema to inspect | +| `--include` | No | Comma-separated tables to include | +| `--exclude` | No | Comma-separated tables to exclude | +| `--storage` | No | Storage path | + +### `slayer models` + +Manage models. + +```bash +slayer models list +slayer models show orders +slayer models create model.yaml +slayer models delete orders +``` + +### `slayer datasources` + +Manage datasources. + +```bash +slayer datasources list +slayer datasources show my_postgres # credentials masked +``` diff --git a/docs/reference/mcp.md b/docs/reference/mcp.md new file mode 100644 index 0000000..b9ebe7e --- /dev/null +++ b/docs/reference/mcp.md @@ -0,0 +1,148 @@ +# MCP Server + +SLayer runs as an [MCP](https://modelcontextprotocol.io/) server, allowing AI agents (Claude, Cursor, etc.) to discover and query data conversationally. + +## Transports + +SLayer supports two MCP transports. Both expose the exact same tools — the only difference is how the agent connects. + +### Stdio (local) + +The agent spawns SLayer as a subprocess and communicates via stdin/stdout. You do **not** run `slayer mcp` manually — the agent launches it. You only need to register the command with your agent. + +**Claude Code setup:** + +```bash +claude mcp add slayer -- slayer mcp --models-dir ./slayer_data +``` + +If `slayer` is installed in a virtualenv (e.g. via Poetry), use the full path to the executable so the agent can find it regardless of working directory: + +```bash +# Find the virtualenv path +poetry env info -p +# e.g. 
/home/user/.venvs/slayer-abc123 + +# Register with the full path +claude mcp add slayer -- /home/user/.venvs/slayer-abc123/bin/slayer mcp --models-dir /path/to/slayer_data +``` + +### SSE (remote) + +MCP over HTTP via Server-Sent Events. You run `slayer serve` yourself — it exposes both the REST API and the MCP SSE endpoint on the same port: + +```bash +# 1. Start the server +slayer serve --models-dir ./slayer_data +# REST API at http://localhost:5143/ +# MCP SSE at http://localhost:5143/mcp/sse +``` + +Then, in a separate terminal, register the remote endpoint with your agent: + +```bash +# 2. Connect the agent +claude mcp add slayer-remote --transport sse --url http://localhost:5143/mcp/sse +``` + +This is useful when SLayer runs on a different machine, in Docker, or when multiple agents need to share the same server. + +### Verify + +```bash +claude mcp list +``` + +## Tools Reference + +### Datasource Management + +| Tool | Description | +|------|-------------| +| `create_datasource` | Create a DB connection, test it, and auto-ingest models (set `auto_ingest=false` to skip). | +| `list_datasources` | List configured datasources (no credentials shown). | +| `describe_datasource` | Show details, test connection, list available schemas. | +| `list_tables` | List tables in a database before ingesting. | +| `edit_datasource` | Edit an existing datasource config. | +| `delete_datasource` | Remove a datasource config. | + +### Model Management + +| Tool | Description | +|------|-------------| +| `datasource_summary` | List all datasources and their models with schemas (dimensions, measures). Returns JSON. | +| `inspect_model` | Detailed model info with sample data. Params: `model_name`, `num_rows` (default 3), `show_sql` (default false). | +| `create_model` | Create a new model from table/SQL definition. | +| `create_model_from_query` | Create a model from a query — saves the query's SQL as a reusable model with auto-introspected dimensions and measures. Params: `name`, `query` (SLayer query dict), `description` (optional). | +| `edit_model` | Edit an existing model in one call. Params: `model_name` (required), `description`, `data_source`, `default_time_dimension` (optional metadata), `add_measures` (list), `add_dimensions` (list), `remove` (list of names). | +| `delete_model` | Delete a model entirely. | + +### Querying + +| Tool | Description | +|------|-------------| +| `query` | Execute a semantic query. See [Queries](../concepts/queries.md) for format. | + +**`query` parameters:** + +| Param | Type | Description | +|-------|------|-------------| +| `source_model` | string | Model name (required) | +| `fields` | list | Data columns: measures, arithmetic, transforms. E.g. `["count", {"formula": "revenue / count", "name": "aov", "label": "Average Order Value"}, "cumsum(revenue)"]`. Each field has an optional `label` for human-readable display. Supports nesting: `"change(cumsum(revenue))"` | +| `dimensions` | list | Dimension names, e.g. `["status"]`. When using the engine directly, dimensions accept an optional `label` via `{"name": "status", "label": "Order Status"}`. | +| `filters` | list[str] | Filter formula strings, e.g. `["status = 'active'", "amount > 100"]`. Supports operators (`=`, `<>`, `>`, `>=`, `<`, `<=`, `IN`, `IS NULL`, `IS NOT NULL`, `LIKE`, `NOT LIKE`), boolean logic (`AND`, `OR`, `NOT`), and inline transform expressions (`"change(revenue) > 0"`). Filters on measures are automatically routed to HAVING. | +| `time_dimensions` | list[dict] | Time grouping. 
Each entry supports an optional `label` for display. | +| `order` | list[dict] | Sorting, e.g. `[{"column": "count", "direction": "desc"}]` | +| `limit` | int | Max rows | +| `offset` | int | Skip rows | +| `whole_periods_only` | bool | Snap date filters to time bucket boundaries, exclude the current incomplete time bucket | +| `show_sql` | bool | Include the generated SQL in the response for debugging | +| `dry_run` | bool | Generate and return the SQL without executing it | +| `explain` | bool | Run EXPLAIN ANALYZE and return the query plan | +| `format` | string | Output format: `"markdown"` (default, compact), `"json"` (structured), or `"csv"` (most compact). Case-insensitive | + +### Ingestion + +| Tool | Description | +|------|-------------| +| `ingest_datasource_models` | Auto-generate models from DB schema with rollup joins. Params: `datasource_name`, `include_tables`, `schema_name`. | + +## Typical Agent Workflows + +### Connect and explore a new database + +``` +1. create_datasource(name="mydb", type="postgres", host="localhost", database="app", username="user", password="pass") + # auto_ingest=true by default — models are generated automatically +2. datasource_summary() # see what was generated +3. inspect_model(model_name="orders") # see schema + sample data +``` + +To explore first without auto-ingesting: + +``` +1. create_datasource(name="mydb", type="postgres", host="localhost", database="app", username="user", password="pass", auto_ingest=false) +2. describe_datasource(name="mydb") # verify connection, see schemas +3. list_tables(datasource_name="mydb", schema_name="public") # explore tables +4. ingest_datasource_models(datasource_name="mydb", schema_name="public") +5. datasource_summary() # see what was generated +``` + +### Query data + +``` +1. datasource_summary() # discover models +2. inspect_model(model_name="orders") # see schema + sample data +3. query(source_model="orders", fields=["count"], dimensions=["status"], limit=10) +``` + +### Customize a model + +``` +1. edit_model( + model_name="orders", + add_measures=[{"name": "avg_amount", "sql": "amount", "type": "avg"}], + add_dimensions=[{"name": "priority", "sql": "priority", "type": "string"}], + remove=["amount_sum"] + ) +``` diff --git a/docs/reference/python-client.md b/docs/reference/python-client.md new file mode 100644 index 0000000..6215776 --- /dev/null +++ b/docs/reference/python-client.md @@ -0,0 +1,92 @@ +# Python Client + +The Python SDK supports both **remote mode** (connects to a running server) and **local mode** (no server needed). + +## Installation + +```bash +pip install motley-slayer[client] # httpx + pandas +``` + +## Usage + +### Remote Mode + +```python +from slayer.client.slayer_client import SlayerClient +from slayer.core.query import SlayerQuery + +client = SlayerClient(url="http://localhost:5143") + +query = SlayerQuery( + source_model="orders", + fields=["count", "revenue_sum"], + dimensions=["status"], + limit=10, +) + +# Get raw data +data = client.query(query) +# [{"orders.status": "completed", "orders.count": 42, ...}, ...] 
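+# (column keys are prefixed with the model name, e.g. "orders.count")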
+ +# Get pandas DataFrame +df = client.query_df(query) +print(df) +``` + +### Local Mode + +No server needed — queries execute directly against the storage backend: + +```python +from slayer.client.slayer_client import SlayerClient +from slayer.storage.yaml_storage import YAMLStorage + +client = SlayerClient(storage=YAMLStorage(base_dir="./slayer_data")) + +# Same query API as remote mode +data = client.query(query) +df = client.query_df(query) +``` + +### Other Methods + +```python +# List models +models = client.list_models() + +# Get model definition +model = client.get_model("orders") + +# Create a model +client.create_model({"name": "orders", "sql_table": "public.orders", ...}) + +# List datasources +datasources = client.list_datasources() + +# Create a datasource +client.create_datasource({"name": "mydb", "type": "postgres", ...}) +``` + +## Direct Engine Access + +For maximum control, use the query engine directly: + +```python +from slayer.engine.query_engine import SlayerQueryEngine +from slayer.storage.yaml_storage import YAMLStorage + +storage = YAMLStorage(base_dir="./slayer_data") +engine = SlayerQueryEngine(storage=storage) + +result = engine.execute(query=query) +# result.data — list of row dicts +# result.columns — list of column names +# result.meta — dict mapping column names to FieldMetadata (label, and more coming soon) +# +# client.query() returns SlayerResponse with all fields above +# client.sql(query) returns just the generated SQL string +# client.explain(query) returns SlayerResponse with EXPLAIN ANALYZE output +# result.row_count — number of rows +# result.sql — generated SQL string +``` diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md new file mode 100644 index 0000000..ee30f71 --- /dev/null +++ b/docs/reference/rest-api.md @@ -0,0 +1,114 @@ +# REST API + +SLayer provides a FastAPI-based REST API on port **5143** by default. 
+ +## Start the Server + +```bash +slayer serve --models-dir ./slayer_data +slayer serve --host 0.0.0.0 --port 8080 --models-dir ./slayer_data +``` + +## Endpoints + +### Health Check + +``` +GET /health +``` + +```bash +curl http://localhost:5143/health +# {"status": "ok"} +``` + +### Query + +``` +POST /query +``` + +```bash +curl -X POST http://localhost:5143/query \ + -H "Content-Type: application/json" \ + -d '{ + "source_model": "orders", + "fields": ["count"], + "dimensions": ["status"], + "limit": 10 + }' +``` + +Response: + +```json +{ + "data": [ + {"orders.status": "completed", "orders.count": 42}, + {"orders.status": "pending", "orders.count": 15} + ], + "row_count": 2, + "columns": ["orders.status", "orders.count"] +} +``` + +### Models + +``` +GET /models # List all models +GET /models/{name} # Get model definition +POST /models # Create a model +PUT /models/{name} # Update a model +DELETE /models/{name} # Delete a model +``` + +```bash +# List models +curl http://localhost:5143/models + +# Get model definition (hidden dimensions/measures excluded) +curl http://localhost:5143/models/orders + +# Create a model +curl -X POST http://localhost:5143/models \ + -H "Content-Type: application/json" \ + -d '{"name": "orders", "sql_table": "public.orders", "data_source": "mydb", ...}' +``` + +### Datasources + +``` +GET /datasources # List all datasources +GET /datasources/{name} # Get datasource (credentials masked) +POST /datasources # Create a datasource +DELETE /datasources/{name} # Delete a datasource +``` + +```bash +# List datasources +curl http://localhost:5143/datasources + +# Get datasource (password/connection_string shown as ***) +curl http://localhost:5143/datasources/my_postgres +``` + +### Ingestion + +``` +POST /ingest +``` + +```bash +curl -X POST http://localhost:5143/ingest \ + -H "Content-Type: application/json" \ + -d '{"datasource": "my_postgres", "schema_name": "public"}' +``` + +Response: + +```json +{ + "status": "ingested", + "models": ["orders", "customers", "products"] +} +``` diff --git a/mkdocs.yml b/mkdocs.yml index ff16701..b427261 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -10,18 +10,23 @@ theme: nav: - Home: index.md - - Getting Started: getting-started.md + - Getting Started: + - getting-started/index.md + - MCP (AI Agents): getting-started/mcp.md + - CLI (Terminal): getting-started/cli.md + - REST API (Any Language): getting-started/rest-api.md + - Python SDK: getting-started/python.md - Concepts: - Terminology: concepts/terminology.md - Models: concepts/models.md - Queries: concepts/queries.md - Formulas: concepts/formulas.md - Auto-Ingestion: concepts/ingestion.md - - Interfaces: - - MCP Server: interfaces/mcp.md - - REST API: interfaces/rest-api.md - - Python Client: interfaces/python-client.md - - CLI: interfaces/cli.md + - Reference: + - MCP Server: reference/mcp.md + - REST API: reference/rest-api.md + - Python Client: reference/python-client.md + - CLI: reference/cli.md - Examples: - Dynamic Models: examples/01_dynamic/dynamic.md - SQL vs DSL: diff --git a/slayer/cli.py b/slayer/cli.py index 6e8cb97..a08920e 100644 --- a/slayer/cli.py +++ b/slayer/cli.py @@ -4,61 +4,245 @@ import os import sys +_STORAGE_DEFAULT = os.environ.get("SLAYER_STORAGE", os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) +_STORAGE_HELP = ( + "Storage path: directory for YAML storage, or .db/.sqlite file for SQLite storage " + "(default: $SLAYER_STORAGE or $SLAYER_MODELS_DIR or ./slayer_data)" +) + + +def _add_storage_arg(parser): + """Add --storage and legacy 
--models-dir flags to a parser.""" + parser.add_argument("--storage", default=None, help=_STORAGE_HELP) + parser.add_argument( + "--models-dir", + default=None, + help="(deprecated, use --storage) Path to YAML models directory", + ) + + +def _resolve_storage(args): + """Resolve storage backend from --storage or --models-dir flags.""" + from slayer.storage.base import resolve_storage + + path = args.storage or args.models_dir or _STORAGE_DEFAULT + return resolve_storage(path) + def main(): - parser = argparse.ArgumentParser(prog="slayer", description="SLayer — semantic layer for AI agents") + parser = argparse.ArgumentParser( + prog="slayer", + description="SLayer — a lightweight semantic layer for AI agents", + epilog="""\ +common workflows: + # 1. Create a datasource config, ingest models, start the server + slayer ingest --datasource my_postgres + slayer serve + + # 2. Query from the command line + slayer query '{"source_model": "orders", "fields": [{"formula": "count"}]}' + + # 3. Start the MCP server for AI agents + slayer mcp + + # 4. Use SQLite storage instead of YAML files + slayer serve --storage slayer.db + slayer ingest --datasource my_pg --storage slayer.db + +docs: https://motley-slayer.readthedocs.io/ +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) subparsers = parser.add_subparsers(dest="command") - # serve command - serve_parser = subparsers.add_parser("serve", help="Start the REST API server") - serve_parser.add_argument("--host", default="0.0.0.0") - serve_parser.add_argument("--port", type=int, default=5143) - serve_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) - - # mcp command - mcp_parser = subparsers.add_parser("mcp", help="Start the MCP server") - mcp_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) - - # query command - query_parser = subparsers.add_parser("query", help="Execute a SLayer query from JSON") - query_parser.add_argument("query_json", help="JSON query string or @file.json") - query_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) - query_parser.add_argument("--format", choices=["json", "table"], default="table") + # ── serve ───────────────────────────────────────────────────────── + serve_parser = subparsers.add_parser( + "serve", + help="Start the REST API server", + epilog="""\ +examples: + slayer serve + slayer serve --port 8080 --storage ./my_data + slayer serve --storage slayer.db +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + serve_parser.add_argument("--host", default="0.0.0.0", help="Bind address (default: 0.0.0.0)") + serve_parser.add_argument("--port", type=int, default=5143, help="Port number (default: 5143)") + _add_storage_arg(serve_parser) + + # ── mcp ─────────────────────────────────────────────────────────── + mcp_parser = subparsers.add_parser( + "mcp", + help="Start the MCP server (stdio transport for AI agents)", + epilog="""\ +examples: + slayer mcp + slayer mcp --storage slayer.db + + # Add to Claude Code: + claude mcp add slayer -- slayer mcp --storage ./slayer_data +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + _add_storage_arg(mcp_parser) + + # ── query ───────────────────────────────────────────────────────── + query_parser = subparsers.add_parser( + "query", + help="Execute a query from JSON", + epilog="""\ +examples: + # Inline JSON + slayer query '{"source_model": "orders", "fields": [{"formula": "count"}]}' + + # From 
a file + slayer query @query.json + + # Preview SQL without executing + slayer query '{"source_model": "orders", "fields": [{"formula": "count"}]}' --dry-run + + # Show execution plan + slayer query @query.json --explain + + # Output as JSON + slayer query @query.json --format json +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + query_parser.add_argument( + "query_json", + help="JSON query string, or @file.json to read from a file", + ) + _add_storage_arg(query_parser) + query_parser.add_argument( + "--format", + choices=["json", "table"], + default="table", + help="Output format (default: table)", + ) query_parser.add_argument("--dry-run", action="store_true", help="Generate SQL without executing") query_parser.add_argument("--explain", action="store_true", help="Run EXPLAIN ANALYZE on the query") - # ingest command - ingest_parser = subparsers.add_parser("ingest", help="Auto-ingest models from a datasource") - ingest_parser.add_argument("--datasource", required=True) - ingest_parser.add_argument("--schema", default=None) - ingest_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) - - # models command - models_parser = subparsers.add_parser("models", help="Manage models") - models_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) + # ── ingest ──────────────────────────────────────────────────────── + ingest_parser = subparsers.add_parser( + "ingest", + help="Auto-ingest models from a datasource", + epilog="""\ +examples: + slayer ingest --datasource my_postgres + slayer ingest --datasource my_postgres --schema public + slayer ingest --datasource my_postgres --include orders,customers + slayer ingest --datasource my_postgres --exclude migrations,django_session +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + ingest_parser.add_argument("--datasource", required=True, help="Name of the datasource to ingest from") + ingest_parser.add_argument("--schema", default=None, help="Database schema to introspect (e.g., public)") + ingest_parser.add_argument( + "--include", + default=None, + help="Comma-separated list of tables to include (default: all)", + ) + ingest_parser.add_argument( + "--exclude", + default=None, + help="Comma-separated list of tables to exclude", + ) + _add_storage_arg(ingest_parser) + + # ── models ──────────────────────────────────────────────────────── + models_parser = subparsers.add_parser( + "models", + help="Manage models", + epilog="""\ +examples: + slayer models list + slayer models show orders + slayer models create model.yaml + slayer models delete old_model +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + _add_storage_arg(models_parser) models_subparsers = models_parser.add_subparsers(dest="models_command") models_subparsers.add_parser("list", help="List all models") - models_show_parser = models_subparsers.add_parser("show", help="Show a model definition") + models_show_parser = models_subparsers.add_parser("show", help="Show a model definition (YAML)") models_show_parser.add_argument("name", help="Model name") models_create_parser = models_subparsers.add_parser("create", help="Create a model from a YAML file") - models_create_parser.add_argument("file", help="Path to YAML file") + models_create_parser.add_argument("file", help="Path to YAML model definition") models_delete_parser = models_subparsers.add_parser("delete", help="Delete a model") models_delete_parser.add_argument("name", help="Model name") - # 
datasources command - datasources_parser = subparsers.add_parser("datasources", help="Manage datasources") - datasources_parser.add_argument("--models-dir", default=os.environ.get("SLAYER_MODELS_DIR", "./slayer_data")) + # ── datasources ─────────────────────────────────────────────────── + datasources_parser = subparsers.add_parser( + "datasources", + help="Manage datasources", + epilog="""\ +examples: + slayer datasources list + slayer datasources show my_postgres + + # Create from YAML file + slayer datasources create datasource.yaml + + # Create inline (quick setup — use env vars for secrets) + slayer datasources create-inline my_pg --type postgres --host localhost --database mydb --username user --password '${DB_PASSWORD}' + + # Or read password interactively + slayer datasources create-inline my_pg --type postgres --host localhost --database mydb --username user --password-stdin + + # Create SQLite/DuckDB (just needs a path) + slayer datasources create-inline my_sqlite --type sqlite --database /path/to/data.db + + slayer datasources delete my_postgres + slayer datasources test my_postgres +""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + _add_storage_arg(datasources_parser) datasources_subparsers = datasources_parser.add_subparsers(dest="datasources_command") datasources_subparsers.add_parser("list", help="List all datasources") - datasources_show_parser = datasources_subparsers.add_parser("show", help="Show a datasource definition") + datasources_show_parser = datasources_subparsers.add_parser( + "show", help="Show datasource config (passwords masked)" + ) datasources_show_parser.add_argument("name", help="Datasource name") + datasources_create_parser = datasources_subparsers.add_parser("create", help="Create a datasource from a YAML file") + datasources_create_parser.add_argument("file", help="Path to YAML datasource config") + + ds_inline_parser = datasources_subparsers.add_parser( + "create-inline", help="Create a datasource from command-line flags" + ) + ds_inline_parser.add_argument("name", help="Datasource name") + ds_inline_parser.add_argument("--type", required=True, help="Database type (postgres, mysql, sqlite, duckdb, ...)") + ds_inline_parser.add_argument("--host", default=None, help="Database host") + ds_inline_parser.add_argument("--port", type=int, default=None, help="Database port") + ds_inline_parser.add_argument("--database", default=None, help="Database name or file path") + ds_inline_parser.add_argument("--username", default=None, help="Database username") + ds_inline_parser.add_argument( + "--password", default=None, help="Database password (prefer --password-stdin or ${ENV_VAR} in YAML configs)" + ) + ds_inline_parser.add_argument( + "--password-stdin", action="store_true", help="Read password from stdin (more secure than --password)" + ) + ds_inline_parser.add_argument( + "--connection-string", default=None, help="Full connection string (overrides other flags)" + ) + ds_inline_parser.add_argument("--description", default=None, help="Human-readable description") + + datasources_delete_parser = datasources_subparsers.add_parser("delete", help="Delete a datasource") + datasources_delete_parser.add_argument("name", help="Datasource name") + + datasources_test_parser = datasources_subparsers.add_parser("test", help="Test datasource connectivity") + datasources_test_parser.add_argument("name", help="Datasource name") + args = parser.parse_args() if args.command == "serve": @@ -83,7 +267,6 @@ def _run_query(args): from slayer.core.query import 
SlayerQuery from slayer.engine.query_engine import SlayerQueryEngine - from slayer.storage.yaml_storage import YAMLStorage query_input = args.query_json if query_input.startswith("@"): @@ -96,7 +279,7 @@ def _run_query(args): data["explain"] = True slayer_query = SlayerQuery.model_validate(data) - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) engine = SlayerQueryEngine(storage=storage) result = engine.execute(query=slayer_query) @@ -125,35 +308,42 @@ def _run_query(args): def _run_serve(args): from slayer.api.server import create_app - from slayer.storage.yaml_storage import YAMLStorage - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) app = create_app(storage=storage) import uvicorn + uvicorn.run(app, host=args.host, port=args.port) def _run_mcp(args): from slayer.mcp.server import create_mcp_server - from slayer.storage.yaml_storage import YAMLStorage - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) mcp = create_mcp_server(storage=storage) mcp.run() def _run_ingest(args): from slayer.engine.ingestion import ingest_datasource - from slayer.storage.yaml_storage import YAMLStorage - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) ds = storage.get_datasource(args.datasource) if ds is None: - print(f"Datasource '{args.datasource}' not found in {args.models_dir}") + storage_path = args.storage or args.models_dir or _STORAGE_DEFAULT + print(f"Datasource '{args.datasource}' not found in {storage_path}") sys.exit(1) - models = ingest_datasource(datasource=ds, schema=args.schema) + include = [t for t in (s.strip() for s in args.include.split(",")) if t] if args.include else None + exclude = [t for t in (s.strip() for s in args.exclude.split(",")) if t] if args.exclude else None + + models = ingest_datasource( + datasource=ds, + schema=args.schema, + include_tables=include, + exclude_tables=exclude, + ) for model in models: storage.save_model(model) print(f"Ingested: {model.name} ({len(model.dimensions)} dims, {len(model.measures)} measures)") @@ -163,9 +353,8 @@ def _run_models(args): import yaml from slayer.core.models import SlayerModel - from slayer.storage.yaml_storage import YAMLStorage - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) if args.models_command == "list": names = storage.list_models() @@ -210,9 +399,9 @@ def _run_models(args): def _run_datasources(args): import yaml - from slayer.storage.yaml_storage import YAMLStorage + from slayer.core.models import DatasourceConfig - storage = YAMLStorage(base_dir=args.models_dir) + storage = _resolve_storage(args) if args.datasources_command == "list": names = storage.list_datasources() @@ -236,8 +425,56 @@ def _run_datasources(args): data["connection_string"] = "********" print(yaml.dump(data, sort_keys=False, default_flow_style=False).rstrip()) + elif args.datasources_command == "create": + with open(args.file) as f: + data = yaml.safe_load(f) + ds = DatasourceConfig.model_validate(data) + storage.save_datasource(ds) + print(f"Created datasource '{ds.name}' ({ds.type}).") + + elif args.datasources_command == "create-inline": + ds_data = {"name": args.name, "type": args.type} + for field in ("host", "port", "database", "username", "password", "connection_string", "description"): + val = getattr(args, field.replace("-", "_"), None) + if val is not None: + ds_data[field] = val + if args.password_stdin: + import getpass + + ds_data["password"] = ( + 
getpass.getpass("Password: ") if sys.stdin.isatty() else sys.stdin.readline().rstrip("\n") + ) + ds = DatasourceConfig.model_validate(ds_data) + storage.save_datasource(ds) + print(f"Created datasource '{ds.name}' ({ds.type}).") + + elif args.datasources_command == "delete": + deleted = storage.delete_datasource(args.name) + if deleted: + print(f"Deleted datasource '{args.name}'.") + else: + print(f"Datasource '{args.name}' not found.") + sys.exit(1) + + elif args.datasources_command == "test": + ds = storage.get_datasource(args.name) + if ds is None: + print(f"Datasource '{args.name}' not found.") + sys.exit(1) + import sqlalchemy as sa + + try: + engine = sa.create_engine(ds.resolve_env_vars().get_connection_string()) + with engine.connect() as conn: + conn.execute(sa.text("SELECT 1")) + engine.dispose() + print(f"OK — connected to '{args.name}' ({ds.type}).") + except Exception as e: + print(f"FAILED — {e}") + sys.exit(1) + else: - print("Usage: slayer datasources {list,show}") + print("Usage: slayer datasources {list,show,create,create-inline,delete,test}") sys.exit(1) diff --git a/slayer/storage/base.py b/slayer/storage/base.py index 859985f..81132a0 100644 --- a/slayer/storage/base.py +++ b/slayer/storage/base.py @@ -1,7 +1,7 @@ -"""Abstract storage protocol.""" +"""Abstract storage protocol and factory.""" from abc import ABC, abstractmethod -from typing import List, Optional +from typing import Callable, Dict, List, Optional from slayer.core.models import DatasourceConfig, SlayerModel @@ -30,3 +30,66 @@ def list_datasources(self) -> List[str]: ... @abstractmethod def delete_datasource(self, name: str) -> bool: ... + + +# --------------------------------------------------------------------------- +# Storage factory with pluggable registry +# --------------------------------------------------------------------------- + +_STORAGE_REGISTRY: Dict[str, Callable[[str], StorageBackend]] = {} + + +def register_storage(scheme: str, factory: Callable[[str], StorageBackend]) -> None: + """Register a storage backend factory for a URI scheme. + + Example: + register_storage("redis", lambda path: RedisStorage(url=path)) + """ + _STORAGE_REGISTRY[scheme.lower().strip()] = factory + + +def resolve_storage(path: str) -> StorageBackend: + """Create a StorageBackend from a path or URI. + + Resolution order: + 1. URI scheme (e.g., "sqlite:///data.db", "yaml://./dir") → registered factory + 2. File extension .db/.sqlite/.sqlite3 → SQLiteStorage + 3. Everything else → YAMLStorage (directory) + + Third-party backends can register via register_storage(). + """ + # Check for URI scheme + if "://" in path: + scheme, _, remainder = path.partition("://") + scheme = scheme.lower() + if scheme in _STORAGE_REGISTRY: + return _STORAGE_REGISTRY[scheme](remainder) + # Built-in schemes + if scheme == "yaml": + from slayer.storage.yaml_storage import YAMLStorage + + return YAMLStorage(base_dir=remainder) + if scheme == "sqlite": + from slayer.storage.sqlite_storage import SQLiteStorage + + # sqlite:///abs/path → remainder="/abs/path" (keep absolute) + # sqlite://rel/path → remainder="rel/path" (keep relative) + db_path = remainder if remainder.startswith("/") else remainder.lstrip("/") + return SQLiteStorage(db_path=db_path) + raise ValueError( + f"Unknown storage scheme '{scheme}'. " + f"Built-in: yaml, sqlite. " + f"Registered: {', '.join(_STORAGE_REGISTRY) or 'none'}. " + f"Use register_storage() to add custom backends." 
+ ) + + # Extension-based detection + if path.endswith((".db", ".sqlite", ".sqlite3")): + from slayer.storage.sqlite_storage import SQLiteStorage + + return SQLiteStorage(db_path=path) + + # Default: YAML directory + from slayer.storage.yaml_storage import YAMLStorage + + return YAMLStorage(base_dir=path)