From 6db9da5d6f5922883e92e212c695ae0f21f7156f Mon Sep 17 00:00:00 2001 From: AivanF Date: Fri, 10 Apr 2026 15:00:45 +0300 Subject: [PATCH] Docs consolidation --- .claude/skills/slayer-models.md | 39 ++++---- .claude/skills/slayer-query.md | 127 +++++++++-------------- docs/getting-started.md | 166 ------------------------------- docs/getting-started/cli.md | 4 +- docs/getting-started/mcp.md | 2 +- docs/getting-started/python.md | 14 +-- docs/getting-started/rest-api.md | 20 ++-- 7 files changed, 85 insertions(+), 287 deletions(-) delete mode 100644 docs/getting-started.md diff --git a/.claude/skills/slayer-models.md b/.claude/skills/slayer-models.md index c66c4c9..75353be 100644 --- a/.claude/skills/slayer-models.md +++ b/.claude/skills/slayer-models.md @@ -23,25 +23,21 @@ dimensions: sql: "created_at" type: time -default_time_dimension: created_at # Optional: used by time-dependent formulas when no time_dimensions in query +default_time_dimension: created_at # Optional: used by time-dependent formulas measures: - - name: count - type: count # COUNT(*), no sql needed - - name: revenue_sum - sql: "amount" - type: sum - - name: revenue_avg - sql: "amount" - type: avg + - name: revenue + sql: "amount" # Row-level expression — aggregation chosen at query time + - name: quantity + sql: "quantity" ``` +Measures are **row-level expressions** — no aggregation type in the definition. Aggregation is specified at query time with colon syntax: `"revenue:sum"`, `"revenue:avg"`, `"*:count"`. + ## Data Types **Dimension types**: `string`, `number`, `boolean`, `time` (timestamp), `date` -**Measure aggregation types**: `count`, `count_distinct`, `sum`, `avg`, `min`, `max`, `last` (most recent time bucket's value — for snapshot metrics like balances) - ## Joins Models can declare LEFT JOIN relationships to other models: @@ -52,7 +48,7 @@ joins: join_pairs: [["customer_id", "id"]] ``` -Enables cross-model measures (`customers.avg_score`), multi-hop dimensions (`customers.regions.name`), and transforms on joined measures (`cumsum(customers.avg_score)`). Auto-generated from FKs during ingestion. Joins are auto-resolved transitively by walking the join graph. Diamond joins (same table via different paths) are supported — each path gets a unique `__`-delimited alias (e.g., `customers__regions` vs `warehouses__regions`). +Enables cross-model measures (`customers.score:avg`), multi-hop dimensions (`customers.regions.name`), and transforms on joined measures (`cumsum(customers.score:avg)`). Auto-generated from FKs during ingestion. Joins are auto-resolved transitively by walking the join graph. Diamond joins (same table via different paths) are supported — each path gets a unique `__`-delimited alias (e.g., `customers__regions` vs `warehouses__regions`). ## Model Filters @@ -66,6 +62,7 @@ Models can have always-applied WHERE filters: `filters: ["deleted_at IS NULL"]`. - Use **bare column names** (e.g., `"amount"`) in dimension/measure SQL — SLayer qualifies them automatically - For complex expressions, use the model name as table prefix (e.g., `"orders.amount * orders.quantity"`) + ## Datasource Config ```yaml @@ -80,9 +77,9 @@ password: ${DB_PASSWORD} `${VAR}` references are resolved from environment variables at read time. -## Auto-Ingestion with Rollup Joins +## Auto-Ingestion -Connect to a DB and generate denormalized models automatically: +Connect to a DB and generate models automatically: ```python from slayer.engine.ingestion import ingest_datasource @@ -91,22 +88,22 @@ models = ingest_datasource(datasource=ds, schema="public") Generates: - Dimensions for all columns -- `count` measure; numeric non-ID cols get `_sum`, `_avg`, `_min`, `_max`, `_distinct`; non-numeric non-ID cols get `_distinct`, `_count` +- One measure per non-ID column (e.g., `{name: "amount", sql: "amount"}`) — aggregation chosen at query time +- `*:count` is always available without a measure definition - **Dynamic joins**: detects FK relationships, creates models with explicit join metadata (LEFT JOINs built at query time) -- Joined dimensions use full-path dotted naming (`customers.name`, `customers.regions.name`) -- FK columns are excluded; ID-like columns (`*_id`, `*_key`) skip sum/avg measures -- Count-distinct measures for each referenced table's PK (`customers.count`) +- FK columns are excluded; ID-like columns (`*_id`, `*_key`) are dimensions only ## MCP Incremental Editing Via MCP, agents can edit models incrementally: -- `update_model(model_name="orders", description="Core orders table")` — update metadata without replacing the full definition -- `add_measures(model_name="orders", measures=[{"name": "total", "sql": "amount", "type": "sum"}])` +- `update_model(model_name="orders", description="Core orders table")` +- `add_measures(model_name="orders", measures=[{"name": "margin", "sql": "amount - cost"}])` - `add_dimensions(model_name="orders", dimensions=[{"name": "region", "sql": "region", "type": "string"}])` -- `delete_measures_dimensions(model_name="orders", names=["total"])` +- `delete_measures_dimensions(model_name="orders", names=["margin"])` ## Storage Backends - `YAMLStorage(base_dir="./data")` — models as YAML files in `data/models/`, datasources in `data/datasources/` - `SQLiteStorage(db_path="./slayer.db")` — everything in a single SQLite file - Both implement `StorageBackend` protocol: `save_model()`, `get_model()`, `list_models()`, `delete_model()`, same for datasources +- Use `resolve_storage("path")` factory for auto-detection (directory → YAML, .db → SQLite, URI schemes for custom backends) diff --git a/.claude/skills/slayer-query.md b/.claude/skills/slayer-query.md index 4fa9f0d..17a6574 100644 --- a/.claude/skills/slayer-query.md +++ b/.claude/skills/slayer-query.md @@ -7,109 +7,78 @@ description: How to construct and execute SLayer queries. Use when building quer ## SlayerQuery Structure ```python -from slayer.core.query import SlayerQuery, ColumnRef, TimeDimension, OrderItem +from slayer.core.query import SlayerQuery query = SlayerQuery( - source_model="orders", # Source model name - fields=[{"formula": "count"}, {"formula": "revenue"}], - dimensions=[ColumnRef(name="status")], - time_dimensions=[ - TimeDimension( - dimension=ColumnRef(name="created_at"), - granularity=TimeGranularity.MONTH, # Required - date_range=["2024-01-01", "2024-12-31"], # Optional - ) - ], - filters=[ - "status == 'active'", - ], - order=[OrderItem(column=ColumnRef(name="count", model="orders"), direction="desc")], + source_model="orders", + fields=["*:count", "revenue:sum"], + dimensions=["status"], + time_dimensions=[{"dimension": "created_at", "granularity": "month"}], + filters=["status = 'active'"], + order=[{"column": "*:count", "direction": "desc"}], limit=10, - offset=0, - - whole_periods_only=False, # Optional: snap date filters to time bucket boundaries ) ``` -## ColumnRef +## Fields — Measures with Colon Aggregation -- `ColumnRef(name="status")` — column in the query's model -- `ColumnRef(name="status", label="Order Status")` — with optional human-readable label -- `ColumnRef.from_string("orders.status")` — parse from dotted string - -## Filters - -Filters are simple formula strings passed as `List[str]`: +Measures are row-level expressions; aggregation is chosen at query time with **colon syntax**: ```python -filters=[ - "status == 'active'", - "amount > 100", - "status == 'completed' or status == 'pending'", +fields=[ + "*:count", # COUNT(*) + "revenue:sum", # SUM(revenue) + "revenue:avg", # AVG(revenue) + "price:weighted_avg(weight=quantity)", # weighted average + {"formula": "revenue:sum / *:count", "name": "aov", "label": "Average Order Value"}, + "cumsum(revenue:sum)", # running total + "change_pct(revenue:sum)", # month-over-month growth + "last(revenue:sum)", # most recent period's value + "time_shift(revenue:sum, -1, 'year')", # year-over-year + "lag(revenue:sum, 1)", # previous row (window function) + "rank(revenue:sum)", # ranking ] ``` -**Operators**: `=`, `<>`, `>`, `>=`, `<`, `<=`, `IN`, `IS NULL`, `IS NOT NULL` - -**Boolean logic**: `and`, `or`, `not` within a single string +Built-in aggregations: `sum`, `avg`, `min`, `max`, `count`, `count_distinct`, `first`, `last`, `weighted_avg`, `median`, `percentile`. -**Pattern matching**: `like` and `not like` operators (e.g., `"name like '%acme%'"`, `"name not like '%test%'"`). Filters on measures are automatically routed to HAVING. +`*:count` is always available — no measure definition needed. `col:count` counts non-nulls. -**Filtering on computed columns**: filters can reference field names from `fields` (e.g., `"rev_change < 0"`) or contain inline transform expressions (e.g., `"last(change(revenue)) < 0"`). These are applied as post-filters on the outer query. +Result column naming: `revenue:sum` → `orders.revenue_sum` (colon becomes underscore). `*:count` → `orders.count`. -## Executing +## Filters ```python -# Via engine directly -engine = SlayerQueryEngine(storage=storage) -result = engine.execute(query=query) # SlayerResponse with .data, .columns, .row_count, .sql +filters=[ + "status = 'active'", + "amount > 100", + "status = 'completed' OR status = 'pending'", +] +``` -# Via client (remote) -client = SlayerClient(url="http://localhost:5143") -df = client.query_df(query) +**Operators**: `=`, `<>`, `>`, `>=`, `<`, `<=`, `IN`, `IS NULL`, `IS NOT NULL`, `LIKE`, `NOT LIKE` -# Via client (local, no server) -client = SlayerClient(storage=YAMLStorage(base_dir="./models")) -data = client.query(query) -``` +**Boolean logic**: `AND`, `OR`, `NOT` -## Fields +**Filtering on computed columns**: `"change(revenue:sum) > 0"`, `"last(change(revenue:sum)) < 0"`. Applied as post-filters on the outer query. -The `fields` parameter specifies what data columns to return. Each field has a `formula` (required), optional `name`, and optional `label` (human-readable display name). Formulas are parsed by `slayer/core/formula.py`. +## Executing ```python -query = SlayerQuery( - source_model="orders", - time_dimensions=[TimeDimension(dimension=ColumnRef(name="created_at"), granularity=TimeGranularity.MONTH)], - fields=[ - {"formula": "count"}, - {"formula": "revenue_sum"}, - {"formula": "revenue_sum / count", "name": "aov", "label": "Average Order Value"}, - {"formula": "cumsum(revenue_sum)"}, - {"formula": "change_pct(revenue_sum)"}, - {"formula": "last(revenue_sum)", "name": "latest_rev"}, - {"formula": "time_shift(revenue_sum, -1, 'year')", "name": "rev_last_year"}, - {"formula": "time_shift(revenue_sum, -2)", "name": "rev_2_ago"}, - {"formula": "lag(revenue_sum, 1)", "name": "rev_prev_row"}, - {"formula": "rank(revenue_sum)"}, - ], -) +engine = SlayerQueryEngine(storage=storage) +result = engine.execute(query=query) # SlayerResponse with .data, .columns, .row_count, .sql, .meta ``` -Available formula functions: `cumsum`, `time_shift`, `change`, `change_pct`, `rank`, `last`, `lag`, `lead`. `time_shift` always uses a self-join CTE — no edge NULLs, handles data gaps correctly. `lag(x, n)` / `lead(x, n)` use SQL window functions directly (more efficient, but NULLs at edges). - -Time dimension resolution: single `time_dimensions` entry is used automatically. With 2+, `main_time_dimension` disambiguates (or model's `default_time_dimension` if among query's time dims). With none, falls back to model default. - ## Cross-Model Measures -Reference measures from joined models with dotted syntax (auto-resolved via join graph): +Reference measures from joined models with dotted syntax + colon aggregation: ```python fields=[ - {"formula": "count"}, - {"formula": "customers.avg_score"}, # single hop - {"formula": "cumsum(customers.avg_score)"}, # transforms work too - {"formula": "customers.regions.population_sum"}, # multi-hop + "*:count", + "customers.score:avg", # single hop + "cumsum(customers.score:avg)", # transforms work too + "customers.regions.population:sum", # multi-hop ] ``` @@ -123,8 +92,8 @@ query = SlayerQuery( source_name="orders", dimensions=[{"name": "tier", "sql": "CASE WHEN amount > 100 THEN 'high' ELSE 'low' END"}], ), - dimensions=[ColumnRef(name="tier")], - fields=[...], + dimensions=["tier"], + fields=["*:count"], ) ``` @@ -133,13 +102,11 @@ query = SlayerQuery( Pass a list of queries — earlier queries are named sub-queries, last is the main: ```python -inner = SlayerQuery(name="monthly", source_model="orders", fields=[...], time_dimensions=[...]) -outer = SlayerQuery(source_model="monthly", fields=[{"formula": "count"}]) +inner = SlayerQuery(name="monthly", source_model="orders", fields=["*:count", "revenue:sum"], time_dimensions=[...]) +outer = SlayerQuery(source_model="monthly", fields=["*:count"]) engine.execute(query=[inner, outer]) ``` -Or save a query as a permanent model with `create_model_from_query`. - ## Result Format -Column keys use `model_name.column_name` format: `"orders.count"`, `"orders.status"`. For multi-hop joined dimensions, the full path is included: `"orders.customers.regions.name"`. Response includes `meta` dict mapping column aliases to `FieldMetadata` objects (currently has `label` field). +Column keys use `model_name.column_name` format: `"orders.count"`, `"orders.revenue_sum"`. For multi-hop joined dimensions, the full path is included: `"orders.customers.regions.name"`. Response includes `meta` dict mapping column aliases to `FieldMetadata` objects (currently has `label` field). diff --git a/docs/getting-started.md b/docs/getting-started.md deleted file mode 100644 index bfff3fc..0000000 --- a/docs/getting-started.md +++ /dev/null @@ -1,166 +0,0 @@ -# Getting Started - -## Installation - -### With uv (recommended) - -```bash -# Run directly without installing (SQLite works out of the box) -uvx --from motley-slayer slayer serve --models-dir ./slayer_data - -# Run with database extras -uvx --from 'motley-slayer[postgres]' slayer serve --models-dir ./slayer_data - -# Install as a standalone tool -uv tool install motley-slayer -uv tool install motley-slayer[postgres] # with extras -slayer serve --models-dir ./slayer_data -``` - -### With pip - -```bash -# Full install (all extras + all database drivers) -pip install motley-slayer[all] - -# Base install (REST API + CLI included by default, no database drivers) -pip install motley-slayer - -# Optional extras -pip install motley-slayer[client] # Python SDK (httpx + pandas) -pip install motley-slayer[mcp] # MCP server - -# Database driver extras -pip install motley-slayer[postgres] # PostgreSQL (psycopg2) -pip install motley-slayer[mysql] # MySQL / MariaDB (pymysql) -pip install motley-slayer[clickhouse] # ClickHouse (clickhouse-sqlalchemy) -pip install motley-slayer[duckdb] # DuckDB (duckdb-engine) -``` - -Extras can be combined: `pip install motley-slayer[mcp,postgres]` - -## Connect a Database - -### Option 1: CLI + YAML - -Create a datasource config file: - -```yaml -# slayer_data/datasources/my_postgres.yaml -name: my_postgres -type: postgres -host: localhost -port: 5432 -database: myapp -username: myuser -password: mypassword -``` - -Ingest the schema and start the server (see [CLI reference](interfaces/cli.md) for all commands): - -```bash -slayer ingest --datasource my_postgres --schema public --models-dir ./slayer_data -slayer serve --models-dir ./slayer_data -``` - -### Option 2: MCP (Agent-Driven) - -Register SLayer with your AI agent, then the agent can connect the database and explore it conversationally. There are two MCP transports (see [MCP Server docs](interfaces/mcp.md) for full tool reference): - -**Stdio** (agent spawns SLayer as a subprocess — you do not run `slayer mcp` manually): - -```bash -# Register with Claude Code -claude mcp add slayer -- slayer mcp --models-dir ./slayer_data - -# If slayer is in a virtualenv, use the full executable path: -# claude mcp add slayer -- $(poetry env info -p)/bin/slayer mcp --models-dir /abs/path/to/slayer_data -``` - -**HTTP/SSE** (you run the server, agent connects remotely): - -```bash -# 1. Start the server -slayer serve --models-dir ./slayer_data - -# 2. Register the remote MCP endpoint with your agent -claude mcp add slayer-remote --transport sse --url http://localhost:5143/mcp/sse -``` - -Once connected, the agent will call `create_datasource` (which auto-ingests models by default) then `datasource_summary` then `query` conversationally. Set `auto_ingest=false` to skip auto-ingestion and call `ingest_datasource_models` separately. - -### Option 3: Python (see [Python Client docs](interfaces/python-client.md)) - -```python -from slayer.core.models import DatasourceConfig -from slayer.engine.ingestion import ingest_datasource -from slayer.engine.query_engine import SlayerQueryEngine -from slayer.storage.yaml_storage import YAMLStorage - -# Set up storage -storage = YAMLStorage(base_dir="./slayer_data") - -# Create datasource -ds = DatasourceConfig( - name="my_postgres", - type="postgres", - host="localhost", - port=5432, - database="myapp", - username="myuser", - password="mypassword", -) -storage.save_datasource(ds) - -# Ingest schema (auto-generates models with rollup joins) -models = ingest_datasource(datasource=ds, schema="public") -for model in models: - storage.save_model(model) -``` - -## Run Your First Query - -```python -from slayer.core.query import SlayerQuery - -engine = SlayerQueryEngine(storage=storage) - -query = SlayerQuery( - source_model="orders", - fields=["*:count"], - dimensions=["status"], - limit=10, -) -result = engine.execute(query=query) - -for row in result.data: - print(row) -# {"orders.status": "completed", "orders._count": 42} -# {"orders.status": "pending", "orders._count": 15} -``` - -## Runnable Examples - -The `examples/` directory has ready-to-run setups with sample data: - -| Example | Database | How to run | -|---------|----------|------------| -| [embedded](https://github.com/MotleyAI/slayer/tree/main/examples/embedded) | SQLite | `python examples/embedded/run.py` | -| [postgres](https://github.com/MotleyAI/slayer/tree/main/examples/postgres) | Postgres | `cd examples/postgres && docker compose up -d` | -| [mysql](https://github.com/MotleyAI/slayer/tree/main/examples/mysql) | MySQL | `cd examples/mysql && docker compose up -d` | -| [clickhouse](https://github.com/MotleyAI/slayer/tree/main/examples/clickhouse) | ClickHouse | `cd examples/clickhouse && docker compose up -d` | - -Each includes a `verify.py` script that runs assertions against the seeded data. - -## What's Next - -- [Terminology](concepts/terminology.md) — key terms and concepts -- [Models](concepts/models.md) — define custom dimensions and measures -- [Queries](concepts/queries.md) — query structure and parameters -- [Formulas](concepts/formulas.md) — field and filter formula reference -- [Auto-Ingestion](concepts/ingestion.md) — how rollup joins work -- [MCP Server](interfaces/mcp.md) — MCP tools reference and agent workflows -- [REST API](interfaces/rest-api.md) — HTTP endpoints with curl examples -- [Python Client](interfaces/python-client.md) — SDK for remote and local mode -- [CLI](interfaces/cli.md) — all CLI commands and flags -- [Datasources](configuration/datasources.md) — connection config, env vars, supported databases diff --git a/docs/getting-started/cli.md b/docs/getting-started/cli.md index f3094d3..066399c 100644 --- a/docs/getting-started/cli.md +++ b/docs/getting-started/cli.md @@ -73,7 +73,7 @@ slayer ingest --datasource my_pg --exclude migrations,django_session ```bash # Count orders by status -slayer query '{"source_model": "orders", "fields": [{"formula": "count"}], "dimensions": ["status"]}' +slayer query '{"source_model": "orders", "fields": ["*:count"], "dimensions": ["status"]}' # From a file slayer query @query.json @@ -101,7 +101,7 @@ slayer datasources list After install + ingest, this should return data: ```bash -slayer query '{"source_model": "orders", "fields": [{"formula": "count"}]}' +slayer query '{"source_model": "orders", "fields": ["*:count"]}' ``` Expected output: diff --git a/docs/getting-started/mcp.md b/docs/getting-started/mcp.md index c29eafd..1d6154e 100644 --- a/docs/getting-started/mcp.md +++ b/docs/getting-started/mcp.md @@ -51,7 +51,7 @@ Once the agent is connected, it handles everything conversationally. A typical e > > **You:** How many orders per status? > -> **Agent:** *calls `query(source_model="orders", fields=[{"formula": "count"}], dimensions=["status"])`* +> **Agent:** *calls `query(source_model="orders", fields=["*:count"], dimensions=["status"])`* The agent uses these MCP tools in order: diff --git a/docs/getting-started/python.md b/docs/getting-started/python.md index 0ce44c1..864e759 100644 --- a/docs/getting-started/python.md +++ b/docs/getting-started/python.md @@ -56,8 +56,8 @@ engine = SlayerQueryEngine(storage=storage) result = engine.execute(query=SlayerQuery( source_model="orders", - fields=[{"formula": "count"}, {"formula": "revenue_sum"}], - dimensions=[{"name": "status"}], + fields=["*:count", "revenue:sum"], + dimensions=["status"], )) for row in result.data: @@ -89,8 +89,8 @@ client = SlayerClient(url="http://localhost:5143") # Query — returns SlayerResponse (same as embedded mode) result = client.query(SlayerQuery( source_model="orders", - fields=[{"formula": "count"}], - dimensions=[{"name": "status"}], + fields=["*:count"], + dimensions=["status"], )) print(result.data) ``` @@ -101,8 +101,8 @@ print(result.data) # With pandas (requires motley-slayer[client] extra) df = client.query_df(SlayerQuery( source_model="orders", - fields=[{"formula": "count"}, {"formula": "revenue_sum"}], - dimensions=[{"name": "status"}], + fields=["*:count", "revenue:sum"], + dimensions=["status"], )) print(df) ``` @@ -140,7 +140,7 @@ engine = SlayerQueryEngine(storage=storage) print(storage.list_models()) # Should return data -result = engine.execute(query={"source_model": "orders", "fields": [{"formula": "count"}]}) +result = engine.execute(query={"source_model": "orders", "fields": ["*:count"]}) print(f"{result.row_count} row(s), columns: {result.columns}") ``` diff --git a/docs/getting-started/rest-api.md b/docs/getting-started/rest-api.md index dc8f14d..3a59381 100644 --- a/docs/getting-started/rest-api.md +++ b/docs/getting-started/rest-api.md @@ -60,8 +60,8 @@ curl -X POST http://localhost:5143/query \ -H "Content-Type: application/json" \ -d '{ "source_model": "orders", - "fields": [{"formula": "count"}], - "dimensions": [{"name": "status"}] + "fields": ["*:count"], + "dimensions": ["status"] }' ``` @@ -86,8 +86,8 @@ curl -X POST http://localhost:5143/query \ -H "Content-Type: application/json" \ -d '{ "source_model": "orders", - "fields": [{"formula": "revenue_sum"}], - "time_dimensions": [{"dimension": {"name": "created_at"}, "granularity": "month", "date_range": ["2024-01-01", "2024-12-31"]}] + "fields": ["revenue:sum"], + "time_dimensions": [{"dimension": "created_at", "granularity": "month", "date_range": ["2024-01-01", "2024-12-31"]}] }' # Top 5 customers @@ -95,9 +95,9 @@ curl -X POST http://localhost:5143/query \ -H "Content-Type: application/json" \ -d '{ "source_model": "orders", - "fields": [{"formula": "revenue_sum"}], - "dimensions": [{"name": "customers.name"}], - "order": [{"column": {"name": "revenue_sum"}, "direction": "desc"}], + "fields": ["revenue:sum"], + "dimensions": ["customers.name"], + "order": [{"column": "revenue:sum", "direction": "desc"}], "limit": 5 }' @@ -132,8 +132,8 @@ const res = await fetch("http://localhost:5143/query", { headers: {"Content-Type": "application/json"}, body: JSON.stringify({ source_model: "orders", - fields: [{formula: "count"}], - dimensions: [{name: "status"}], + fields: ["*:count"], + dimensions: ["status"], }), }); const {data} = await res.json(); @@ -141,7 +141,7 @@ const {data} = await res.json(); **Go:** ```go -body := `{"source_model": "orders", "fields": [{"formula": "count"}]}` +body := `{"source_model": "orders", "fields": ["*:count"]}` resp, _ := http.Post("http://localhost:5143/query", "application/json", strings.NewReader(body)) ```