Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion adtech_series_sp26/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ adtech_series_sp26/
│ │ ├── src/ # FastAPI backend
│ │ └── frontend/ # React + Tailwind dashboard
│ └── scripts/deploy.sh
├── segment_builder/ # Session 1 — Audience Segmentation app (FastAPI + React)
│ ├── app.yaml # Databricks App config
│ ├── backend/ # FastAPI: routers, services, models, config
│ ├── frontend/ # React + Vite + Tailwind (Agent + Builder modes)
│ ├── docs/DATA_DICTIONARY.md
│ └── tests/ # Playwright app spec
├── identity_graph/ # (Session 2 — placeholder)
├── segment_builder/ # (Session 1 — placeholder)
└── measurement/ # (Session 4 — placeholder)
```

Expand Down
19 changes: 19 additions & 0 deletions adtech_series_sp26/segment_builder/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copy to .env and fill in values. See CLAUDE.md for details.
# Optional when running in Databricks Apps (auth is automatic).
# Required for local dev unless using a Databricks CLI profile.

# --- Databricks SQL (preview, build segment) ---
# Workspace host, e.g. https://your-workspace.cloud.databricks.com
DATABRICKS_SERVER_HOSTNAME=
# SQL warehouse HTTP path, e.g. /sql/1.0/warehouses/<id>
DATABRICKS_HTTP_PATH=
# Personal access token or service principal token (dapi...). Omit if using a CLI profile.
DATABRICKS_TOKEN=

# --- Local dev: CLI profile (alternative to host/path/token) ---
# Profile name in ~/.databrickscfg. Used when host/path/token are not set.
DATABRICKS_CONFIG_PROFILE=e2-demo-field-eng

# --- Databricks Model Serving (Agent mode) ---
# Model endpoint name for natural-language segment parsing.
DATABRICKS_MODEL_ENDPOINT=databricks-claude-sonnet-4-5
58 changes: 58 additions & 0 deletions adtech_series_sp26/segment_builder/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Dependencies
node_modules/
.venv/
__pycache__/
*.pyc
*.pyo
*.pyd

# Local tool state (Claude Code, Databricks tooling)
.claude/
.databricks/

# Environment files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
.env.development
.env.test
.env.production

# Build outputs
dist/
build/

# Playwright
playwright-report/
test-results/
playwright/.cache/

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Dev docs (local only)
dev/

# Screenshots folder (development artifacts)
screenshots/

# Legacy scripts
explore_data.py

# Bun lock file (Databricks Apps uses npm)
bun.lock
61 changes: 61 additions & 0 deletions adtech_series_sp26/segment_builder/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Audience Segmentation Databricks App

A Databricks App that enables advertisers to create audience segments without writing SQL. Built with a React frontend and a FastAPI backend.

## Features

**Agent Mode** - Conversational segment building powered by LLM
- Natural language input: "Dog owners in California aged 25-54"
- Automatic conversion to SQL queries
- Real-time preview with audience counts

**Builder Mode** - Visual no-code query builder
- Drag-and-drop condition builder
- Support for AND/OR logic with nested groups
- Multi-select dropdowns for categorical values

## Quick Start

```bash
# Install dependencies
bun install
uv venv && uv pip install -r requirements.txt

# Build frontend
bun run build

# Run backend
source .venv/bin/activate && uvicorn backend.main:app --reload
```

See [CLAUDE.md](CLAUDE.md) for comprehensive documentation including architecture, API endpoints, deployment, and troubleshooting.

## Architecture

```
frontend/ React + TypeScript + Tailwind CSS
src/features/segment-builder/
components/ UI components
hooks/ Custom React hooks
api/ API client functions

backend/ FastAPI + Python
routers/ API endpoints
services/ Business logic (SQL generation, LLM integration)
config/ Feature metadata
```

## Live Demo

https://dq-adtech-1444828305810485.aws.databricksapps.com/

## Deployment

```bash
databricks apps deploy dq-adtech
```

## Data

- **Input**: Unity Catalog table with 3.4M audience profiles
- **Output**: Segments saved to Unity Catalog for activation
1 change: 1 addition & 0 deletions adtech_series_sp26/segment_builder/app.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
command: ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Config module
120 changes: 120 additions & 0 deletions adtech_series_sp26/segment_builder/backend/config/column_overrides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""In-memory overrides for column names and UI labels. Used by Settings API."""

import re
from typing import Any

# Defaults matching current schema.
# The household/individual identity columns have the same default names in
# both the profile and segment-list sections, so they are declared once.
_DEFAULT_IDENTITY_COLUMNS = {
    "identity_household_column": "megacorp_hhid",
    "identity_individual_column": "megacorp_indid",
}

# Profile section: feature layout plus identity columns.
DEFAULT_PROFILE = {
    "features_layout": "by_column",
    **_DEFAULT_IDENTITY_COLUMNS,
}

# Segment-list section: identity columns plus the segment-name column.
DEFAULT_SEGMENT_LIST = {
    **_DEFAULT_IDENTITY_COLUMNS,
    "segment_name_column": "campaign_name",
}

# UI labels keyed by segment-info field name.
DEFAULT_SEGMENT_INFO_LABELS: dict[str, str] = {
    "segment_name": "Segment Name",
    "segment_definition": "Segment Definition",
    "quarter": "Quarter",
    "start_date": "Flight Start Date",
    "end_date": "Flight End Date",
    "megacorp_indid": "Individual Identifier",
    "megacorp_hhid": "Household Identifier",
}

# Process-local store for overrides; starts empty and is filled on first use.
_overrides: dict[str, Any] = {}


def _safe_column(name: str) -> str:
    """Return ``name`` if it is a safe column identifier, otherwise ``""``.

    A safe identifier starts with an ASCII letter or underscore and contains
    only ASCII letters, digits and underscores.

    Args:
        name: Candidate column name. Non-string values are rejected rather
            than raising, so callers can fall back to a default.

    Returns:
        ``name`` unchanged when it is valid, or the empty string when it is
        missing, not a string, or contains any other character.
    """
    # Values come from loosely typed dicts (dict[str, Any]); anything that is
    # not a string can never be a valid identifier.
    if not isinstance(name, str):
        return ""
    # fullmatch is used instead of match with "$" because "$" also matches
    # just before a trailing newline, which would let "col\n" through.
    if re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", name) is None:
        return ""
    return name


def set_column_config(
    *,
    profile: dict[str, Any] | None = None,
    segment_list: dict[str, Any] | None = None,
    segment_info_labels: dict[str, str] | None = None,
) -> None:
    """Update the in-memory column configuration, one section at a time.

    Every argument that is not ``None`` replaces its whole section with the
    section defaults overlaid by the supplied dict; sections passed as
    ``None`` are left untouched.

    Args:
        profile: Overrides for the ``profile`` section. Identity column names
            for which ``_safe_column`` returns an empty value fall back to the
            defaults, as does a ``features_layout`` other than ``"by_column"``
            or ``"by_row"``.
        segment_list: Overrides for the ``segment_list`` section. Identity and
            segment-name column names are sanitised the same way.
        segment_info_labels: Overrides for the UI labels; stored as given on
            top of the default labels.
    """
    # Seed the store with copies of the defaults the first time it is used.
    store = _overrides.setdefault(
        "column_configs",
        {
            "profile": dict(DEFAULT_PROFILE),
            "segment_list": dict(DEFAULT_SEGMENT_LIST),
            "segment_info_labels": dict(DEFAULT_SEGMENT_INFO_LABELS),
        },
    )

    if profile is not None:
        new_profile = {**DEFAULT_PROFILE, **profile}
        store["profile"] = new_profile
        for key in ("identity_household_column", "identity_individual_column"):
            cleaned = _safe_column(new_profile.get(key, ""))
            new_profile[key] = cleaned if cleaned else DEFAULT_PROFILE[key]
        # Only the two known layouts are accepted; anything else resets.
        if new_profile.get("features_layout") not in ("by_column", "by_row"):
            new_profile["features_layout"] = DEFAULT_PROFILE["features_layout"]

    if segment_list is not None:
        new_segment_list = {**DEFAULT_SEGMENT_LIST, **segment_list}
        store["segment_list"] = new_segment_list
        for key in (
            "identity_household_column",
            "identity_individual_column",
            "segment_name_column",
        ):
            cleaned = _safe_column(new_segment_list.get(key, ""))
            new_segment_list[key] = cleaned if cleaned else DEFAULT_SEGMENT_LIST[key]

    if segment_info_labels is not None:
        store["segment_info_labels"] = {
            **DEFAULT_SEGMENT_INFO_LABELS,
            **segment_info_labels,
        }


def get_column_configs() -> dict[str, Any]:
    """Return the full column config (for API), seeding defaults on first use.

    The returned dict is the module's stored config object, not a copy.
    """
    try:
        return _overrides["column_configs"]
    except KeyError:
        # First access: populate the store with copies of the defaults.
        seeded = {
            "profile": dict(DEFAULT_PROFILE),
            "segment_list": dict(DEFAULT_SEGMENT_LIST),
            "segment_info_labels": dict(DEFAULT_SEGMENT_INFO_LABELS),
        }
        _overrides["column_configs"] = seeded
        return seeded


def get_profile_identity_household_column() -> str:
    """Return the household identity column from the ``profile`` section."""
    profile_cfg = get_column_configs()["profile"]
    fallback = DEFAULT_PROFILE["identity_household_column"]
    return profile_cfg.get("identity_household_column", fallback)


def get_profile_identity_individual_column() -> str:
    """Return the individual identity column from the ``profile`` section."""
    profile_cfg = get_column_configs()["profile"]
    fallback = DEFAULT_PROFILE["identity_individual_column"]
    return profile_cfg.get("identity_individual_column", fallback)


def get_campaigns_identity_household_column() -> str:
    """Return the household identity column from the ``segment_list`` section."""
    segment_list_cfg = get_column_configs()["segment_list"]
    fallback = DEFAULT_SEGMENT_LIST["identity_household_column"]
    return segment_list_cfg.get("identity_household_column", fallback)


def get_campaigns_identity_individual_column() -> str:
    """Return the individual identity column from the ``segment_list`` section."""
    segment_list_cfg = get_column_configs()["segment_list"]
    fallback = DEFAULT_SEGMENT_LIST["identity_individual_column"]
    return segment_list_cfg.get("identity_individual_column", fallback)


def get_campaigns_segment_name_column() -> str:
    """Return the segment-name column from the ``segment_list`` section."""
    segment_list_cfg = get_column_configs()["segment_list"]
    fallback = DEFAULT_SEGMENT_LIST["segment_name_column"]
    return segment_list_cfg.get("segment_name_column", fallback)


def get_segment_info_column_labels() -> dict[str, str]:
    """Return a new dict of the segment-info UI labels.

    Falls back to the default labels when the config has no
    ``segment_info_labels`` section.
    """
    config = get_column_configs()
    labels = config.get("segment_info_labels", DEFAULT_SEGMENT_INFO_LABELS)
    # Copy so callers cannot mutate the stored labels through the result.
    return dict(labels)
20 changes: 20 additions & 0 deletions adtech_series_sp26/segment_builder/backend/config/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Shared constants for API responses."""

# Generic client-facing message returned for 500 errors. It is deliberately
# vague so that internal error details are not leaked to clients.
GENERIC_ERROR_MESSAGE = "An error occurred. Please try again."


def get_databricks_forbidden_message(
catalog: str | None = None,
profiles_schema: str | None = None,
segments_schema: str | None = None,
) -> str:
"""Build 403 message using current table settings. Uses placeholder names if not set."""
c = catalog or "catalog"
p = profiles_schema or "profiles"
s = segments_schema or "segments"
return (
f"Databricks access denied (403). In Databricks Apps the app runs as a service principal. "
f"Grant that identity: USE_CATALOG on {c}; USE_SCHEMA and SELECT on {c}.{p}; "
f"USE_SCHEMA, SELECT, and MODIFY on {c}.{s}. Get the app's client ID with: databricks apps get <app-name>"
)
Loading