From 2f9bbcdc528e97858ca86c70b400c046a842f825 Mon Sep 17 00:00:00 2001 From: Rodney Kinney Date: Tue, 17 Mar 2026 17:21:38 -0700 Subject: [PATCH 1/2] Add analyze-data command --- src/asta/analyze_data/__init__.py | 5 +++++ src/asta/analyze_data/passthrough.py | 18 ++++++++++++++++++ src/asta/cli.py | 2 ++ src/asta/utils/asta.conf | 10 ++++++++++ 4 files changed, 35 insertions(+) create mode 100644 src/asta/analyze_data/__init__.py create mode 100644 src/asta/analyze_data/passthrough.py diff --git a/src/asta/analyze_data/__init__.py b/src/asta/analyze_data/__init__.py new file mode 100644 index 0000000..4da516c --- /dev/null +++ b/src/asta/analyze_data/__init__.py @@ -0,0 +1,5 @@ +"""Pass-through to DataVoyager (dv) CLI for data analysis""" + +from asta.analyze_data.passthrough import analyze_data + +__all__ = ["analyze_data"] diff --git a/src/asta/analyze_data/passthrough.py b/src/asta/analyze_data/passthrough.py new file mode 100644 index 0000000..d1bd010 --- /dev/null +++ b/src/asta/analyze_data/passthrough.py @@ -0,0 +1,18 @@ +"""Pass-through command for DataVoyager (dv) CLI""" + +from asta.utils.config import get_config +from asta.utils.passthrough import create_passthrough_command + +# Load configuration from asta.conf +config = get_config()["passthrough"]["analyze-data"] + +# Create the analyze-data passthrough command +analyze_data = create_passthrough_command( + tool_name=config["tool_name"], + install_type=config["install_type"], + install_source=config["install_source"], + minimum_version=config["minimum_version"], + command_name=config["command_name"], + friendly_name=config["friendly_name"], + docstring=config["docstring"], +) diff --git a/src/asta/cli.py b/src/asta/cli.py index a128aca..1b2e473 100644 --- a/src/asta/cli.py +++ b/src/asta/cli.py @@ -3,6 +3,7 @@ import click from asta import __version__ +from asta.analyze_data import analyze_data from asta.commands.auth import auth from asta.documents import documents from asta.experiment import 
experiment @@ -39,6 +40,7 @@ def papers(): cli.add_command(auth) # Register passthrough commands +cli.add_command(analyze_data) cli.add_command(documents) cli.add_command(experiment) diff --git a/src/asta/utils/asta.conf b/src/asta/utils/asta.conf index d04377b..8f91bb1 100644 --- a/src/asta/utils/asta.conf +++ b/src/asta/utils/asta.conf @@ -66,4 +66,14 @@ passthrough { friendly_name = "panda" docstring = "Run computational experiments" } + + analyze-data { + tool_name = "dv" + install_type = "local" + install_source = "/Users/rodneyk/workspace/dv-core-asta-integration" + minimum_version = "0.1.0" + command_name = "analyze-data" + friendly_name = "DataVoyager" + docstring = "Analyze data using DataVoyager AI agent" + } } From 1930b041739c19dd8f2ce00da20dd9a15cd35e07 Mon Sep 17 00:00:00 2001 From: Rodney Kinney Date: Wed, 18 Mar 2026 11:24:48 -0700 Subject: [PATCH 2/2] DV wip --- skills/analyze-data/SKILL.md | 98 ++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 skills/analyze-data/SKILL.md diff --git a/skills/analyze-data/SKILL.md b/skills/analyze-data/SKILL.md new file mode 100644 index 0000000..7017363 --- /dev/null +++ b/skills/analyze-data/SKILL.md @@ -0,0 +1,98 @@ +--- +name: Asta Data Analysis +description: Analyze data using DataVoyager AI agent. Use when the user asks to "analyze data", "explore dataset", "visualize data", "run data analysis", or needs help with data science tasks. +allowed-tools: + - Bash(asta analyze-data *) + - Bash(mkdir -p .asta/analyze-data/*) + - Read(.asta/analyze-data/*) + - TaskOutput +--- + +# Analyze Data with DataVoyager + +Analyze datasets using the DataVoyager AI agent. This skill provides an interactive AI-powered data analysis environment that can explore datasets, create visualizations, and perform statistical analysis. 
+ +## Installation + +If the `asta` command is not available, install it using `uv tool install git+ssh://git@github.com/allenai/asta-plugins.git` + +**Prerequisites:** Python 3.11+ and [uv package manager](https://docs.astral.sh/uv/) + +Verify installation with `asta analyze-data --help` + +## Workflow + +The user will typically: +1. Provide a dataset file path or ask to analyze data +2. Describe the analysis they want to perform +3. Request visualizations or statistical summaries + +### Default Output Locations + +**IMPORTANT**: Always specify output locations to keep analyses organized in `.asta/analyze-data/`: + +- **OUTPUTS_DIR**: `.asta/analyze-data/YYYY-MM-DD-slug/` where: + - `YYYY-MM-DD` is the current date + - `slug` is a short descriptive name derived from the analysis task (e.g., "sales-analysis", "customer-segmentation") + +**Example directory structure:** +``` +.asta/analyze-data/ +├── 2024-01-15-sales-analysis/ +│ ├── plots/ +│ └── [analysis outputs] +└── 2024-01-16-customer-segmentation/ + ├── plots/ + └── [analysis outputs] +``` + +### Running DataVoyager + +DataVoyager runs in interactive mode by default. 
The basic command is: + +```bash +# Run DataVoyager with default Docker backend (recommended) +asta analyze-data +``` + +**Backend Options:** +- `--backend docker` (default): Local Docker container for isolated execution +- `--backend modal`: Remote serverless execution + +**Configuration:** +- `--config path/to/config.yaml`: Use custom configuration +- `--log-level INFO`: Set logging level (DEBUG, INFO, WARNING, ERROR) + +### Example Usage + +**Basic interactive analysis:** +```bash +# Start DataVoyager in interactive mode +asta analyze-data + +# With specific backend +asta analyze-data --backend docker + +# With custom config +asta analyze-data --config .asta/analyze-data/config.yaml +``` + +**Creating organized output directories:** +```bash +# Create output directory with date and slug +OUTPUTS_DIR=".asta/analyze-data/$(date +%Y-%m-%d)-sales-analysis" +mkdir -p "$OUTPUTS_DIR" + +# Run DataVoyager (outputs will be saved by the agent) +cd "$OUTPUTS_DIR" +asta analyze-data +``` + +### Notes + +- **Output Directory**: Create the `.asta/analyze-data/YYYY-MM-DD-slug/` directory before running analysis +- **Task Slug**: Create a short descriptive slug from the analysis task (e.g., "sales-analysis", "data-exploration"). Keep it lowercase with hyphens. +- **Docker Backend**: Recommended for safe, isolated code execution. Requires Docker to be installed and running. +- **Modal Backend**: Serverless execution option for remote computation +- **Interactive Mode**: The agent will prompt you for dataset paths and analysis instructions +- Always inform the user where outputs were saved after analysis completes