EVA (Exploratory Visual Analyzer) is an AI-powered Data Science Assistant that automates data science workflows. This documentation covers the complete API for developers who want to integrate EVA into their applications or extend its functionality.
from eva.orchestrator import AnalysisOrchestrator
from eva.models.core import AnalysisContext, AnalysisConfig
from eva.agents.csv_ingestor import CSVIngestorAgent
from eva.agents.eda_generator import EDAGeneratorAgent
from eva.agents.visualizer import VisualizerAgent
# Create orchestrator
orchestrator = AnalysisOrchestrator(max_workers=3)
# Configure analysis
config = AnalysisConfig(
processing_timeout_minutes=5,
enable_ai_suggestions=True
)
# Create context
context = AnalysisContext(
session_id="my_analysis",
config=config
)
# Set up file path
context.metadata = {
'file_path': 'data.csv',
'file_size_bytes': 1024000
}
# Create agents
agents = [
CSVIngestorAgent(),
EDAGeneratorAgent(),
VisualizerAgent()
]
# Execute analysis
results = orchestrator.execute_pipeline(agents, context)
# Access results
if results["CSVIngestorAgent"].success:
print(f"Loaded dataset with shape: {context.dataset.shape}")
if results["EDAGeneratorAgent"].success:
stats = results["EDAGeneratorAgent"].data['statistics']
print(f"Generated statistics for {len(stats)} columns")

# Basic analysis
python -m eva.cli analyze data.csv
# With specific output directory
python -m eva.cli analyze data.csv --output ./results
# Enable AI suggestions
python -m eva.cli analyze data.csv --enable-ai
# Export to notebook
python -m eva.cli analyze data.csv --export-notebook

The main orchestrator that manages the execution of analysis agents.
class AnalysisOrchestrator:
"""
Orchestrates the execution of analysis agents in a pipeline.
Handles dependency management, parallel execution, error handling,
and resource monitoring.
"""
def __init__(self, max_workers: int = 3, system_limits: SystemLimits = None):
"""
Initialize the orchestrator.
Args:
max_workers: Maximum number of parallel workers
system_limits: System resource limits for monitoring
"""
def execute_pipeline(self, agents: List[BaseAgent],
context: AnalysisContext) -> Dict[str, AgentResult]:
"""
Execute a pipeline of agents.
Args:
agents: List of agents to execute
context: Analysis context containing data and configuration
Returns:
Dictionary mapping agent names to their results
"""Shared context object that passes data between agents.
@dataclass
class AnalysisContext:
"""
Shared context for analysis pipeline execution.
Contains the dataset, metadata, configuration, and results
that are passed between agents during execution.
"""
dataset: Optional[pd.DataFrame] = None
metadata: Optional[Dict[str, Any]] = None
results: Dict[str, Any] = field(default_factory=dict)
config: AnalysisConfig = field(default_factory=AnalysisConfig)
session_id: str = ""EVA uses an agent-based architecture where each agent specializes in a specific aspect of data analysis.
All agents inherit from EVABaseAgent:
class EVABaseAgent:
"""Base class for all EVA agents"""
def execute(self, context: AnalysisContext) -> AgentResult:
"""Execute the agent's main functionality"""
raise NotImplementedError
def validate_input(self, context: AnalysisContext) -> bool:
"""Validate that the context contains required data"""
raise NotImplementedError
def get_dependencies(self) -> List[str]:
"""Return list of agent names this agent depends on"""
return []Handles CSV file loading, validation, and initial data processing.
from eva.agents.csv_ingestor import CSVIngestorAgent
agent = CSVIngestorAgent()
# The agent expects context.metadata['file_path'] to be set
context.metadata = {'file_path': 'data.csv'}
result = agent.execute(context)
if result.success:
# Dataset is now available in context.dataset
print(f"Loaded {len(context.dataset)} rows")
# Access detailed results
column_info = result.data['column_info']
validation_report = result.data['validation_report']

Performs exploratory data analysis including statistics, correlations, and data quality assessment.
from eva.agents.eda_generator import EDAGeneratorAgent
agent = EDAGeneratorAgent()
result = agent.execute(context) # Requires dataset in context
if result.success:
# Access EDA results
statistics = result.data['statistics']
correlations = result.data['correlations']
missing_values = result.data['missing_values']
outliers = result.data['outliers']
quality_score = result.data['data_quality_score']

Creates comprehensive visualizations for the dataset.
from eva.agents.visualizer import VisualizerAgent
agent = VisualizerAgent()
result = agent.execute(context)
if result.success:
plots = result.data['plots']
interactive_plots = result.data['interactive_plots']
# Access individual plots
for plot_name, plot_info in plots.items():
print(f"Generated {plot_info['plot_type']}: {plot_info['file_path']}")

Provides AI-powered insights and data cleaning suggestions.
from eva.agents.insight_suggester import InsightSuggesterAgent
agent = InsightSuggesterAgent()
result = agent.execute(context)
if result.success:
suggestions = result.data['suggestions']
explanations = result.data['explanations']
for suggestion in suggestions:
print(f"Suggestion: {suggestion['description']}")
print(f"Priority: {suggestion['priority']}")

Recommends machine learning models and creates baseline pipelines.
from eva.agents.model_recommender import ModelRecommenderAgent
agent = ModelRecommenderAgent()
result = agent.execute(context)
if result.success:
recommendations = result.data['recommendations']
problem_type = result.data['problem_type']
baselines = result.data['baselines']
print(f"Problem type: {problem_type}")
for model in recommendations:
print(f"Recommended: {model['name']} (score: {model['score']})")

Exports analysis results to Jupyter notebooks and Python scripts.
from eva.agents.notebook_exporter import NotebookExporterAgent
agent = NotebookExporterAgent()
result = agent.execute(context)
if result.success:
notebook_path = result.data['notebook_path']
script_path = result.data['script_path']
print(f"Notebook exported to: {notebook_path}")
print(f"Script exported to: {script_path}")

from eva.models.core import AnalysisConfig, DatasetMetadata, AgentResult
# Analysis configuration
config = AnalysisConfig(
max_file_size_mb=100,
processing_timeout_minutes=10,
memory_limit_gb=2,
enable_ai_suggestions=True,
export_formats=['ipynb', 'py'],
visualization_formats=['png', 'html']
)
# Dataset metadata
metadata = DatasetMetadata(
filename="data.csv",
shape=(1000, 10),
column_types={'col1': 'int64', 'col2': 'float64'},
encoding="utf-8",
upload_timestamp=datetime.now(),
file_size_bytes=50000
)
# Agent result
result = AgentResult(
success=True,
data={'key': 'value'},
metadata={'execution_info': 'details'},
errors=[],
warnings=[],
execution_time=1.5,
agent_name="ExampleAgent"
)

from eva.models.results import (
CSVIngestorResult, EDAResult, VisualizationResult,
InsightResult, ModelResult, ExportResult
)
# Specialized result types provide structured access to agent outputs
csv_result = CSVIngestorResult(
success=True,
dataframe=df,
column_info={'col1': {'type': 'int64', 'non_null_count': 1000}},
validation_report={'is_valid': True, 'issues': []},
preview_data={'head': df.head().to_dict()},
execution_time=0.5,
agent_name="CSVIngestorAgent"
)

from eva.services.ai_service import AIService
from eva.services.ai_prompt_engine import AIPromptEngine
# Initialize AI service
ai_service = AIService(
provider="openai", # or "gemini"
api_key="your-api-key",
model="gpt-4"
)
# Create prompt engine
prompt_engine = AIPromptEngine(ai_service)
# Generate insights
insights = prompt_engine.generate_insights(
dataset_summary=summary,
data_quality_issues=issues
)

from eva.services.data_quality import DataQualityAssessor
assessor = DataQualityAssessor()
quality_report = assessor.assess_quality(dataframe)
print(f"Overall quality score: {quality_report.overall_score}")
print(f"Issues found: {len(quality_report.issues)}")

from eva.services.visualization import VisualizationEngine
viz_engine = VisualizationEngine()
# Generate automatic visualizations
plots = viz_engine.generate_plots(
dataframe=df,
plot_types=['histogram', 'correlation', 'scatter'],
output_format='both' # PNG and HTML
)

from eva.utils.logging import EVALogger
logger = EVALogger(session_id="my_session")
# Log events
logger.log_event("agent_started", "CSVIngestorAgent", {"file": "data.csv"})
logger.log_performance("CSVIngestorAgent", execution_time=1.2, memory_usage=50)
# Get execution summary
summary = logger.get_execution_summary()

from eva.utils.error_handling import ErrorHandler, ErrorClassifier
error_handler = ErrorHandler()
try:
# Some operation
pass
except Exception as e:
# Classify and handle error
error_type = ErrorClassifier.classify_error(e)
recovery_action = error_handler.get_recovery_action(error_type)
if recovery_action:
recovery_action.execute()

from eva.utils.system_monitor import SystemMonitor, SystemLimits
# Set up monitoring
limits = SystemLimits(
max_memory_percent=80.0,
max_cpu_percent=90.0
)
monitor = SystemMonitor(limits)
monitor.start()
# Check system health
health_status = monitor.get_system_status()
print(f"System health: {health_status['overall_health']}")
monitor.stop()

See the examples directory for complete working examples:
- Always validate input: Check that required data is present in context
- Handle errors gracefully: Return meaningful error messages
- Use structured results: Return data in consistent, documented formats
- Log important events: Use EVALogger for debugging and monitoring
- Respect dependencies: Declare dependencies accurately
- Use parallel execution: Configure appropriate number of workers
- Monitor resources: Set up system monitoring for production use
- Optimize data loading: Use chunked processing for large files
- Cache expensive operations: Implement caching where appropriate
- Classify errors properly: Use error classification for appropriate responses
- Provide recovery options: Implement recovery mechanisms where possible
- Log errors with context: Include relevant context in error logs
- Fail gracefully: Ensure partial failures don't crash the entire pipeline
- Use environment-specific configs: Different settings for dev/prod
- Validate configurations: Check configuration validity at startup
- Document configuration options: Provide clear documentation
- Use sensible defaults: Ensure system works with minimal configuration
For detailed API reference documentation, see:
See CONTRIBUTING.md for guidelines on extending EVA's functionality.