From fdb592aba87d7ecd4032a80d7ed74eff2069a3a4 Mon Sep 17 00:00:00 2001 From: Norman Banick Date: Mon, 26 May 2025 17:03:05 +0000 Subject: [PATCH] Rest of the refactoring --- .gitignore | 7 +++- backend/src/api/health.py | 1 - backend/src/api/router.py | 3 +- backend/src/api/translate.py | 7 ++-- backend/src/api/validate.py | 6 +-- backend/src/api/vdb_list.py | 45 ++++++++++++++++++++++ backend/src/config.py | 2 +- backend/src/schemas/common.py | 4 ++ backend/src/utils/ai_analyzer.py | 62 ++++++++++++++++++------------ backend/src/utils/denodo_client.py | 1 - 10 files changed, 102 insertions(+), 36 deletions(-) create mode 100644 backend/src/api/vdb_list.py diff --git a/.gitignore b/.gitignore index e818757..7c79c7c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ -backend/test.py .env +backend/__pycache__/* +backend/src/__pycache__/* +backend/src/api/__pycache__/* +backend/src/db/__pycache__/* +backend/src/schemas/__pycache__/* +backend/src/utils/__pycache__/* \ No newline at end of file diff --git a/backend/src/api/health.py b/backend/src/api/health.py index 1e41874..4b6f12b 100644 --- a/backend/src/api/health.py +++ b/backend/src/api/health.py @@ -1,4 +1,3 @@ -# src/api/endpoints/health.py from fastapi import APIRouter, status from src.schemas.common import HealthCheck diff --git a/backend/src/api/router.py b/backend/src/api/router.py index 51d61c5..c5c6e61 100644 --- a/backend/src/api/router.py +++ b/backend/src/api/router.py @@ -1,7 +1,8 @@ from fastapi import APIRouter -from src.api import health, translate, validate +from src.api import health, translate, validate, vdb_list api_router = APIRouter() api_router.include_router(health.router) # /health api_router.include_router(translate.router) # /translate api_router.include_router(validate.router) # /validate +api_router.include_router(vdb_list.router) # /vdb_list diff --git a/backend/src/api/translate.py b/backend/src/api/translate.py index f3e20a0..afb96ac 100644 --- a/backend/src/api/translate.py +++ b/backend/src/api/translate.py @@ -1,4 +1,3 @@ -# src/api/endpoints/translate.py import logging import sqlglot from sqlglot import parse_one @@ -7,7 +6,8 @@ from src.schemas.translation import SqlQueryRequest, TranslateApiResponse from src.utils.ai_analyzer import analyze_sql_translation_error -from src.utils.vdb_transformer import transform_vdb_table_qualification # Updated import +from src.utils.vdb_transformer import transform_vdb_table_qualification +from src.schemas.translation import TranslationError logger = logging.getLogger(__name__) router = APIRouter() @@ -24,7 +24,6 @@ def translate_sql_to_vql(request: SqlQueryRequest) -> TranslateApiResponse: if not dialect: raise HTTPException(status_code=400, detail="Missing 'dialect' in request body") - logger.info(f"SQLGlot version: {sqlglot.__version__}") logger.debug(f"Translation request: dialect='{dialect}', vdb='{vdb}', SQL='{source_sql[:100]}...'") try: @@ -39,7 +38,7 @@ def translate_sql_to_vql(request: SqlQueryRequest) -> TranslateApiResponse: except ParseError as pe: logger.warning(f"SQL Parsing Error during translation: {pe}", exc_info=True) try: - ai_analysis_result = analyze_sql_translation_error(str(pe), source_sql) + ai_analysis_result: TranslationError = analyze_sql_translation_error(str(pe), source_sql) return TranslateApiResponse(error_analysis=ai_analysis_result) except HTTPException as http_exc: # AI service's own HTTPExceptions (e.g. API key) raise http_exc diff --git a/backend/src/api/validate.py b/backend/src/api/validate.py index a39448b..12d2820 100644 --- a/backend/src/api/validate.py +++ b/backend/src/api/validate.py @@ -1,4 +1,3 @@ -# src/api/endpoints/validate.py import logging from fastapi import APIRouter, HTTPException from sqlalchemy import text @@ -6,7 +5,8 @@ from src.schemas.validation import VqlValidateRequest, VqlValidationApiResponse from src.utils.ai_analyzer import analyze_vql_validation_error -from src.db.session import get_engine # Use the centralized engine +from src.db.session import get_engine +from src.schemas.validation import ValidationError # Use the centralized engine logger = logging.getLogger(__name__) router = APIRouter() @@ -40,7 +40,7 @@ def validate_vql_query_endpoint(request: VqlValidateRequest) -> VqlValidationApi db_error_message = str(getattr(e, "orig", e)) # Get specific DB error logger.warning(f"Denodo VQL validation failed: {db_error_message}") try: - ai_analysis_result = analyze_vql_validation_error(db_error_message, request.vql) + ai_analysis_result: ValidationError = analyze_vql_validation_error(db_error_message, request.vql) return VqlValidationApiResponse( validated=False, error_analysis=ai_analysis_result ) diff --git a/backend/src/api/vdb_list.py b/backend/src/api/vdb_list.py new file mode 100644 index 0000000..38245fa --- /dev/null +++ b/backend/src/api/vdb_list.py @@ -0,0 +1,45 @@ +import logging +from fastapi import APIRouter, HTTPException +from src.schemas.common import VDBResponse +from src.config import settings +import os +import yaml +from typing import List, Dict, Any +router = APIRouter() + + +def load_config_values() -> dict[list[str]]: + logging.info(f"Loading configuration from {settings.APP_VDB_CONF}") + with open(settings.APP_VDB_CONF, 'r') as f: + raw_config = yaml.safe_load(f) # Use safe_load for security + return raw_config + + +def transform_values(string_list: List[str]) -> List[Dict[str, str]]: + """Transforms a list of strings into a list of {'value': string, 'label': string}.""" + return [{"value": item, "label": item} for item in string_list] + + +@router.get("/vdbs", response_model=VDBResponse, tags=["VQL Forge"]) +async def get_vdb_list() -> VDBResponse: + """ + Retrieves a list of VDBs from the configuration file. + """ + if not settings.APP_VDB_CONF: + logging.error("No VDB CONFIG FILE") + raise HTTPException( + status_code=500, detail="VDB service error: config missing." + ) + logging.info( + f"Request received for /vdbs. Using config file: {os.path.abspath(settings.APP_VDB_CONF)}") + config = load_config_values() # Your config loading function + + if config['vdbs'] is None: + logging.warning("'vdbs' list empty in configuration. Returning empty list.") + return VDBResponse(results=[]) + + try: + return VDBResponse(results=transform_values(config['vdbs'])) + except Exception as e: + logging.error(f"Error creating VDBResponse: {e}. Data was: {config}", exc_info=True) + raise HTTPException(status_code=500, detail="Error processing VDB list.") diff --git a/backend/src/config.py b/backend/src/config.py index 4f3c001..81d79f5 100644 --- a/backend/src/config.py +++ b/backend/src/config.py @@ -14,7 +14,7 @@ class Settings(BaseSettings): GEMINI_API_KEY: str DATABASE_URL: str | None = None # Will be constructed - + APP_VDB_CONF: str model_config = SettingsConfigDict(env_file=".env", extra="ignore") def __init__(self, **values): diff --git a/backend/src/schemas/common.py b/backend/src/schemas/common.py index 940813f..95d6a89 100644 --- a/backend/src/schemas/common.py +++ b/backend/src/schemas/common.py @@ -14,3 +14,7 @@ class QueryResponse(BaseModel): results: List[Dict[str, Any]] parsed_ast: str | None = None message: str | None = None + + +class VDBResponse(BaseModel): + results: List[Dict[str, str]] diff --git a/backend/src/utils/ai_analyzer.py b/backend/src/utils/ai_analyzer.py index ee4828c..141cbfb 100644 --- a/backend/src/utils/ai_analyzer.py +++ b/backend/src/utils/ai_analyzer.py @@ -1,8 +1,7 @@ -# src/services/ai_analyzer.py import logging from typing import Type from fastapi import HTTPException -from pydantic_ai import Agent, RunContext +from pydantic_ai import Agent, RunContext, Tool from src.config import settings from src.schemas.translation import TranslationError @@ -13,45 +12,59 @@ logger = logging.getLogger(__name__) -def _initialize_ai_agent(system_prompt: str, output_type: Type) -> Agent: +def _initialize_ai_agent(system_prompt: str, output_type: Type, tools: list[Tool] = []) -> Agent: if not settings.GEMINI_API_KEY: logger.error("GEMINI_API_KEY environment variable not set.") raise HTTPException( status_code=500, detail="AI service configuration error: API key missing." ) + + print(tools) return Agent( # Consider making model name a config variable "gemini-2.5-flash-preview-04-17", # "gemini-1.5-flash-latest" might be more current system_prompt=system_prompt, output_type=output_type, + tools=tools # llm_kwargs={"api_key": settings.GEMINI_API_KEY} # pydantic-ai typically handles GOOGLE_API_KEY env var directly ) +def _get_functions() -> list[str]: + """Retrieves a list of available Denodo functions. Use this tool when an error indicates a function was not found or has incorrect arity.""" + logger.info("Executing _get_functions tool") + return get_denodo_functions_list() + + +def _get_views() -> list[str]: + """Retrieves a list of available Denodo views. Use this tool when an error suggests a table or view is missing or misspelled.""" + return get_available_views_from_denodo() + + +def _get_vdbs() -> list[str]: + """Retrieves a list of available Denodo Virtual DataBases (VDBs). Use this tool when an error refers to an invalid database name.""" + return get_vdb_names_list() + + +def _extract_tables(ctx: RunContext[str]) -> str: + """Get the player's name.""" + + return "bla" + + def analyze_vql_validation_error(error: str, input_vql: str) -> ValidationError: agent = _initialize_ai_agent( - "You are an SQL Validation assistant for Denodo VQL", ValidationError + "You are an SQL Validation assistant for Denodo VQL", ValidationError, tools=[ + Tool(_get_functions), Tool(_get_views), Tool(_get_vdbs), Tool(_extract_tables)] ) - @agent.tool - def get_views(ctx: RunContext[str]) -> list[str]: - # Potentially pass vdb context if available from the original request - # For now, calling without specific VDB context for views - return get_available_views_from_denodo() - - @agent.tool - def get_denodo_functions(ctx: RunContext[str]) -> list[str]: - return get_denodo_functions_list() - - @agent.tool - def get_vdbs(ctx: RunContext[str]) -> list[str]: - return get_vdb_names_list() - - prompt = f"""Analyze the Denodo VQL Validation error. Explain concisely why the `Input VQL` failed based on the `Error` and provide the corrected `Valid VQL Suggestion`. - Do not use ```sql markdown for the corrected VQL response. Do not explain what you are doing, just provide the explanation and the suggestion directly. - If the table/view is missing, use the get_views tool to determine which views are available and use the best guess in your suggestion. - If a function is not valid, use get_denodo_functions tool to check for available Denodo functions. - If a database name (VDB) is invalid, use get_vdbs tool to check for database names. Suggest one that is similar or advise the user to check. + prompt: str = f"""You are an expert Denodo VQL Assistant. Your primary goal is to analyze Denodo VQL validation errors, explain them concisely, and provide accurate, corrected VQL suggestions. + Explain concisely why the `Input VQL` failed based on the `Error` and provide the corrected `Valid VQL Suggestion`. + Do not explain what you are doing, just provide the explanation and the suggestion directly. + + If the table/view is missing, use the _get_views tool to determine which views are available and use the best guess in your suggestion. + If you get a 'Function with arity not found' exception, use _get_functions tool to check for available Denodo functions. + If a database name (VDB) is invalid, use _get_vdbs tool to check for database names. Suggest one that is similar or advise the user to check. **ERROR:** {error} **Input VQL:** @@ -81,7 +94,8 @@ def analyze_sql_translation_error(exception_message: str, input_sql: str) -> Tra ) # Add tools here if the translation assistant needs them (e.g., to understand target VQL features) - prompt = f"""Analyze the SQL parsing/translation error. Explain concisely why the `Input SQL` failed based on the `Error` and provide a corrected `Valid SQL Suggestion` that would be parsable by the original dialect or a hint for VQL. + prompt = f"""Analyze the SQL parsing/translation error. + Explain concisely why the `Input SQL` failed based on the `Error` and provide a corrected `Valid SQL Suggestion` that would be parsable by the original dialect or a hint for VQL. Do not use ```sql markdown for the corrected SQL response. Do not explain what you are doing, just provide the explanation and the suggestion directly. **ERROR:** {exception_message} diff --git a/backend/src/utils/denodo_client.py b/backend/src/utils/denodo_client.py index 14084e1..85d58b7 100644 --- a/backend/src/utils/denodo_client.py +++ b/backend/src/utils/denodo_client.py @@ -1,4 +1,3 @@ -# src/services/denodo_client.py import logging from fastapi import HTTPException from sqlalchemy import text