From 69bf431a3db3e6f45d10c3722df6bb9c537433e5 Mon Sep 17 00:00:00 2001 From: Stav Ponte Date: Wed, 17 Jun 2026 02:01:50 +0300 Subject: [PATCH] feat: implement feature flags UI overrides builder and admin attribution --- agent/.coverage | Bin 0 -> 53248 bytes agent/src/agent/config.py | 4 + agent/src/agent/graph.py | 7 +- agent/src/agent/llm.py | 65 +- agent/src/agent/mcp_server.py | 18 +- agent/src/agent/nodes/extractor.py | 7 +- agent/src/agent/nodes/init_flags.py | 66 ++ agent/src/agent/nodes/init_skills.py | 14 +- agent/src/agent/nodes/query_builder.py | 8 +- agent/src/agent/nodes/refiner.py | 12 +- agent/src/agent/nodes/satisfaction_check.py | 47 +- agent/src/agent/nodes/schema_explorer.py | 37 +- agent/src/agent/state.py | 3 + agent/src/agent/utils/flag_bridge.py | 155 ++++ agent/src/agent/utils/skill_registry.py | 6 +- agent/tests/conftest.py | 19 +- .../f9a3d1c8e205_add_config_schema_flags.py | 230 ++++++ backend/app/config.py | 1 + backend/app/main.py | 2 + backend/app/routers/__init__.py | 3 +- backend/app/routers/flags.py | 178 +++++ backend/app/services/flag_service.py | 271 +++++++ backend/tests/conftest.py | 1 + core/src/core/models/models.py | 87 +++ frontend/src/App.tsx | 9 + frontend/src/api/flags.ts | 76 ++ frontend/src/components/layout/Sidebar.tsx | 6 +- frontend/src/config/constants.ts | 2 + frontend/src/pages/FlagsPage.css | 396 ++++++++++ frontend/src/pages/FlagsPage.tsx | 733 ++++++++++++++++++ 30 files changed, 2399 insertions(+), 64 deletions(-) create mode 100644 agent/.coverage create mode 100644 agent/src/agent/nodes/init_flags.py create mode 100644 agent/src/agent/utils/flag_bridge.py create mode 100644 backend/alembic/versions/f9a3d1c8e205_add_config_schema_flags.py create mode 100644 backend/app/routers/flags.py create mode 100644 backend/app/services/flag_service.py create mode 100644 frontend/src/api/flags.ts create mode 100644 frontend/src/pages/FlagsPage.css create mode 100644 frontend/src/pages/FlagsPage.tsx diff --git 
a/agent/.coverage b/agent/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..626dc57e07176e2f08dc184a100ef12b59b8898a GIT binary patch literal 53248 zcmeI4U5wmD9l-5f?|Qvob04HjIKml3b9c(!Z4OW&pfowsC`5@!1NlJEoa6P*?mF4E z_iS%+M+J>TAbp`c^&wKJfS2%qDnwOOAxI;>gjy+u2PC8l5hT=9Kp_=vX_HHi|M=r} z_bwatVNKKMZ{6(}$Azvif=I&~~nqn7`j+wp~+vj=v{2WCIJcc<*<$>V7mwy7z3R<_KO za+6weMXys?H_E!EI=WGnof-{Gw`^Kwa~9;mti=jWd-d6#qL-nTL#rU7Y3U8sT9l8| z#YwL~pv|ERj-LTSs9rT#33)uIJ|SDQLM>`&)b_0y*UJ;>iEliT77N3}{8{Sxq*@yM zWxBk9jJ7MgDo!)?~qs*bBe$7Bv2c351Tht7S2bC*eoMDDo+ZCB=sO;Hnv>GKq zeyr7Kvc`cLzGBDhRkK%J#|)f($8B?qwru;`)tn)6)}nr`mZ8tL=t|>H%I%tyo*RZa zDKjS^0`AQu3tNVH4x*AS*z_}Zfvy$B-blCacal?8^caX4NRIkfs#p^Pxw(K_!M001KDTXjqi*+>F3<`p!?`pI$ zW5EWwR5wfAvF1gLO`(IpSP&MgvnJ1{FR_Szb)*lw$?LdOinURu(ouj=% zXC(xbnU&qNyFMuvMn?G4k{3onYXyne6+&T4-eMRHPc^7&x2$j=x!oWc)@YBCVIK+3 zfICxSvM@W+%blQ{G862iTu~4U>(=qE-y_(##6F2GCsKJhM-Sx(#JJ#=yEgj$Euq^ar_bXgdK z{TV=J1qdg00Dp0>TdfcA079!gDk#i4sYXxg?8(<+;`>`#Ze@>or^ z*(hX>$tBaQQ`PY22B@*y2A#Ssdp4|{mA18I*o#a?^~9KGQ#iS>Vg3Mgy0Cx2PNSh3 zU*L3#^l>Eo&Bqlfhi-83x2^tF7|Id=AcpW2o=V#8~`xvBR2J znPJ~-m7HRia_%TE7B+3--B!?p{lQ7G;Yy&PhQ0=ev98Ekb+R2~`(TuYOQg==V!%5X zvO`}38xj3xFZt-gQo#jMdV($Rd2*eDKirT25Ar<^euF zvOf49fKN`(d}4Yg4PM2_D;#-+yz!oN5RFCxNB{{S0VIF~kN^@u0!RP}AOR$R1Xc)S z;~V(!XMk8bJ|YFb0L0Y`7YU-e?G70mUG|F6|=v|9?V?Ld@HjteIfm2>7UZK zrSa4+pcrmQ00|%gB!C2v021gyfDQ|syRUfAhQ~gOwxgbCvNs(RAE5Se$835z#inH* zqncwE;c0C{TP(6CUOxuU)K8g4$yB|UCU~cuqHSrxo(0d&GLafQ846E`>O-LBaCB-E zMK^RuQ9zY82&(qor0%lG{NFVwpJRnHSG4Vof|W4~3Xt*PLSA%PM#DtG)xn-#jytiyX9So73UQR0Jf z3Ep1gm`qbs1WmO6o&`beJr=G|!_u`{gBom%dngGSxAtFSz20Et648*Wt46f~Z|PCA zy6*d-2%u?4|NVeBOjPtLxH2CH1=ap5@cWPr?;x@(5C-rT0|lB%I}R%6VxTgDVF6}L zhk|^S2l>1D?^4IS#_sgdZq11)cVRC=w zwcO_PS9(nMn2iLG01`j~NB{{S0gkH(?moW%7cQ&T|HH=w_i%)2I z|G_!I-4=xsfBiqu5ZogXh`;svpJ;-+I|6N;>;HUJaQ8=`WA*w!rwHzB1bV{te^wXV zgAvGIyZ+Ba6vtib|FkK%YGm%LUH?mw;_F=hr{)EhM&McR^}l#TaJNRF(O>^3BOSb|%PK*4R+|M7^1g>d~JQw4XI)c+h6IoR5la$^3TF)+>ihgKmter2_OL^ 
zfCP{L5i>BzVdFh&1=oP4V2XU;6VPca?Niet+lD@5lbL_nBq= zt*2j{j>TdZIev-!=#BVxD01;9Ubzf+IZ;T1_|5UysW%@#^~9FZ0Z!()>&q9nqy{B8 z@zLD1=U#R%k@sm3}O6I;BKU|lD%*oV+?`*o5 zc#I#oyy0&*&j0b+hf@j29!h@b8lMzkFQ0gGV;s`YXJg!dmxr>k*Wo=i9JlM~`k&Ovnkfc1%?*9{Vjw9E}Q{?C5b@DR# z3;7=T6M2T5Cx0NXl3&5^0nbAr+>ihgKmter2_OL^fCP{L5|^paH4OOgpMNeEt&jeCg@6W|8`@BTkG(0}t$ zA`(CXNB{{S0VIF~kN^@u0!RP}AOR$B=MZ4`|8f0)=d=e}f&`EN5CO|3^UcIOmU str: workflow = StateGraph(AgentState) workflow.add_node("validate_config", validate_config_node) +workflow.add_node("init_flags", init_flags_node) workflow.add_node("init_skills", init_skills_node) workflow.add_node("extractor", extractor_node) workflow.add_node("schema_explorer", schema_explorer_node) @@ -203,9 +205,10 @@ def route_rejection(state: AgentState) -> str: workflow.add_node("hitl_escalation", hitl_escalation_node) workflow.add_node("finalizer", finalizer_node) -# Entry: validate config before anything else (G2-01 fail-fast) +# Entry: validate config → resolve flags → load skills → start reasoning workflow.add_edge(START, "validate_config") -workflow.add_edge("validate_config", "init_skills") +workflow.add_edge("validate_config", "init_flags") +workflow.add_edge("init_flags", "init_skills") workflow.add_edge("init_skills", "extractor") workflow.add_edge("extractor", "schema_explorer") diff --git a/agent/src/agent/llm.py b/agent/src/agent/llm.py index 540d28f..b29eaba 100644 --- a/agent/src/agent/llm.py +++ b/agent/src/agent/llm.py @@ -1,16 +1,67 @@ import logging +from typing import Optional + from langchain_openai import ChatOpenAI + from agent.config import settings -from typing import Optional +logger = logging.getLogger(__name__) + +# Flag name → LLM_MODEL env-var fallback for each node +_NODE_MODEL_FLAGS: dict[str, str] = { + "extractor": "EXTRACTOR_MODEL", + "schema_explorer": "SCHEMA_SUMMARY_MODEL", + "query_builder": "QUERY_BUILDER_MODEL", + "refiner": "REFINER_MODEL", + "satisfaction_check": "SATISFACTION_JUDGE_MODEL", + "routing": "QUERY_BUILDER_MODEL", 
# rejection router reuses QB model + "default": "QUERY_BUILDER_MODEL", +} + +_NODE_TEMP_FLAGS: dict[str, str] = { + "extractor": "EXTRACTOR_TEMPERATURE", + "query_builder": "QUERY_BUILDER_TEMPERATURE", +} + + +def get_llm( + node: str = "default", + temperature: Optional[float] = None, + runtime_flags: Optional[dict] = None, +) -> ChatOpenAI: + """ + Factory for per-node LLM instances. + + Priority for model/temperature selection: + 1. runtime_flags (resolved by init_flags_node from DB + execution mode) + 2. AgentSettings env-var defaults + + Args: + node: Name of the calling graph node (used to pick the right flag). + temperature: Optional hard override — bypasses flag resolution. + runtime_flags: The state["runtime_flags"] dict from the current invocation. + Pass None when initialising at module level (will use env defaults). + """ + flags = runtime_flags or {} + + # Resolve model + model_flag = _NODE_MODEL_FLAGS.get(node, "QUERY_BUILDER_MODEL") + model = flags.get(model_flag) or settings.LLM_MODEL + + # Resolve temperature + if temperature is None: + temp_flag = _NODE_TEMP_FLAGS.get(node) + temperature = float(flags.get(temp_flag, 0.0)) if temp_flag else 0.0 + + logger.debug( + "Instantiating LLM for node='%s': model='%s' temperature=%.2f", + node, + model, + temperature, + ) -# We could dynamically configure settings based on the node name. -# For now, it delegates to agent.config settings. 
-def get_llm(node: str = "default", temperature: Optional[float] = 0.0) -> ChatOpenAI: - """Factory function for instantiating the unified LLM.""" - logging.debug(f"Instantiating LLM for node: {node}") return ChatOpenAI( - model=settings.LLM_MODEL, + model=model, base_url=settings.LLM_BASE_URL, api_key=settings.LLM_API_KEY, temperature=temperature, diff --git a/agent/src/agent/mcp_server.py b/agent/src/agent/mcp_server.py index de98a98..d2e03fe 100644 --- a/agent/src/agent/mcp_server.py +++ b/agent/src/agent/mcp_server.py @@ -20,9 +20,24 @@ async def chat_with_agent( allowed_statuses: list[str] | None = None, extractors: list[str] | None = None, active_skills: list[str] | None = None, + execution_mode: str | None = None, hitl_enabled: bool = True, ) -> str: - """Run the Text2SQL agent to answer database queries.""" + """Run the Text2SQL agent to answer database queries. + + Args: + query: The natural language question to answer. + thread_id: Optional thread ID for session continuity. + resume_value: HITL resume payload (pass after receiving an interrupt). + allowed_tables: Restrict the agent to specific tables. + allowed_statuses: Filter tables by status. + extractors: List of extractor names/IDs to use. + active_skills: List of Jeen skill UUIDs to inject. + execution_mode: Named configuration preset (e.g. 'cost_saving', + 'high_quality', 'benchmark'). Overrides flag defaults + for this invocation only. + hitl_enabled: If False, skip all human-in-the-loop interrupts. 
+ """ thread_id = thread_id or str(uuid.uuid4()) config = { "configurable": {"thread_id": thread_id}, @@ -91,6 +106,7 @@ async def chat_with_agent( "allowed_statuses": allowed_statuses, "active_extractors": active_extractors, "active_skills": active_skills, + "execution_mode": execution_mode, "non_interactive": not hitl_enabled, }, config=config, diff --git a/agent/src/agent/nodes/extractor.py b/agent/src/agent/nodes/extractor.py index 594a03e..ed0adf5 100644 --- a/agent/src/agent/nodes/extractor.py +++ b/agent/src/agent/nodes/extractor.py @@ -45,8 +45,8 @@ def extract(self, query: str) -> List[ContextEntry]: class LLMExtractor(BaseExtractor): - def __init__(self): - self.llm = get_llm("extractor") + def __init__(self, runtime_flags: dict | None = None): + self.llm = get_llm("extractor", runtime_flags=runtime_flags) langfuse_prompt = langfuse_client.get_prompt(settings.LANGFUSE_PROMPT_EXTRACTOR) self.prompt = ChatPromptTemplate.from_messages( @@ -102,10 +102,11 @@ def extractor_node(state: AgentState): """Enrich the user query with additional context to help downstream phases.""" user_query = state["user_query"] active_extractors = state.get("active_extractors") or [] + runtime_flags = state.get("runtime_flags") or {} import concurrent.futures - extractors: List[BaseExtractor] = [TimeExtractor(), LLMExtractor()] + extractors: List[BaseExtractor] = [TimeExtractor(), LLMExtractor(runtime_flags=runtime_flags)] for ext_info in active_extractors: extractors.append(HTTPExtractor(ext_info["url"], ext_info["name"])) diff --git a/agent/src/agent/nodes/init_flags.py b/agent/src/agent/nodes/init_flags.py new file mode 100644 index 0000000..de7f2cf --- /dev/null +++ b/agent/src/agent/nodes/init_flags.py @@ -0,0 +1,66 @@ +""" +init_flags_node (G4) +==================== +Runs immediately after validate_config, before init_skills. + +Responsibilities: + 1. Call FlagBridge.resolve_flags(execution_mode) to merge: + mode overrides → DB flags → env-var defaults + 2. 
Write the resolved dict to state["runtime_flags"] + 3. Log runtime_flags to Langfuse trace metadata for full observability +""" + +import logging + +from agent.langfuse_client import langfuse_client +from agent.state import AgentState +from agent.utils.flag_bridge import FlagBridge + +logger = logging.getLogger(__name__) + +_flag_bridge = FlagBridge() + + +async def init_flags_node(state: AgentState) -> dict: + """ + Resolve all runtime configuration flags for this invocation. + + The resolved dict is stored in state["runtime_flags"] and read by every + downstream node instead of directly accessing AgentSettings env vars. + This guarantees that: + - DS team changes in the Studio UI take effect within the cache TTL (30s). + - Execution mode overrides are applied consistently to all nodes. + - Every Langfuse trace carries the exact config used for that query. + """ + execution_mode: str | None = state.get("execution_mode") + + try: + runtime_flags = await _flag_bridge.resolve_flags(execution_mode) + except Exception as exc: + logger.warning("init_flags_node: FlagBridge failed (%s), using env defaults", exc) + # FlagBridge already handles its own fallback internally, so this is a + # safety net for any unexpected error in the bridge itself. 
+ from agent.utils.flag_bridge import _ENV_DEFAULTS + runtime_flags = dict(_ENV_DEFAULTS) + + # Emit to Langfuse for observability + try: + trace_id = langfuse_client.get_current_trace_id() + if trace_id: + langfuse_client.trace( + id=trace_id, + metadata={ + "runtime_flags": runtime_flags, + "execution_mode": execution_mode or "default", + }, + ) + except Exception as exc: + logger.warning("init_flags_node: Langfuse trace failed: %s", exc) + + logger.info( + "init_flags_node: resolved %d flags (mode=%s)", + len(runtime_flags), + execution_mode or "default", + ) + + return {"runtime_flags": runtime_flags} diff --git a/agent/src/agent/nodes/init_skills.py b/agent/src/agent/nodes/init_skills.py index 7dff575..98ad88d 100644 --- a/agent/src/agent/nodes/init_skills.py +++ b/agent/src/agent/nodes/init_skills.py @@ -14,13 +14,23 @@ async def init_skills_node(state: AgentState) -> dict: keeping reasoning nodes pure and state reproducible. """ active_skills = state.get("active_skills") + runtime_flags = state.get("runtime_flags") or {} - if not active_skills: + from agent.config import settings + skills_enabled = bool(runtime_flags.get("SKILLS_ENABLED", settings.SKILLS_ENABLED)) + hot_reload = bool(runtime_flags.get("SKILLS_HOT_RELOAD", settings.SKILLS_HOT_RELOAD)) + cache_ttl = int(runtime_flags.get("SKILLS_CACHE_TTL", settings.SKILLS_CACHE_TTL)) + + if not skills_enabled or not active_skills: return {"loaded_skills": None} try: _skill_registry.redis = get_redis_client() - loaded_skills = await _skill_registry.get_skills(active_skills) + loaded_skills = await _skill_registry.get_skills( + active_skills, + hot_reload=hot_reload, + cache_ttl=cache_ttl, + ) if loaded_skills: try: diff --git a/agent/src/agent/nodes/query_builder.py b/agent/src/agent/nodes/query_builder.py index 176f764..221b20d 100644 --- a/agent/src/agent/nodes/query_builder.py +++ b/agent/src/agent/nodes/query_builder.py @@ -4,12 +4,9 @@ from agent.config import settings from agent.langfuse_client import 
langfuse_client from langgraph.types import interrupt - -llm = get_llm("query_builder") - - def query_builder_node(state: AgentState): """Build SQL from plan and pause for user approval.""" + runtime_flags = state.get("runtime_flags") or {} feedback = state.get("feedback") feedback_str = f"\nUser Feedback to apply: {feedback}" if feedback else "" @@ -23,7 +20,8 @@ def query_builder_node(state: AgentState): langfuse_prompt = langfuse_client.get_prompt(settings.LANGFUSE_PROMPT_QUERY_BUILDER) prompt = ChatPromptTemplate.from_messages(langfuse_prompt.get_langchain_prompt()) - chain = prompt | llm + _llm = get_llm("query_builder", runtime_flags=runtime_flags) + chain = prompt | _llm response = chain.invoke( { "schema_plan": state.get("schema_plan"), diff --git a/agent/src/agent/nodes/refiner.py b/agent/src/agent/nodes/refiner.py index ed03302..382d990 100644 --- a/agent/src/agent/nodes/refiner.py +++ b/agent/src/agent/nodes/refiner.py @@ -6,7 +6,6 @@ from core import execute_query_sync from agent.config import settings from agent.langfuse_client import langfuse_client -from agent.langfuse_client import langfuse_client from langchain_core.prompts import ChatPromptTemplate from agent.llm import get_llm from agent.utils.sql import clean_sql @@ -33,6 +32,10 @@ async def refiner_node(state: AgentState): sql = state.get("sql_query") count = state.get("refinement_count", 0) error_history = state.get("error_history") or [] + runtime_flags = state.get("runtime_flags") or {} + + # Resolve per-invocation limit (DS-tunable via flags) + max_iterations = int(runtime_flags.get("MAX_REFINER_ITERATIONS", MAX_REFINER_ITERATIONS)) # Execute against Trino try: @@ -49,14 +52,14 @@ async def refiner_node(state: AgentState): if not success: # If we reached the refinement limit, just stop and don't prompt LLM - if count >= MAX_REFINER_ITERATIONS: + if count >= max_iterations: return { "trino_error": trino_error, "last_error": trino_error, "refinement_count": count + 1, "error_history": 
error_history, "escalation_reason": ( - f"Refiner exhausted {MAX_REFINER_ITERATIONS} iterations. " + f"Refiner exhausted {max_iterations} iterations. " f"Last Trino error: {trino_error}" ), } @@ -65,7 +68,8 @@ async def refiner_node(state: AgentState): prompt = ChatPromptTemplate.from_messages( langfuse_prompt.get_langchain_prompt() ) - chain = prompt | llm + _llm = get_llm("refiner", runtime_flags=runtime_flags) + chain = prompt | _llm schema_context = build_refiner_schema_context(state) diff --git a/agent/src/agent/nodes/satisfaction_check.py b/agent/src/agent/nodes/satisfaction_check.py index 4a892d3..52dbef8 100644 --- a/agent/src/agent/nodes/satisfaction_check.py +++ b/agent/src/agent/nodes/satisfaction_check.py @@ -3,7 +3,7 @@ ================================= A quality-control gateway node placed between the refiner's success path and the finalizer. Runs up to four independent verification checks, each -individually gated by a feature flag. +individually gated by a feature flag read from runtime_flags (G4). Graph position: [refiner: success] → [satisfaction_check] @@ -25,7 +25,10 @@ logger = logging.getLogger(__name__) -llm = get_llm("satisfaction_check") + +def _f(runtime_flags: dict, name: str, default): + """Read a flag from runtime_flags, falling back to *default*.""" + return runtime_flags.get(name, default) async def satisfaction_check_node(state: AgentState) -> dict: @@ -35,10 +38,16 @@ async def satisfaction_check_node(state: AgentState) -> dict: Returns a partial state dict. The conditional edge `route_satisfaction` in graph.py inspects `satisfaction_failures` to decide the next node. 
""" + runtime_flags = state.get("runtime_flags") or {} + # ── Global gate ─────────────────────────────────────────────────────────── - if not settings.SATISFACTION_CHECK_ENABLED: + check_enabled = _f(runtime_flags, "SATISFACTION_CHECK_ENABLED", settings.SATISFACTION_CHECK_ENABLED) + if not check_enabled: return {} # route_satisfaction will forward directly to finalizer + # ── LLM (used for Check C and D) ────────────────────────────────────────── + llm = get_llm("satisfaction_check", runtime_flags=runtime_flags) + failures: list[str] = [] rows = state.get("inline_result_rows") or [] columns: list[str] = [] @@ -48,24 +57,26 @@ async def satisfaction_check_node(state: AgentState) -> dict: columns = list(rows[0].keys()) # ── Check A: Execution Success ──────────────────────────────────────────── - if settings.SATISFACTION_CHECK_EXECUTION: + if _f(runtime_flags, "SATISFACTION_CHECK_EXECUTION", settings.SATISFACTION_CHECK_EXECUTION): if state.get("trino_error"): failures.append(f"[CHECK_A] Execution failed: {state['trino_error']}") # ── Check B: Row Plausibility ───────────────────────────────────────────── - if settings.SATISFACTION_CHECK_PLAUSIBILITY: + if _f(runtime_flags, "SATISFACTION_CHECK_PLAUSIBILITY", settings.SATISFACTION_CHECK_PLAUSIBILITY): n = len(rows) - if n < settings.SATISFACTION_MIN_ROWS: + min_rows = _f(runtime_flags, "SATISFACTION_MIN_ROWS", settings.SATISFACTION_MIN_ROWS) + max_rows = _f(runtime_flags, "SATISFACTION_MAX_ROWS", settings.SATISFACTION_MAX_ROWS) + if n < min_rows: failures.append( - f"[CHECK_B] Result returned {n} rows — below minimum {settings.SATISFACTION_MIN_ROWS}." + f"[CHECK_B] Result returned {n} rows — below minimum {min_rows}." ) - elif n > settings.SATISFACTION_MAX_ROWS: + elif n > max_rows: failures.append( - f"[CHECK_B] Result returned {n} rows — exceeds maximum {settings.SATISFACTION_MAX_ROWS}." + f"[CHECK_B] Result returned {n} rows — exceeds maximum {max_rows}." 
) # ── Check C: Structural Column Coverage ─────────────────────────────────── - if settings.SATISFACTION_CHECK_COLUMNS and columns: + if _f(runtime_flags, "SATISFACTION_CHECK_COLUMNS", settings.SATISFACTION_CHECK_COLUMNS) and columns: prompt = ( f"User question: {state.get('user_query', '')}\n" f"SQL column headers returned: {', '.join(columns)}\n\n" @@ -82,7 +93,9 @@ async def satisfaction_check_node(state: AgentState) -> dict: logger.warning("satisfaction_check Check C failed: %s", exc) # ── Check D: Semantic Alignment (LLM judge, scored 0–1) ─────────────────── - if settings.SATISFACTION_CHECK_SEMANTIC and columns: + check_semantic = _f(runtime_flags, "SATISFACTION_CHECK_SEMANTIC", settings.SATISFACTION_CHECK_SEMANTIC) + threshold = float(_f(runtime_flags, "SATISFACTION_SEMANTIC_THRESHOLD", settings.SATISFACTION_SEMANTIC_THRESHOLD)) + if check_semantic and columns: prompt = ( f"User question: {state.get('user_query', '')}\n" f"SQL generated: {state.get('sql_query', '')}\n" @@ -92,10 +105,10 @@ async def satisfaction_check_node(state: AgentState) -> dict: try: structured = llm.with_structured_output(SemanticAlignmentOutput, method="json_schema") result: SemanticAlignmentOutput = await structured.ainvoke(prompt) - if result.alignment_score < settings.SATISFACTION_SEMANTIC_THRESHOLD: + if result.alignment_score < threshold: failures.append( f"[CHECK_D] Semantic alignment score {result.alignment_score:.2f} " - f"below threshold {settings.SATISFACTION_SEMANTIC_THRESHOLD}: {result.reason}" + f"below threshold {threshold}: {result.reason}" ) except Exception as exc: logger.warning("satisfaction_check Check D failed: %s", exc) @@ -113,10 +126,10 @@ async def satisfaction_check_node(state: AgentState) -> dict: "satisfaction_failures": failures, "satisfaction_fail_count": fail_count, "satisfaction_checks_run": { - "execution": settings.SATISFACTION_CHECK_EXECUTION, - "plausibility": settings.SATISFACTION_CHECK_PLAUSIBILITY, - "columns": 
settings.SATISFACTION_CHECK_COLUMNS, - "semantic": settings.SATISFACTION_CHECK_SEMANTIC, + "execution": _f(runtime_flags, "SATISFACTION_CHECK_EXECUTION", settings.SATISFACTION_CHECK_EXECUTION), + "plausibility": _f(runtime_flags, "SATISFACTION_CHECK_PLAUSIBILITY", settings.SATISFACTION_CHECK_PLAUSIBILITY), + "columns": _f(runtime_flags, "SATISFACTION_CHECK_COLUMNS", settings.SATISFACTION_CHECK_COLUMNS), + "semantic": check_semantic, }, }, ) diff --git a/agent/src/agent/nodes/schema_explorer.py b/agent/src/agent/nodes/schema_explorer.py index e9f2080..f3379d3 100644 --- a/agent/src/agent/nodes/schema_explorer.py +++ b/agent/src/agent/nodes/schema_explorer.py @@ -317,9 +317,22 @@ async def schema_explorer_node(state: AgentState, config: RunnableConfig | None allowed_tables = state.get("allowed_tables") allowed_statuses = state.get("allowed_statuses") feedback = state.get("feedback") + runtime_flags = state.get("runtime_flags") or {} - # ── G2-01: Resolve scoping mode ─────────────────────────────────────────── - scoping_mode: str = state.get("scoping_mode") or settings.TABLE_SCOPING_MODE + # Resolve all flag-tunable parameters for this invocation + profile_fetch_concurrency = int(runtime_flags.get("PROFILE_FETCH_CONCURRENCY", settings.PROFILE_FETCH_CONCURRENCY)) + max_profiles_to_fetch = int(runtime_flags.get("MAX_PROFILES_TO_FETCH", settings.MAX_PROFILES_TO_FETCH)) + schema_semantic_typing = bool(runtime_flags.get("SCHEMA_SEMANTIC_TYPING", settings.ENABLE_SEMANTIC_TYPING)) + schema_join_graph = bool(runtime_flags.get("SCHEMA_JOIN_GRAPH", settings.ENABLE_JOIN_GRAPH)) + schema_summarization = bool(runtime_flags.get("SCHEMA_SUMMARIZATION", settings.ENABLE_SCHEMA_SUMMARIZATION)) + schema_ambiguity_detect = bool(runtime_flags.get("SCHEMA_AMBIGUITY_DETECT", settings.ENABLE_AMBIGUITY_DETECT)) + scoping_mode_flag = runtime_flags.get("TABLE_SCOPING_MODE", settings.TABLE_SCOPING_MODE) + + # Per-invocation LLM (supports model switching via execution mode) + _llm = 
get_llm("schema_explorer", runtime_flags=runtime_flags) + + # ── G2-01: Resolve scoping mode (state > runtime_flag > env default) ───────── + scoping_mode: str = state.get("scoping_mode") or scoping_mode_flag # ── G2-05: Cache hit/miss counters (pushed to Langfuse at end) ──────────── cache_hit_count = 0 @@ -338,7 +351,7 @@ async def schema_explorer_node(state: AgentState, config: RunnableConfig | None # 2. Get profiles for top candidate tables (G2-05 cache-aware) import asyncio - sem = asyncio.Semaphore(settings.PROFILE_FETCH_CONCURRENCY) + sem = asyncio.Semaphore(profile_fetch_concurrency) async def fetch_profile(t_id, t_name): nonlocal cache_hit_count, cache_miss_count @@ -374,7 +387,7 @@ async def fetch_profile(t_id, t_name): "description": "", } ) - if i < settings.MAX_PROFILES_TO_FETCH: + if i < max_profiles_to_fetch: fetch_tasks.append(fetch_profile(t.id, t.name)) if fetch_tasks: @@ -403,15 +416,15 @@ async def fetch_profile(t_id, t_name): ) # Phase A: Semantic Typing - if settings.ENABLE_SEMANTIC_TYPING and profile_details: + if schema_semantic_typing and profile_details: try: - profile_details = await run_semantic_typing(profile_details, llm) + profile_details = await run_semantic_typing(profile_details, _llm) active_phases.append("SCHEMA_SEMANTIC_TYPING") except Exception as exc: logger.warning("SCHEMA_SEMANTIC_TYPING phase failed: %s", exc) # Phase B: Join Graph - if settings.ENABLE_JOIN_GRAPH and len(table_ids) >= 2: + if schema_join_graph and len(table_ids) >= 2: try: join_paths_json = await run_join_graph(table_ids) if join_paths_json: @@ -436,18 +449,18 @@ async def fetch_profile(t_id, t_name): # Phase C: Schema Summarization (replaces profiles_json in prompt) profiles_json_str = json.dumps(profile_details, indent=2) - if settings.ENABLE_SCHEMA_SUMMARIZATION and profile_details: + if schema_summarization and profile_details: try: - summaries = await run_schema_summarization(profile_details, llm) + summaries = await 
run_schema_summarization(profile_details, _llm) profiles_json_str = "\n".join(summaries) active_phases.append("SCHEMA_SUMMARIZATION") except Exception as exc: logger.warning("SCHEMA_SUMMARIZATION phase failed: %s", exc) # Phase D: Ambiguity Detection - if settings.ENABLE_AMBIGUITY_DETECT and profile_details: + if schema_ambiguity_detect and profile_details: try: - notes = await run_ambiguity_detection(profile_details, user_query, llm) + notes = await run_ambiguity_detection(profile_details, user_query, _llm) if notes: human_message += "\n\n[AMBIGUITY NOTES]\n" + "\n".join(f"- {n}" for n in notes) active_phases.append("SCHEMA_AMBIGUITY_DETECT") @@ -476,7 +489,7 @@ async def fetch_profile(t_id, t_name): ) prompt = ChatPromptTemplate.from_messages(langfuse_prompt.get_langchain_prompt()) - structured_llm = llm.with_structured_output( + structured_llm = _llm.with_structured_output( SchemaExplorerOutput, method="json_schema" ) chain = prompt | structured_llm diff --git a/agent/src/agent/state.py b/agent/src/agent/state.py index 22b7e52..712b7b3 100644 --- a/agent/src/agent/state.py +++ b/agent/src/agent/state.py @@ -36,3 +36,6 @@ class AgentState(TypedDict): # G2-04: satisfaction check satisfaction_failures: list[str] | None satisfaction_fail_count: int | None + # G4: feature flags & execution modes + execution_mode: str | None # e.g. "cost_saving", "high_quality", "benchmark" + runtime_flags: dict[str, Any] | None # resolved by init_flags_node diff --git a/agent/src/agent/utils/flag_bridge.py b/agent/src/agent/utils/flag_bridge.py new file mode 100644 index 0000000..ce9fc0f --- /dev/null +++ b/agent/src/agent/utils/flag_bridge.py @@ -0,0 +1,155 @@ +""" +FlagBridge (G4 Agent Integration) +=================================== +Resolves runtime flag values for a single agent invocation. + +Resolution order (highest → lowest priority): + 1. Execution mode overrides (config.execution_modes.flag_overrides by name) + 2. 
DB flag overrides (config.feature_flags.value — cached 30s) + 3. AgentSettings env defaults (always-on fallback when backend unreachable) + +Usage: + bridge = FlagBridge() + flags = await bridge.resolve_flags(execution_mode="cost_saving") + model = flags.get("QUERY_BUILDER_MODEL", settings.LLM_MODEL) +""" + +import logging +from typing import Any + +import httpx +from agent.config import settings + +logger = logging.getLogger(__name__) + +# Default env-var flag map — used as fallback when backend is unreachable +_ENV_DEFAULTS: dict[str, Any] = { + # Extraction + "EXTRACTOR_MODEL": settings.LLM_MODEL, + "EXTRACTOR_TEMPERATURE": 0.0, + "EXTRACTOR_TOP_K_TABLES": settings.HYBRID_SEARCH_MAX_TABLES, + "TABLE_SCOPING_MODE": settings.TABLE_SCOPING_MODE, + # Schema Explorer + "MAX_PROFILES_TO_FETCH": settings.MAX_PROFILES_TO_FETCH, + "PROFILE_FETCH_CONCURRENCY": settings.PROFILE_FETCH_CONCURRENCY, + "SCHEMA_CACHE_TTL": settings.SCHEMA_CACHE_TTL, + "PROFILE_CACHE_TTL": settings.PROFILE_CACHE_TTL, + "SCHEMA_SEMANTIC_TYPING": settings.ENABLE_SEMANTIC_TYPING, + "SCHEMA_JOIN_GRAPH": settings.ENABLE_JOIN_GRAPH, + "SCHEMA_SUMMARIZATION": settings.ENABLE_SCHEMA_SUMMARIZATION, + "SCHEMA_AMBIGUITY_DETECT": settings.ENABLE_AMBIGUITY_DETECT, + "SCHEMA_SUMMARY_MODEL": settings.LLM_MODEL, + "SCHEMA_TOP_K_JOINS": 5, + # Query Builder + "QUERY_BUILDER_MODEL": settings.LLM_MODEL, + "QUERY_BUILDER_TEMPERATURE": 0.0, + # Refiner + "MAX_REFINER_ITERATIONS": 4, + "MAX_SCHEMA_REPLAN_ITERATIONS": 2, + "REFINER_MODEL": settings.LLM_MODEL, + # Satisfaction Check + "SATISFACTION_CHECK_ENABLED": settings.SATISFACTION_CHECK_ENABLED, + "SATISFACTION_CHECK_EXECUTION": settings.SATISFACTION_CHECK_EXECUTION, + "SATISFACTION_CHECK_PLAUSIBILITY": settings.SATISFACTION_CHECK_PLAUSIBILITY, + "SATISFACTION_CHECK_COLUMNS": settings.SATISFACTION_CHECK_COLUMNS, + "SATISFACTION_CHECK_SEMANTIC": settings.SATISFACTION_CHECK_SEMANTIC, + "SATISFACTION_MIN_ROWS": settings.SATISFACTION_MIN_ROWS, + 
"SATISFACTION_MAX_ROWS": settings.SATISFACTION_MAX_ROWS, + "SATISFACTION_SEMANTIC_THRESHOLD": settings.SATISFACTION_SEMANTIC_THRESHOLD, + "SATISFACTION_JUDGE_MODEL": settings.LLM_MODEL, + # Skills + "SKILLS_ENABLED": True, + "SKILLS_HOT_RELOAD": settings.SKILLS_HOT_RELOAD, + "SKILLS_CACHE_TTL": 900, + # Evaluation + "LLM_JUDGE_ENABLED": True, + "EVAL_PARALLEL_WORKERS": 4, + "EVAL_JUDGE_MODEL": settings.LLM_MODEL, + # Catalog Validation + "CATALOG_VALIDATION_ENABLED": True, + "CATALOG_CACHE_TTL": 300, +} + + +class FlagBridge: + """ + Lightweight async HTTP client that fetches flag values from the Studio backend. + Falls back gracefully if BACKEND_URL is not set or backend is unreachable. + """ + + def __init__(self) -> None: + self._base_url = settings.BACKEND_URL.rstrip("/") if settings.BACKEND_URL else "" + + async def resolve_flags(self, execution_mode: str | None = None) -> dict[str, Any]: + """ + Build and return the fully-merged runtime flag map for this invocation. + + Steps: + 1. Start with env-var defaults (always available). + 2. Overlay DB flag values fetched from backend (if reachable). + 3. Overlay execution mode overrides (if a mode name is given). 
+ """ + # Layer 1: env defaults + resolved: dict[str, Any] = dict(_ENV_DEFAULTS) + + if not self._base_url: + logger.debug("FlagBridge: BACKEND_URL not set, using env-var defaults only") + if execution_mode: + logger.warning( + "FlagBridge: execution_mode='%s' requested but BACKEND_URL is not set", + execution_mode, + ) + return resolved + + # Layer 2: DB flag overrides + try: + async with httpx.AsyncClient(timeout=3.0) as client: + resp = await client.get(f"{self._base_url}/api/flags/map") + if resp.status_code == 200: + db_flags: dict[str, Any] = resp.json() + # Only overlay flags that have a non-null value in the DB + for name, value in db_flags.items(): + if value is not None: + resolved[name] = value + logger.debug("FlagBridge: loaded %d DB flag overrides", len(db_flags)) + else: + logger.warning( + "FlagBridge: /api/flags/map returned %d, using env defaults", + resp.status_code, + ) + except Exception as exc: + logger.warning("FlagBridge: failed to fetch flag map: %s — using env defaults", exc) + + # Layer 3: execution mode overrides + if execution_mode and execution_mode != "default": + try: + async with httpx.AsyncClient(timeout=3.0) as client: + resp = await client.get( + f"{self._base_url}/api/flags/modes/{execution_mode}" + ) + if resp.status_code == 200: + mode_data = resp.json() + overrides: dict = mode_data.get("flag_overrides") or {} + resolved.update(overrides) + logger.info( + "FlagBridge: applied execution_mode='%s' (%d overrides)", + execution_mode, + len(overrides), + ) + elif resp.status_code == 404: + logger.warning( + "FlagBridge: execution_mode='%s' not found in DB", + execution_mode, + ) + else: + logger.warning( + "FlagBridge: /api/flags/modes/%s returned %d", + execution_mode, + resp.status_code, + ) + except Exception as exc: + logger.warning( + "FlagBridge: failed to fetch mode '%s': %s", execution_mode, exc + ) + + return resolved diff --git a/agent/src/agent/utils/skill_registry.py b/agent/src/agent/utils/skill_registry.py index 
841d7d6..978a177 100644 --- a/agent/src/agent/utils/skill_registry.py +++ b/agent/src/agent/utils/skill_registry.py @@ -16,7 +16,7 @@ def __init__(self, redis_client: Redis | None = None): self.redis = redis_client self.cache_ttl = 300 # 5 minutes - async def get_skills(self, skill_ids: list[str]) -> list[dict[str, Any]]: + async def get_skills(self, skill_ids: list[str], hot_reload: bool = False, cache_ttl: int = 300) -> list[dict[str, Any]]: """ Fetch skills by ID, trying Redis cache first, then Jeen API. """ @@ -31,7 +31,7 @@ async def get_skills(self, skill_ids: list[str]) -> list[dict[str, Any]]: missing_ids = [] # 1. Try fetching from Redis - if self.redis and not settings.SKILLS_HOT_RELOAD: + if self.redis and not hot_reload: try: keys = [f"skill:{sid}" for sid in skill_ids] cached_values = await self.redis.mget(keys) @@ -57,7 +57,7 @@ async def get_skills(self, skill_ids: list[str]) -> list[dict[str, Any]]: pipeline = self.redis.pipeline() for skill in fetched: key = f"skill:{skill['id']}" - pipeline.setex(key, self.cache_ttl, json.dumps(skill)) + pipeline.setex(key, cache_ttl, json.dumps(skill)) await pipeline.execute() except Exception as e: logger.error(f"Redis error while caching skills: {e}") diff --git a/agent/tests/conftest.py b/agent/tests/conftest.py index a0c60d0..7e16b43 100644 --- a/agent/tests/conftest.py +++ b/agent/tests/conftest.py @@ -48,12 +48,19 @@ def with_structured_output(self, schema, method="json_schema"): @pytest.fixture(autouse=True) def mock_llm(): mock_instance = MockLLM() - with patch("agent.nodes.schema_explorer.llm", mock_instance), \ - patch("agent.nodes.refiner.llm", mock_instance), \ - patch("agent.nodes.query_builder.llm", mock_instance), \ - patch("agent.graph.llm", mock_instance), \ - patch("agent.nodes.finalizer.llm", mock_instance), \ - patch("agent.nodes.satisfaction_check.llm", mock_instance): + with patch("agent.llm.get_llm", return_value=mock_instance), \ + patch("agent.nodes.schema_explorer.get_llm", 
def upgrade() -> None:
    """
    Create the ``config`` schema with its three tables and seed initial data.

    Creates ``feature_flags``, ``feature_flag_audit_log`` (plus an index on
    ``flag_name``) and ``execution_modes``, then bulk-inserts the initial
    flag rows and the four built-in execution modes.
    """
    # 1. Create config schema
    op.execute("CREATE SCHEMA IF NOT EXISTS config")

    # 2. feature_flags table
    op.create_table(
        "feature_flags",
        sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), primary_key=True, nullable=False),
        sa.Column("value", postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column("type", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default=""),
        sa.Column("owner", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default=""),
        sa.Column("last_modified_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default=""),
        sa.Column("last_modified_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.PrimaryKeyConstraint("name"),
        schema="config",
    )

    # 3. feature_flag_audit_log table
    op.create_table(
        "feature_flag_audit_log",
        sa.Column("id", sqlmodel.sql.sqltypes.AutoString(), primary_key=True, nullable=False),
        # No index=True here: the index is created explicitly just below (and
        # dropped by that name in downgrade). Declaring it in both places risks
        # a duplicate-index failure when the migration runs.
        sa.Column("flag_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("actor", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("old_value", postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column("new_value", postgresql.JSON(astext_type=sa.Text()), nullable=True),
        sa.Column("changed_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.PrimaryKeyConstraint("id"),
        schema="config",
    )
    op.create_index(
        "ix_feature_flag_audit_log_flag_name",
        "feature_flag_audit_log",
        ["flag_name"],
        schema="config",
    )

    # 4. execution_modes table
    op.create_table(
        "execution_modes",
        sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), primary_key=True, nullable=False),
        sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default=""),
        sa.Column("flag_overrides", postgresql.JSON(astext_type=sa.Text()), nullable=False, server_default="{}"),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("created_by", sqlmodel.sql.sqltypes.AutoString(), nullable=False, server_default="system"),
        sa.Column("created_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(), nullable=False, server_default=sa.func.now()),
        sa.PrimaryKeyConstraint("name"),
        schema="config",
    )

    # 5. Seed initial feature flags (from TTS-G4-02).
    # Each tuple is (name, value, type, description, owner).
    flags = [
        # Extraction
        ("EXTRACTOR_MODEL", "gpt-4o", "string", "LLM model for extractor node", "DS team"),
        ("EXTRACTOR_TEMPERATURE", 0.0, "float", "Sampling temperature for extractor", "DS team"),
        ("EXTRACTOR_TOP_K_TABLES", 10, "int", "Max candidate tables from extractor", "DS team"),
        ("TABLE_SCOPING_MODE", "hybrid", "string", "Table scoping mode: strict | hybrid", "DS team"),
        # Schema Explorer
        ("MAX_PROFILES_TO_FETCH", 8, "int", "Max table profiles fetched per run", "DS team"),
        ("PROFILE_FETCH_CONCURRENCY", 4, "int", "asyncio.Semaphore limit for profile fetch", "Eng"),
        ("SCHEMA_CACHE_TTL", 600, "int", "DDL cache TTL in seconds", "Eng"),
        ("PROFILE_CACHE_TTL", 1800, "int", "Profile cache TTL in seconds", "Eng"),
        ("SCHEMA_SEMANTIC_TYPING", False, "bool", "Enable column semantic type classification", "DS team"),
        ("SCHEMA_JOIN_GRAPH", False, "bool", "Enable join graph injection (Phase 2)", "DS team"),
        ("SCHEMA_SUMMARIZATION", False, "bool", "Enable table summarization for large schemas", "DS team"),
        ("SCHEMA_AMBIGUITY_DETECT", True, "bool", "Enable ambiguous column detection", "DS team"),
        ("SCHEMA_SUMMARY_MODEL", "gpt-4o-mini", "string", "Model for schema summarization (cost-saving)", "DS team"),
        ("SCHEMA_TOP_K_JOINS", 5, "int", "Max join suggestions to inject", "DS team"),
        # Query Builder
        ("QUERY_BUILDER_MODEL", "gpt-4o", "string", "LLM model for SQL generation", "DS team"),
        ("QUERY_BUILDER_TEMPERATURE", 0.0, "float", "Temperature for query builder", "DS team"),
        # Refiner
        ("MAX_REFINER_ITERATIONS", 4, "int", "Max refiner retry attempts before fallback", "DS team"),
        ("MAX_SCHEMA_REPLAN_ITERATIONS", 2, "int", "Max schema_explorer re-entries before HITL", "DS team"),
        ("REFINER_MODEL", "gpt-4o", "string", "LLM model for refiner", "DS team"),
        # Satisfaction Check
        ("SATISFACTION_CHECK_ENABLED", False, "bool", "Master switch for satisfaction check module", "DS team"),
        ("SATISFACTION_CHECK_EXECUTION", True, "bool", "Check: SQL executed without error", "DS team"),
        ("SATISFACTION_CHECK_PLAUSIBILITY", True, "bool", "Check: result row count plausible", "DS team"),
        ("SATISFACTION_CHECK_COLUMNS", False, "bool", "Check: result columns match question intent", "DS team"),
        ("SATISFACTION_CHECK_SEMANTIC", False, "bool", "Check: LLM semantic alignment score", "DS team"),
        ("SATISFACTION_MIN_ROWS", 0, "int", "Min acceptable result rows (0 = allow empty)", "DS team"),
        ("SATISFACTION_MAX_ROWS", 1000000, "int", "Max acceptable result rows", "DS team"),
        ("SATISFACTION_SEMANTIC_THRESHOLD", 0.75, "float", "Min semantic alignment score (0-1)", "DS team"),
        ("SATISFACTION_JUDGE_MODEL", "gpt-4o-mini", "string", "Model for satisfaction semantic check", "DS team"),
        # Skills
        ("SKILLS_ENABLED", True, "bool", "Enable Jeen Skills API integration", "DS team"),
        ("SKILLS_HOT_RELOAD", False, "bool", "Re-fetch skills on every agent invocation", "DS team"),
        ("SKILLS_CACHE_TTL", 900, "int", "Skills cache TTL in seconds", "Eng"),
        # Evaluation
        ("LLM_JUDGE_ENABLED", True, "bool", "Enable LLM judge in evaluations", "DS team"),
        ("EVAL_PARALLEL_WORKERS", 4, "int", "Parallel eval task workers", "Eng"),
        ("EVAL_JUDGE_MODEL", "gpt-4-turbo", "string", "Model used by LLM judge", "DS team"),
        # Catalog Validation
        ("CATALOG_VALIDATION_ENABLED", True, "bool", "Validate extracted tables against Trino catalog", "DS team"),
        ("CATALOG_CACHE_TTL", 300, "int", "Catalog validation cache TTL in seconds", "Eng"),
    ]

    import json as _json
    from datetime import datetime as _dt

    # One timestamp shared by every seeded row.
    now = _dt.utcnow()
    flag_rows = [
        {
            "name": name,
            "value": _json.dumps(value),  # store as JSON-encoded value
            "type": flag_type,
            "description": description,
            "owner": owner,
            "last_modified_by": "seed",
            "last_modified_at": now,
        }
        for name, value, flag_type, description, owner in flags
    ]
    op.bulk_insert(
        sa.table(
            "feature_flags",
            sa.column("name"),
            sa.column("value"),
            sa.column("type"),
            sa.column("description"),
            sa.column("owner"),
            sa.column("last_modified_by"),
            sa.column("last_modified_at"),
            schema="config",
        ),
        flag_rows,
    )

    # 6. Seed built-in execution modes
    modes = [
        {
            "name": "default",
            "description": "Standard production configuration. No flag overrides.",
            "flag_overrides": _json.dumps({}),
            "is_active": True,
            "created_by": "system",
            "created_at": now,
            "updated_at": now,
        },
        {
            "name": "cost_saving",
            "description": "Use cheaper models and disable expensive LLM checks. Suitable for high-volume batch runs.",
            "flag_overrides": _json.dumps({
                "QUERY_BUILDER_MODEL": "gpt-4o-mini",
                "REFINER_MODEL": "gpt-4o-mini",
                "SATISFACTION_CHECK_SEMANTIC": False,
                "SCHEMA_SUMMARIZATION": False,
                "LLM_JUDGE_ENABLED": False,
            }),
            "is_active": True,
            "created_by": "system",
            "created_at": now,
            "updated_at": now,
        },
        {
            "name": "high_quality",
            "description": "Use strongest models and enable all quality checks. Best accuracy, higher cost.",
            "flag_overrides": _json.dumps({
                "QUERY_BUILDER_MODEL": "gpt-4o",
                "REFINER_MODEL": "gpt-4o",
                "SATISFACTION_CHECK_ENABLED": True,
                "SATISFACTION_CHECK_SEMANTIC": True,
                "SCHEMA_SUMMARIZATION": True,
                "SCHEMA_SEMANTIC_TYPING": True,
                "MAX_REFINER_ITERATIONS": 6,
            }),
            "is_active": True,
            "created_by": "system",
            "created_at": now,
            "updated_at": now,
        },
        {
            "name": "benchmark",
            "description": "Disable HITL and satisfaction checks for uninterrupted eval runs.",
            "flag_overrides": _json.dumps({
                "SATISFACTION_CHECK_ENABLED": False,
                "MAX_REFINER_ITERATIONS": 2,
                "SCHEMA_SUMMARIZATION": False,
            }),
            "is_active": True,
            "created_by": "system",
            "created_at": now,
            "updated_at": now,
        },
    ]
    op.bulk_insert(
        sa.table(
            "execution_modes",
            sa.column("name"),
            sa.column("description"),
            sa.column("flag_overrides"),
            sa.column("is_active"),
            sa.column("created_by"),
            sa.column("created_at"),
            sa.column("updated_at"),
            schema="config",
        ),
        modes,
    )
feedback, + flags, health, orchestration, profiling, @@ -126,6 +127,7 @@ async def audit_middleware(request: Request, call_next): api_router.include_router(admin_auth.router) api_router.include_router(admin_approval.router) api_router.include_router(agent.router) +api_router.include_router(flags.router) app.include_router(api_router) diff --git a/backend/app/routers/__init__.py b/backend/app/routers/__init__.py index 1a8a646..09ae260 100644 --- a/backend/app/routers/__init__.py +++ b/backend/app/routers/__init__.py @@ -2,10 +2,11 @@ audit, enrichment, evaluation, + flags, publish, questions, scopes, tables, ) -__all__ = [tables, enrichment, questions, evaluation, publish, scopes, audit] +__all__ = [tables, enrichment, questions, evaluation, flags, publish, scopes, audit] diff --git a/backend/app/routers/flags.py b/backend/app/routers/flags.py new file mode 100644 index 0000000..f5a9895 --- /dev/null +++ b/backend/app/routers/flags.py @@ -0,0 +1,178 @@ +""" +Feature Flags & Execution Modes API (G4-01) +============================================ +Endpoints: + GET /flags/ – list all flags + GET /flags/{name} – get single flag + PATCH /flags/{name} – update flag value (operator only) + DELETE /flags/{name} – reset flag to env default (operator only) + + GET /flags/modes/ – list all execution modes + GET /flags/modes/{name} – get single mode + PUT /flags/modes/{name} – create or update a mode (operator only) + DELETE /flags/modes/{name} – delete a mode (operator only) + +Auth: all write operations require X-Admin-Email header pointing to + a SecurityUser with is_admin=True (reuses existing admin auth pattern). 
+""" + +import logging + +from app.config import settings +from app.services.auth import require_admin +from app.services.flag_service import FlagService +from core.db.engine import get_session +from core.models.models import ( + ExecutionModeRead, + ExecutionModeUpsert, + FeatureFlagRead, + FeatureFlagUpdate, + SecurityUser, +) +from fastapi import APIRouter, Depends, Header, HTTPException +from sqlmodel import Session + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/flags", tags=["feature_flags"]) + +# Singleton service wired to the app's Redis +_flag_service: FlagService | None = None + + +def get_flag_service() -> FlagService: + global _flag_service + if _flag_service is None: + _flag_service = FlagService(redis_url=settings.REDIS_URL) + return _flag_service + + +def _get_admin( + x_admin_email: str = Header(..., alias="X-Admin-Email"), + session: Session = Depends(get_session), +) -> SecurityUser: + """Dependency: validates the caller is an active admin (operator role).""" + return require_admin(x_admin_email, session) + + +# ── Feature Flag endpoints ──────────────────────────────────────────────────── + + +@router.get("/", response_model=list[FeatureFlagRead]) +def list_flags( + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """List all feature flags with their current values and metadata.""" + return svc.list_all() + + +@router.get("/map") +def get_flag_map(svc: FlagService = Depends(get_flag_service)): + """ + Return a flat {name: value} dict for all flags. + Used by the agent's FlagBridge. No admin auth required (internal service call). 
+ """ + return svc.get_map() + + +@router.get("/{name}", response_model=FeatureFlagRead) +def get_flag( + name: str, + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """Get a single flag by name.""" + flags = svc.list_all() + flag = next((f for f in flags if f.name == name), None) + if flag is None: + raise HTTPException(status_code=404, detail=f"Flag '{name}' not found") + return flag + + +@router.patch("/{name}", response_model=FeatureFlagRead) +def update_flag( + name: str, + body: FeatureFlagUpdate, + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """ + Update a flag's value. Enforces type validation. + Returns 422 if value type does not match the flag's declared type. + All changes are audited with actor email and timestamp. + """ + logger.info("Admin '%s' updating flag '%s'", current_admin.email, name) + return svc.set(name, body.value, actor=current_admin.email) + + +@router.delete("/{name}", status_code=204) +def reset_flag( + name: str, + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """ + Reset a flag to its env-var default by clearing the DB override value. + Writes an audit record with new_value=null to mark the reset event. + """ + logger.info("Admin '%s' resetting flag '%s' to env default", current_admin.email, name) + svc.delete(name, actor=current_admin.email) + + +# ── Execution Mode endpoints ────────────────────────────────────────────────── + + +@router.get("/modes/", response_model=list[ExecutionModeRead]) +def list_modes( + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """List all execution modes.""" + return svc.list_modes() + + +@router.get("/modes/map") +def get_modes_map(svc: FlagService = Depends(get_flag_service)): + """ + Return a flat list of active mode names. 
+ Used by the agent and Studio to populate the execution_mode selector. + No admin auth required. + """ + modes = svc.list_modes() + return [{"name": m.name, "description": m.description, "is_active": m.is_active} for m in modes] + + +@router.get("/modes/{name}", response_model=ExecutionModeRead) +def get_mode( + name: str, + svc: FlagService = Depends(get_flag_service), +): + """Get a single execution mode (flag_overrides included). No admin auth required.""" + mode = svc.get_mode(name) + if mode is None: + raise HTTPException(status_code=404, detail=f"Execution mode '{name}' not found") + return mode + + +@router.put("/modes/{name}", response_model=ExecutionModeRead) +def upsert_mode( + name: str, + body: ExecutionModeUpsert, + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """Create or update an execution mode (operator only).""" + logger.info("Admin '%s' upserting execution mode '%s'", current_admin.email, name) + return svc.upsert_mode(name, body, actor=current_admin.email) + + +@router.delete("/modes/{name}", status_code=204) +def delete_mode( + name: str, + current_admin: SecurityUser = Depends(_get_admin), + svc: FlagService = Depends(get_flag_service), +): + """Delete an execution mode (operator only).""" + logger.info("Admin '%s' deleting execution mode '%s'", current_admin.email, name) + svc.delete_mode(name) diff --git a/backend/app/services/flag_service.py b/backend/app/services/flag_service.py new file mode 100644 index 0000000..cd8a4d2 --- /dev/null +++ b/backend/app/services/flag_service.py @@ -0,0 +1,271 @@ +""" +FlagService (G4-01) +=================== +Redis-backed feature flag and execution mode service. + +Resolution contract (highest → lowest priority): + 1. config.execution_modes.flag_overrides (by execution_mode name) + 2. config.feature_flags.value (DS-managed, cached 30 s) + 3. 
def validate_flag_type(value: Any, flag_type: str) -> bool:
    """
    Return True if *value* is compatible with the declared *flag_type*.

    Rules per type:
      - ``bool``:   only real booleans.
      - ``int``:    integers, excluding booleans (``bool`` subclasses ``int``).
      - ``float``:  integers or *finite* floats, excluding booleans.
      - ``string``: ``str`` instances.
      - ``json``:   ``dict`` or ``list`` instances.

    Any other *flag_type* yields False.
    """
    import math

    is_bool = isinstance(value, bool)
    if flag_type == "bool":
        return is_bool
    if flag_type == "int":
        return isinstance(value, int) and not is_bool
    if flag_type == "float":
        if is_bool:
            return False
        if isinstance(value, int):
            # Checked separately: math.isfinite() overflows on very large ints.
            return True
        # NaN / ±Infinity are not valid JSON, so they cannot be persisted in
        # the JSON-typed flag value column; reject them up front.
        return isinstance(value, float) and math.isfinite(value)
    if flag_type == "string":
        return isinstance(value, str)
    if flag_type == "json":
        return isinstance(value, (dict, list))
    return False
+ """ + + def __init__(self, redis_url: str) -> None: + self._redis_url = redis_url + + @property + def _redis(self) -> aioredis.Redis: + return _get_redis(self._redis_url) + + # ── Cache helpers ───────────────────────────────────────────────────────── + + def _try_cache_get(self, key: str) -> dict | None: + """Synchronous Redis GET (creates a new event loop if needed for sync context).""" + import asyncio + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + try: + raw = loop.run_until_complete(self._redis.get(key)) + if raw: + return json.loads(raw) + except Exception as exc: + logger.warning("Flag cache GET error for %r: %s", key, exc) + return None + + def _try_cache_set(self, key: str, value: dict, ttl: int) -> None: + import asyncio + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + try: + loop.run_until_complete(self._redis.setex(key, ttl, json.dumps(value))) + except Exception as exc: + logger.warning("Flag cache SET error for %r: %s", key, exc) + + def _invalidate(self, *keys: str) -> None: + import asyncio + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + try: + loop.run_until_complete(self._redis.delete(*keys)) + except Exception as exc: + logger.warning("Flag cache DELETE error for %r: %s", keys, exc) + + # ── Audit log ───────────────────────────────────────────────────────────── + + def _write_audit( + self, + session: Session, + flag_name: str, + actor: str, + old_value: Any, + new_value: Any, + ) -> None: + audit = FeatureFlagAuditLog( + flag_name=flag_name, + actor=actor, + old_value=old_value, + new_value=new_value, + changed_at=datetime.utcnow(), + ) + session.add(audit) + # caller is responsible for commit + + # ── Feature Flag CRUD ───────────────────────────────────────────────────── + + def list_all(self) -> list[FeatureFlag]: + """Return all flags from DB (no cache — always fresh for UI 
display).""" + with Session(engine) as session: + return session.exec(select(FeatureFlag)).all() + + def get_map(self) -> dict[str, Any]: + """ + Return {name: value} dict for all flags, using cache when warm. + Used by FlagBridge inside the agent. + """ + cached = self._try_cache_get("flag:all") + if cached is not None: + return cached + + with Session(engine) as session: + flags = session.exec(select(FeatureFlag)).all() + + flag_map = {f.name: f.value for f in flags} + self._try_cache_set("flag:all", flag_map, FLAG_CACHE_TTL) + return flag_map + + def set(self, name: str, value: Any, actor: str) -> FeatureFlag: + """ + Upsert a flag value. Validates type, writes audit log, invalidates cache. + Raises HTTPException(422) on type mismatch, HTTPException(404) if flag unknown. + """ + with Session(engine) as session: + flag = session.get(FeatureFlag, name) + if flag is None: + raise HTTPException(status_code=404, detail=f"Flag '{name}' not found") + + if not validate_flag_type(value, flag.type): + raise HTTPException( + status_code=422, + detail=( + f"Type mismatch: flag '{name}' expects type '{flag.type}', " + f"but received value of Python type '{type(value).__name__}'" + ), + ) + + old_value = flag.value + flag.value = value + flag.last_modified_by = actor + flag.last_modified_at = datetime.utcnow() + session.add(flag) + self._write_audit(session, name, actor, old_value, value) + session.commit() + session.refresh(flag) + + self._invalidate("flag:all") + return flag + + def delete(self, name: str, actor: str) -> None: + """ + Reset a flag to its env-var default by deleting the DB row. + Writes audit log with new_value=None to mark the reset. 
+ """ + with Session(engine) as session: + flag = session.get(FeatureFlag, name) + if flag is None: + raise HTTPException(status_code=404, detail=f"Flag '{name}' not found") + + old_value = flag.value + self._write_audit(session, name, actor, old_value, None) + # Reset: clear value rather than deleting the row so metadata is preserved + flag.value = None + flag.last_modified_by = actor + flag.last_modified_at = datetime.utcnow() + session.add(flag) + session.commit() + + self._invalidate("flag:all") + + # ── Execution Mode CRUD ─────────────────────────────────────────────────── + + def list_modes(self) -> list[ExecutionMode]: + with Session(engine) as session: + return session.exec(select(ExecutionMode)).all() + + def get_mode(self, name: str) -> ExecutionMode | None: + with Session(engine) as session: + return session.get(ExecutionMode, name) + + def get_mode_overrides(self, name: str) -> dict[str, Any]: + """Return the flag_overrides dict for a named mode, with caching.""" + cache_key = f"mode:{name}" + cached = self._try_cache_get(cache_key) + if cached is not None: + return cached + + with Session(engine) as session: + mode = session.get(ExecutionMode, name) + + if mode is None: + return {} + overrides = mode.flag_overrides or {} + self._try_cache_set(cache_key, overrides, MODE_CACHE_TTL) + return overrides + + def upsert_mode(self, name: str, data: ExecutionModeUpsert, actor: str) -> ExecutionMode: + with Session(engine) as session: + mode = session.get(ExecutionMode, name) + if mode is None: + mode = ExecutionMode(name=name, created_by=actor) + mode.description = data.description + mode.flag_overrides = data.flag_overrides + mode.is_active = data.is_active + mode.updated_at = datetime.utcnow() + session.add(mode) + session.commit() + session.refresh(mode) + + self._invalidate(f"mode:{name}") + return mode + + def delete_mode(self, name: str) -> None: + with Session(engine) as session: + mode = session.get(ExecutionMode, name) + if mode is None: + raise 
HTTPException(status_code=404, detail=f"Execution mode '{name}' not found") + session.delete(mode) + session.commit() + + self._invalidate(f"mode:{name}") diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index 3c5f48d..a26ce28 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -80,6 +80,7 @@ def test_engine(setup_test_db): with engine.connect() as conn: conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) conn.execute(text("CREATE SCHEMA IF NOT EXISTS security")) + conn.execute(text("CREATE SCHEMA IF NOT EXISTS config")) conn.commit() # Create all tables programmatically diff --git a/core/src/core/models/models.py b/core/src/core/models/models.py index 1a876d2..b95fff5 100644 --- a/core/src/core/models/models.py +++ b/core/src/core/models/models.py @@ -768,3 +768,90 @@ class HttpExtractorRead(SQLModel): status: ExtractorStatus created_at: datetime updated_at: datetime + + +# ───────────────────────────────────────────────────────────────────────────── +# CONFIG SCHEMA: FEATURE FLAGS & EXECUTION MODES (G4) +# ───────────────────────────────────────────────────────────────────────────── + + +class FeatureFlag(SQLModel, table=True): + """ + A single runtime-configurable parameter. + Stored in the config schema so it's logically separated from app data. + A *missing* row means "no DB override" — callers fall back to the + AgentSettings env-var default. 
+ """ + + __tablename__ = "feature_flags" + __table_args__ = {"schema": "config"} + + name: str = Field(primary_key=True) + value: Any | None = Field(default=None, sa_column=Column(JSON)) + type: str = Field(description="bool | int | float | string | json") + description: str = Field(default="") + owner: str = Field(default="") + last_modified_by: str = Field(default="") + last_modified_at: datetime = Field(default_factory=datetime.utcnow) + + +class FeatureFlagRead(SQLModel): + name: str + value: Any | None + type: str + description: str + owner: str + last_modified_by: str + last_modified_at: datetime + + +class FeatureFlagUpdate(SQLModel): + value: Any + + +class FeatureFlagAuditLog(SQLModel, table=True): + """Immutable audit trail for every flag mutation.""" + + __tablename__ = "feature_flag_audit_log" + __table_args__ = {"schema": "config"} + + id: str = Field(default_factory=lambda: str(uuid.uuid4()), primary_key=True) + flag_name: str = Field(index=True) + actor: str + old_value: Any | None = Field(default=None, sa_column=Column(JSON)) + new_value: Any | None = Field(default=None, sa_column=Column(JSON)) + changed_at: datetime = Field(default_factory=datetime.utcnow) + + +class ExecutionMode(SQLModel, table=True): + """ + A named set of flag overrides that DS researchers select by name + when calling the MCP agent tool (execution_mode="cost_saving"). 
+ """ + + __tablename__ = "execution_modes" + __table_args__ = {"schema": "config"} + + name: str = Field(primary_key=True) + description: str = Field(default="") + flag_overrides: Any = Field(default_factory=dict, sa_column=Column(JSON)) + is_active: bool = Field(default=True) + created_by: str = Field(default="system") + created_at: datetime = Field(default_factory=datetime.utcnow) + updated_at: datetime = Field(default_factory=datetime.utcnow) + + +class ExecutionModeRead(SQLModel): + name: str + description: str + flag_overrides: dict + is_active: bool + created_by: str + created_at: datetime + updated_at: datetime + + +class ExecutionModeUpsert(SQLModel): + description: str = "" + flag_overrides: dict = Field(default_factory=dict) + is_active: bool = True diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 1af1e4f..286d2eb 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -16,6 +16,7 @@ import { AgentTestingPage } from './pages/AgentTestingPage'; import { AnalyticsPage } from './pages/AnalyticsPage'; import { ControlCenterPage } from './pages/ControlCenterPage'; import { EvaluationsPage } from './pages/EvaluationsPage'; +import { FlagsPage } from './pages/FlagsPage'; import { LandingPage } from './pages/LandingPage'; import { ScopesPage } from './pages/ScopesPage'; import { useAdminStore } from './store/adminStore'; @@ -85,6 +86,14 @@ function AppLayout() { } /> + + + + } + /> {/* Catch-all redirect for unmatched inner routes */} } /> diff --git a/frontend/src/api/flags.ts b/frontend/src/api/flags.ts new file mode 100644 index 0000000..110d923 --- /dev/null +++ b/frontend/src/api/flags.ts @@ -0,0 +1,76 @@ +import { API_BASE_URL } from '../config/constants'; +import { useAdminStore } from '../store/adminStore'; + +const fetchWithAdminEmail = async (url: string, options: RequestInit = {}) => { + const user = useAdminStore.getState().user; + if (!user?.email) throw new Error('Not authenticated'); + + const headers = new 
Headers(options.headers || {}); + headers.set('X-Admin-Email', user.email); + headers.set('Content-Type', 'application/json'); + + const response = await fetch(`${API_BASE_URL}${url}`, { ...options, headers }); + + if (response.status === 403) { + useAdminStore.getState().logout(); + const err = await response.json().catch(() => null); + throw new Error(err?.detail || 'Forbidden'); + } + if (!response.ok) { + const err = await response.json().catch(() => null); + throw new Error(err?.detail || 'Request failed'); + } + if (response.status === 204) return null; + return response.json(); +}; + +export type FlagType = 'bool' | 'int' | 'float' | 'string' | 'json'; + +export interface FeatureFlag { + name: string; + value: unknown; + type: FlagType; + description: string; + owner: string; + last_modified_by: string; + last_modified_at: string; +} + +export interface ExecutionMode { + name: string; + description: string; + flag_overrides: Record; + is_active: boolean; + created_by: string; + created_at: string; + updated_at: string; +} + +export const flagsApi = { + // ── Feature Flags ───────────────────────────────────────────────────────── + list: (): Promise => fetchWithAdminEmail('/flags/'), + + update: (name: string, value: unknown): Promise => + fetchWithAdminEmail(`/flags/${name}`, { + method: 'PATCH', + body: JSON.stringify({ value }), + }), + + reset: (name: string): Promise => + fetchWithAdminEmail(`/flags/${name}`, { method: 'DELETE' }), + + // ── Execution Modes ─────────────────────────────────────────────────────── + listModes: (): Promise => fetchWithAdminEmail('/flags/modes/'), + + getMode: (name: string): Promise => + fetchWithAdminEmail(`/flags/modes/${name}`), + + upsertMode: (name: string, data: Partial): Promise => + fetchWithAdminEmail(`/flags/modes/${name}`, { + method: 'PUT', + body: JSON.stringify(data), + }), + + deleteMode: (name: string): Promise => + fetchWithAdminEmail(`/flags/modes/${name}`, { method: 'DELETE' }), +}; diff --git 
a/frontend/src/components/layout/Sidebar.tsx b/frontend/src/components/layout/Sidebar.tsx index 9b60296..941801c 100644 --- a/frontend/src/components/layout/Sidebar.tsx +++ b/frontend/src/components/layout/Sidebar.tsx @@ -8,6 +8,7 @@ import { Database, LayoutDashboard, Shield, + SlidersHorizontal, } from 'lucide-react'; import { orchestrationApi } from '../../api/orchestration'; @@ -49,7 +50,10 @@ const NAV_GROUPS = [ }, { label: 'Administration', - items: [{ to: '/admin', icon: Shield, key: 'nav.admin', label: 'Admin Panel' }], + items: [ + { to: '/admin', icon: Shield, key: 'nav.admin', label: 'Admin Panel' }, + { to: '/flags', icon: SlidersHorizontal, key: 'nav.flags', label: 'Feature Flags' }, + ], }, ]; diff --git a/frontend/src/config/constants.ts b/frontend/src/config/constants.ts index 2456e0e..662d657 100644 --- a/frontend/src/config/constants.ts +++ b/frontend/src/config/constants.ts @@ -26,6 +26,8 @@ export const QUERY_KEYS = { ALERTS: 'alerts', TRENDS: 'trends', TABLE_ANALYTICS: 'table-analytics', + FLAGS: 'flags', + EXECUTION_MODES: 'execution-modes', } as const; // Default pagination values diff --git a/frontend/src/pages/FlagsPage.css b/frontend/src/pages/FlagsPage.css new file mode 100644 index 0000000..44f66ed --- /dev/null +++ b/frontend/src/pages/FlagsPage.css @@ -0,0 +1,396 @@ +/* ── FlagsPage Styles ────────────────────────────────────────────────────── */ + +.flags-tabs { + display: flex; + gap: 0; + border-bottom: 1px solid var(--color-border); + margin-bottom: var(--space-6); +} + +.flags-tab { + background: none; + border: none; + border-bottom: 2px solid transparent; + color: var(--color-text-secondary); + cursor: pointer; + font-size: 14px; + padding: 10px 20px; + transition: all 0.15s; + display: flex; + align-items: center; + gap: 4px; +} + +.flags-tab:hover { + color: var(--color-text-primary); + background: var(--color-surface-secondary); +} + +.flags-tab--active { + border-bottom-color: var(--color-accent-primary); + color: 
var(--color-text-primary); + font-weight: 600; +} + +/* ── Toolbar ─────────────────────────────────────────────────────────────── */ + +.flags-toolbar { + display: flex; + align-items: center; + justify-content: space-between; + gap: var(--space-4); + margin-bottom: var(--space-5); + flex-wrap: wrap; +} + +.flags-toolbar__info { + display: flex; + align-items: center; + gap: 6px; + color: var(--color-text-tertiary); + font-size: 12px; +} + +/* ── Collapse ────────────────────────────────────────────────────────────── */ + +.flags-collapse .ant-collapse-item { + border: 1px solid var(--color-border) !important; + border-radius: 8px !important; + margin-bottom: 8px; + overflow: hidden; +} + +.flags-collapse .ant-collapse-header { + background: var(--color-surface-secondary) !important; + padding: 10px 16px !important; +} + +.collapse-header { + display: flex; + align-items: center; + gap: 10px; + font-weight: 600; + font-size: 13px; +} + +/* ── Flag Row ────────────────────────────────────────────────────────────── */ + +.flags-list { + display: flex; + flex-direction: column; + gap: 0; +} + +.flag-row { + display: grid; + grid-template-columns: 320px 1fr auto auto; + align-items: center; + gap: 12px; + padding: 10px 16px; + border-top: 1px solid var(--color-border); + transition: background 0.1s; +} + +.flag-row:first-child { + border-top: none; +} + +.flag-row:hover { + background: var(--color-surface-hover); +} + +.flag-row__meta { + display: flex; + align-items: center; + gap: 8px; + min-width: 0; +} + +.flag-row__name { + font-family: 'JetBrains Mono', 'Fira Code', monospace; + font-size: 12px; + font-weight: 600; + color: var(--color-text-primary); + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.flag-row__desc { + font-size: 12px; + color: var(--color-text-secondary); + min-width: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.flag-row__value { + display: flex; + align-items: center; +} + 
+.flag-row__display { + display: flex; + align-items: center; + gap: 6px; +} + +.flag-row__edit-controls { + display: flex; + align-items: center; + gap: 4px; +} + +.flag-value { + font-family: 'JetBrains Mono', monospace; + font-size: 12px; + background: var(--color-surface-secondary); + padding: 2px 6px; + border-radius: 4px; + color: var(--color-accent-primary); +} + +.flag-value--null { + color: var(--color-text-tertiary); + font-style: italic; + font-size: 12px; +} + +.flag-row__actions { + display: flex; + align-items: center; + gap: 6px; + white-space: nowrap; +} + +.flag-row__owner { + font-size: 11px; + color: var(--color-text-tertiary); +} + +.flag-row__modifier { + font-size: 11px; + color: var(--color-text-tertiary); + font-style: italic; +} + +/* ── Loading ─────────────────────────────────────────────────────────────── */ + +.flags-loading { + text-align: center; + padding: 48px; + color: var(--color-text-tertiary); +} + +/* ── Execution Modes ─────────────────────────────────────────────────────── */ + +.modes-description { + color: var(--color-text-secondary); + font-size: 13px; + margin: 0; +} + +.modes-description code { + background: var(--color-surface-secondary); + padding: 1px 6px; + border-radius: 4px; + font-size: 12px; +} + +.modes-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); + gap: 16px; +} + +.mode-card { + background: var(--color-surface-primary); + border: 1px solid var(--color-border); + border-radius: 10px; + padding: 16px; + transition: box-shadow 0.15s; +} + +.mode-card:hover { + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.15); +} + +.mode-card--inactive { + opacity: 0.6; +} + +.mode-card__header { + display: flex; + align-items: center; + justify-content: space-between; + margin-bottom: 8px; +} + +.mode-card__title { + display: flex; + align-items: center; + gap: 8px; + font-weight: 600; + font-size: 14px; + color: var(--color-text-primary); +} + +.mode-card__icon { + color: 
var(--color-accent-primary); +} + +.mode-card__actions { + display: flex; + gap: 2px; +} + +.mode-card__desc { + font-size: 12px; + color: var(--color-text-secondary); + margin-bottom: 12px; + line-height: 1.5; +} + +.mode-card__overrides { + display: flex; + flex-wrap: wrap; + gap: 4px; + align-items: center; +} + +.mode-card__override-count { + font-size: 11px; + color: var(--color-text-tertiary); + margin-right: 4px; +} + +/* ── Mode Form ───────────────────────────────────────────────────────────── */ + +.mode-form { + display: flex; + flex-direction: column; + gap: 16px; +} + +.mode-form__field { + display: flex; + flex-direction: column; + gap: 6px; +} + +.mode-form__field label { + font-size: 13px; + font-weight: 600; + color: var(--color-text-primary); +} + +.mode-form__field--inline { + flex-direction: row; + align-items: center; + justify-content: space-between; +} + +.mode-form__hint { + font-size: 11px; + color: var(--color-text-tertiary); +} + +.required { + color: #ff4d4f; + margin-left: 2px; +} + +/* ── Overrides Builder Styles ────────────────────────────────────────────── */ + +.overrides-builder { + border: 1px solid var(--color-border); + border-radius: 8px; + background: var(--color-surface-secondary); + padding: 12px; +} + +.overrides-builder-list { + display: flex; + flex-direction: column; + gap: 8px; + max-height: 250px; + overflow-y: auto; + margin-bottom: 12px; + padding-right: 4px; +} + +.override-builder-item { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + padding: 10px 14px; + background: var(--color-surface-primary); + border: 1px solid var(--color-border); + border-radius: 6px; + transition: border-color 0.15s, box-shadow 0.15s; +} + +.override-builder-item:hover { + border-color: var(--color-accent-primary); +} + +.override-builder-item__info { + display: flex; + flex-direction: column; + gap: 2px; + flex: 1; + min-width: 0; +} + +.override-builder-item__name { + font-family: 'JetBrains 
Mono', 'Fira Code', monospace; + font-size: 12px; + font-weight: 600; + color: var(--color-text-primary); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.override-builder-item__type { + font-size: 10px; + color: var(--color-text-tertiary); + display: flex; + align-items: center; + gap: 6px; +} + +.override-builder-item__value { + min-width: 180px; + display: flex; + justify-content: flex-end; +} + +.override-builder-item__delete { + color: var(--color-text-tertiary); + transition: color 0.15s; +} + +.override-builder-item__delete:hover { + color: #ff4d4f; +} + +.overrides-builder-empty { + text-align: center; + padding: 24px; + border: 1px dashed var(--color-border); + border-radius: 6px; + color: var(--color-text-tertiary); + font-size: 12px; + background: var(--color-surface-primary); +} + +.overrides-add-control { + display: flex; + align-items: center; + gap: 8px; + margin-top: 8px; +} + diff --git a/frontend/src/pages/FlagsPage.tsx b/frontend/src/pages/FlagsPage.tsx new file mode 100644 index 0000000..c440213 --- /dev/null +++ b/frontend/src/pages/FlagsPage.tsx @@ -0,0 +1,733 @@ +import { useState, useMemo } from 'react'; +import { + Badge, + Button, + Collapse, + Input, + InputNumber, + message, + Modal, + Popconfirm, + Select, + Switch, + Tag, + Tooltip, +} from 'antd'; +import { + AlertTriangle, + ChevronDown, + Edit3, + Plus, + RotateCcw, + Save, + SlidersHorizontal, + Trash2, + X, + Zap, +} from 'lucide-react'; +import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'; + +import { flagsApi, type FeatureFlag, type ExecutionMode } from '../api/flags'; +import { QUERY_KEYS } from '../config/constants'; +import { useAdminStore } from '../store/adminStore'; +import './FlagsPage.css'; + +// ── Flag category grouping ───────────────────────────────────────────────── + +const FLAG_CATEGORIES: Record = { + Extraction: [ + 'EXTRACTOR_MODEL', 'EXTRACTOR_TEMPERATURE', 'EXTRACTOR_TOP_K_TABLES', 'TABLE_SCOPING_MODE', + 
], + 'Schema Explorer': [ + 'MAX_PROFILES_TO_FETCH', 'PROFILE_FETCH_CONCURRENCY', 'SCHEMA_CACHE_TTL', 'PROFILE_CACHE_TTL', + 'SCHEMA_SEMANTIC_TYPING', 'SCHEMA_JOIN_GRAPH', 'SCHEMA_SUMMARIZATION', 'SCHEMA_AMBIGUITY_DETECT', + 'SCHEMA_SUMMARY_MODEL', 'SCHEMA_TOP_K_JOINS', + ], + 'Query Builder': ['QUERY_BUILDER_MODEL', 'QUERY_BUILDER_TEMPERATURE'], + Refiner: ['MAX_REFINER_ITERATIONS', 'MAX_SCHEMA_REPLAN_ITERATIONS', 'REFINER_MODEL'], + 'Satisfaction Check': [ + 'SATISFACTION_CHECK_ENABLED', 'SATISFACTION_CHECK_EXECUTION', 'SATISFACTION_CHECK_PLAUSIBILITY', + 'SATISFACTION_CHECK_COLUMNS', 'SATISFACTION_CHECK_SEMANTIC', 'SATISFACTION_MIN_ROWS', + 'SATISFACTION_MAX_ROWS', 'SATISFACTION_SEMANTIC_THRESHOLD', 'SATISFACTION_JUDGE_MODEL', + ], + Skills: ['SKILLS_ENABLED', 'SKILLS_HOT_RELOAD', 'SKILLS_CACHE_TTL'], + Evaluation: ['LLM_JUDGE_ENABLED', 'EVAL_PARALLEL_WORKERS', 'EVAL_JUDGE_MODEL'], + 'Catalog Validation': ['CATALOG_VALIDATION_ENABLED', 'CATALOG_CACHE_TTL'], +}; + +const TYPE_COLORS: Record = { + bool: 'blue', int: 'green', float: 'cyan', string: 'purple', json: 'orange', +}; + +// ── Inline Flag Editor ───────────────────────────────────────────────────── + +function FlagEditor({ + flag, + onSave, + onReset, + isSaving, + isResetting, +}: { + flag: FeatureFlag; + onSave: (name: string, value: unknown) => void; + onReset: (name: string) => void; + isSaving: boolean; + isResetting: boolean; +}) { + const [editing, setEditing] = useState(false); + const [draft, setDraft] = useState(flag.value); + + const handleSave = () => { + onSave(flag.name, draft); + setEditing(false); + }; + + const handleCancel = () => { + setDraft(flag.value); + setEditing(false); + }; + + const displayValue = () => { + if (flag.value === null || flag.value === undefined) { + return env default; + } + if (flag.type === 'bool') { + return ( + onSave(flag.name, v)} + loading={isSaving} + /> + ); + } + return {JSON.stringify(flag.value)}; + }; + + const editor = () => { + if (flag.type === 
'bool') return null; // bool uses switch directly + if (flag.type === 'int') { + return ( + setDraft(v)} + size="small" + style={{ width: 120 }} + /> + ); + } + if (flag.type === 'float') { + return ( + setDraft(v)} + size="small" + style={{ width: 120 }} + /> + ); + } + if (flag.type === 'json') { + return ( + { + try { setDraft(JSON.parse(e.target.value)); } catch { setDraft(e.target.value); } + }} + autoSize={{ minRows: 2, maxRows: 6 }} + style={{ width: 280, fontFamily: 'monospace', fontSize: 12 }} + /> + ); + } + return ( + setDraft(e.target.value)} + size="small" + style={{ width: 200 }} + /> + ); + }; + + return ( +
+
+ {flag.name} + {flag.type} +
+ +
{flag.description}
+ +
+ {editing && flag.type !== 'bool' ? ( +
+ {editor()} + +
+ ) : ( +
+ {displayValue()} + {flag.type !== 'bool' && ( + +
+ )} +
+ +
+ {flag.owner} + {flag.last_modified_by && flag.last_modified_by !== 'seed' && ( + by {flag.last_modified_by} + )} + onReset(flag.name)} + okText="Reset" + okButtonProps={{ danger: true }} + > + +
+
+ ); +} + +// ── Execution Mode Card ──────────────────────────────────────────────────── + +function ModeCard({ + mode, + onEdit, + onDelete, +}: { + mode: ExecutionMode; + onEdit: () => void; + onDelete: () => void; +}) { + const overrideCount = Object.keys(mode.flag_overrides || {}).length; + return ( +
+
+
+ + {mode.name} + {!mode.is_active && } +
+
+
+
+

{mode.description || No description}

+
+ {overrideCount} flag override{overrideCount !== 1 ? 's' : ''} + {Object.entries(mode.flag_overrides || {}).slice(0, 4).map(([k, v]) => ( + {k}: {JSON.stringify(v)} + ))} + {overrideCount > 4 && +{overrideCount - 4} more} +
+
+ Created by: {mode.created_by || 'system'} + Updated: {new Date(mode.updated_at).toLocaleDateString()} +
+
+ ); +} + +export function FlagsPage() { + const queryClient = useQueryClient(); + const user = useAdminStore((state) => state.user); + + const [activeTab, setActiveTab] = useState('flags'); + const [search, setSearch] = useState(''); + const [modeModalOpen, setModeModalOpen] = useState(false); + const [editingMode, setEditingMode] = useState(null); + const [modeDraft, setModeDraft] = useState({ name: '', description: '', flag_overrides: '{}', is_active: true }); + const [savingFlags, setSavingFlags] = useState>({}); + const [resettingFlags, setResettingFlags] = useState>({}); + + // Overrides list for the interactive builder + const [overridesList, setOverridesList] = useState<{ name: string; value: any }[]>([]); + const [selectedFlagToAdd, setSelectedFlagToAdd] = useState(null); + + const { data: flags = [], isLoading: flagsLoading } = useQuery({ + queryKey: [QUERY_KEYS.FLAGS], + queryFn: flagsApi.list, + staleTime: 30_000, + }); + + const { data: modes = [], isLoading: modesLoading } = useQuery({ + queryKey: [QUERY_KEYS.EXECUTION_MODES], + queryFn: flagsApi.listModes, + staleTime: 30_000, + }); + + // Get all flags that are not yet overridden + const availableFlags = useMemo(() => { + const overriddenNames = new Set(overridesList.map(o => o.name)); + return flags.filter(f => !overriddenNames.has(f.name)); + }, [flags, overridesList]); + + const handleAddOverride = (flagName: string) => { + const flagMeta = flags.find(f => f.name === flagName); + if (!flagMeta) return; + + let defaultValue: any = ''; + if (flagMeta.type === 'bool') defaultValue = false; + else if (flagMeta.type === 'int') defaultValue = 0; + else if (flagMeta.type === 'float') defaultValue = 0.0; + else if (flagMeta.type === 'json') defaultValue = {}; + + setOverridesList([...overridesList, { name: flagName, value: defaultValue }]); + setSelectedFlagToAdd(null); + }; + + const handleUpdateOverride = (name: string, value: any) => { + setOverridesList(overridesList.map(item => item.name === name ? 
{ ...item, value } : item)); + }; + + const handleRemoveOverride = (name: string) => { + setOverridesList(overridesList.filter(item => item.name !== name)); + }; + + const updateFlagMutation = useMutation({ + mutationFn: ({ name, value }: { name: string; value: unknown }) => flagsApi.update(name, value), + onMutate: ({ name }) => setSavingFlags((s) => ({ ...s, [name]: true })), + onSettled: (_d, _e, { name }) => setSavingFlags((s) => ({ ...s, [name]: false })), + onSuccess: (_, { name }) => { + message.success(`Flag "${name}" updated`); + queryClient.invalidateQueries({ queryKey: [QUERY_KEYS.FLAGS] }); + }, + onError: (err: Error, { name }) => { + message.error(`Failed to update "${name}": ${err.message}`); + }, + }); + + const resetFlagMutation = useMutation({ + mutationFn: (name: string) => flagsApi.reset(name), + onMutate: (name) => setResettingFlags((s) => ({ ...s, [name]: true })), + onSettled: (_d, _e, name) => setResettingFlags((s) => ({ ...s, [name]: false })), + onSuccess: (_, name) => { + message.success(`Flag "${name}" reset to env default`); + queryClient.invalidateQueries({ queryKey: [QUERY_KEYS.FLAGS] }); + }, + onError: (err: Error, name) => { + message.error(`Failed to reset "${name}": ${err.message}`); + }, + }); + + const upsertModeMutation = useMutation({ + mutationFn: ({ name, data }: { name: string; data: object }) => flagsApi.upsertMode(name, data), + onSuccess: () => { + message.success('Execution mode saved'); + queryClient.invalidateQueries({ queryKey: [QUERY_KEYS.EXECUTION_MODES] }); + setModeModalOpen(false); + }, + onError: (err: Error) => message.error(`Failed to save mode: ${err.message}`), + }); + + const deleteModeMutation = useMutation({ + mutationFn: (name: string) => flagsApi.deleteMode(name), + onSuccess: () => { + message.success('Execution mode deleted'); + queryClient.invalidateQueries({ queryKey: [QUERY_KEYS.EXECUTION_MODES] }); + }, + onError: (err: Error) => message.error(`Failed to delete mode: ${err.message}`), + }); + + 
// Group and filter flags + const flagMap = useMemo( + () => Object.fromEntries(flags.map((f) => [f.name, f])), + [flags], + ); + + const filteredCategories = useMemo(() => { + const q = search.toLowerCase(); + return Object.entries(FLAG_CATEGORIES).filter(([cat, names]) => { + if (!q) return true; + return cat.toLowerCase().includes(q) || names.some((n) => n.toLowerCase().includes(q)); + }).map(([cat, names]) => ({ + cat, + flags: names + .filter((n) => !q || n.toLowerCase().includes(q) || cat.toLowerCase().includes(q)) + .map((n) => flagMap[n]) + .filter(Boolean) as FeatureFlag[], + })).filter((g) => g.flags.length > 0); + }, [flags, flagMap, search]); + + const openNewMode = () => { + setEditingMode(null); + setModeDraft({ name: '', description: '', flag_overrides: '{}', is_active: true }); + setOverridesList([]); + setModeModalOpen(true); + }; + + const openEditMode = (mode: ExecutionMode) => { + setEditingMode(mode); + setModeDraft({ + name: mode.name, + description: mode.description, + flag_overrides: JSON.stringify(mode.flag_overrides, null, 2), + is_active: mode.is_active, + }); + const parsed = mode.flag_overrides || {}; + const list = Object.entries(parsed).map(([name, value]) => ({ name, value })); + setOverridesList(list); + setModeModalOpen(true); + }; + + const submitMode = () => { + // Construct overrides object from overridesList + const overridesObj: Record = {}; + for (const item of overridesList) { + overridesObj[item.name] = item.value; + } + + upsertModeMutation.mutate({ + name: modeDraft.name, + data: { + description: modeDraft.description, + flag_overrides: overridesObj, + is_active: modeDraft.is_active + }, + }); + }; + + const collapseItems = filteredCategories.map(({ cat, flags: catFlags }) => ({ + key: cat, + label: ( +
+ {cat} + +
+ ), + children: ( +
+ {catFlags.map((flag) => ( + updateFlagMutation.mutate({ name, value })} + onReset={(name) => resetFlagMutation.mutate(name)} + isSaving={!!savingFlags[flag.name]} + isResetting={!!resettingFlags[flag.name]} + /> + ))} +
+ ), + })); + + return ( +
+
+
+

+ + Feature Flags +

+

+ Configure agent parameters and execution modes without redeployment. + Changes take effect within 30 seconds. +

+
+
+ + {/* Tabs */} +
+ + +
+ + {/* ── FEATURE FLAGS TAB ─────────────────────────────────────────────── */} + {activeTab === 'flags' && ( +
+
+ } + value={search} + onChange={(e) => setSearch(e.target.value)} + allowClear + style={{ maxWidth: 340 }} + /> +
+ + Changes are audited and cached for 30s. Restart the agent process if you need immediate effect. +
+
+ + {flagsLoading ? ( +
Loading flags…
+ ) : ( + g.cat)} + ghost + className="flags-collapse" + /> + )} +
+ )} + + {/* ── EXECUTION MODES TAB ───────────────────────────────────────────── */} + {activeTab === 'modes' && ( +
+
+

+ Execution modes are named sets of flag overrides. Pass execution_mode="cost_saving" to the MCP tool to activate a preset. +

+ +
+ + {modesLoading ? ( +
Loading modes…
+ ) : ( +
+ {modes.map((mode) => ( + openEditMode(mode)} + onDelete={() => deleteModeMutation.mutate(mode.name)} + /> + ))} +
+ )} +
+ )} + + {/* ── MODE EDITOR MODAL ─────────────────────────────────────────────── */} + setModeModalOpen(false)} + okText="Save Mode" + confirmLoading={upsertModeMutation.isPending} + width={580} + > +
+ {!editingMode && ( +
+ + setModeDraft((d) => ({ ...d, name: e.target.value }))} + /> + Used as the value for execution_mode in MCP calls. +
+ )} +
+ + setModeDraft((d) => ({ ...d, description: e.target.value }))} + placeholder="What is this mode for?" + /> +
+ +
+
+ + +
+
+ + setModeDraft((d) => ({ ...d, is_active: v }))} + /> +
+
+ +
+ + +
+ {overridesList.length > 0 ? ( +
+ {overridesList.map((item) => { + const flagMeta = flags.find(f => f.name === item.name); + const flagType = flagMeta ? flagMeta.type : 'string'; + const flagDesc = flagMeta ? flagMeta.description : ''; + + return ( +
+
+ + {item.name} + + + Type: {flagType} + +
+ +
+ {flagType === 'bool' && ( + handleUpdateOverride(item.name, v)} + /> + )} + {flagType === 'int' && ( + handleUpdateOverride(item.name, v)} + style={{ width: '100%', maxWidth: 140 }} + /> + )} + {flagType === 'float' && ( + handleUpdateOverride(item.name, v)} + style={{ width: '100%', maxWidth: 140 }} + /> + )} + {flagType === 'string' && ( + handleUpdateOverride(item.name, e.target.value)} + style={{ width: '100%', maxWidth: 160 }} + /> + )} + {flagType === 'json' && ( + { + let parsedVal; + try { + parsedVal = JSON.parse(e.target.value); + } catch { + parsedVal = e.target.value; + } + handleUpdateOverride(item.name, parsedVal); + }} + style={{ fontFamily: 'monospace', fontSize: 11, width: '100%', maxWidth: 180 }} + /> + )} +
+ +
+ ); + })} +
+ ) : ( +
+ No configuration overrides configured for this mode. +
+ )} + + {availableFlags.length > 0 ? ( +
+