diff --git a/README.md b/README.md index 2d6eaabaa..d116bc328 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Run the application using the default `docker-compose` configuration. - By default, only OpenAI and Diffbot are enabled. Gemini requires additional GCP configurations. - Use the `VITE_LLM_MODELS_PROD` variable to configure the models you need. Example: ```bash - VITE_LLM_MODELS_PROD="openai_gpt_4o,openai_gpt_4o_mini,diffbot,gemini_1.5_flash" + VITE_LLM_MODELS_PROD="openai_gpt_5.1,openai_gpt_5_mini,diffbot,gemini_2.5_flash" ``` 2. **Input Sources**: @@ -199,10 +199,10 @@ VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} | DUPLICATE_TEXT_DISTANCE | Mandatory | 5 | This value used to find distance for all node pairs in the graph and calculated based on node properties | | DUPLICATE_SCORE_VALUE | Mandatory | 0.97 | Node score value to match duplicate node | | EFFECTIVE_SEARCH_RATIO | Mandatory | 1 | | -| GRAPH_CLEANUP_MODEL | Optional | 0.97 | Model name to clean-up graph in post processing | +| GRAPH_CLEANUP_MODEL | Optional | "openai_gpt_5_mini" | Model name used to clean up the graph in post processing | | MAX_TOKEN_CHUNK_SIZE | Optional | 10000 | Maximum token size to process file content | | YOUTUBE_TRANSCRIPT_PROXY| Optional | | Proxy key to process youtube video for getting transcript | -| EMBEDDING_MODEL | Optional | all-MiniLM-L6-v2 | Model for generating the text embedding (all-MiniLM-L6-v2 , openai , vertexai) | +| EMBEDDING_MODEL | Optional | | Model for generating the text embedding (blank defaults to all-MiniLM-L6-v2; other options: openai, vertexai, titan) | | IS_EMBEDDING | Optional | true | Flag to enable text embedding | | KNN_MIN_SCORE | Optional | 0.94 | Minimum score for KNN algorithm | | GEMINI_ENABLED | Optional | False | Flag to enable Gemini | @@ -219,7 +219,7 @@ VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-backendurl} | LANGCHAIN_ENDPOINT | Optional | https://api.smith.langchain.com | Endpoint for Langchain API | | ENTITY_EMBEDDING | Optional | False | If set to True, It will add embeddings for each entity in database | | LLM_MODEL_CONFIG_ollama_ | Optional | | Set ollama config as - model_name,model_local_url for local deployments | -| RAGAS_EMBEDDING_MODEL | Optional | openai | embedding model used by ragas evaluation framework | +| RAGAS_EMBEDDING_MODEL | Optional | | Embedding model used by the Ragas evaluation framework (blank defaults to all-MiniLM-L6-v2) | | | | **FRONTEND ENV** | VITE_BLOOM_URL | Mandatory | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization | diff --git a/backend/example.env b/backend/example.env index c0c24a90e..7d947513f 100644 --- a/backend/example.env +++ b/backend/example.env @@ -1,10 +1,10 @@ OPENAI_API_KEY = "" #This is required if you are using openai embedding model -EMBEDDING_MODEL = "all-MiniLM-L6-v2" #this can be openai or vertexai or by default all-MiniLM-L6-v2 -RAGAS_EMBEDDING_MODEL = "openai" #Keep blank if you want to use all-MiniLM-L6-v2 for ragas embeddings +EMBEDDING_MODEL = "" #values can be blank, "openai", "vertexai" or "titan" - blank defaults to all-MiniLM-L6-v2 +RAGAS_EMBEDDING_MODEL = "" #values can be blank or "openai" - blank defaults to all-MiniLM-L6-v2 IS_EMBEDDING = "TRUE" KNN_MIN_SCORE = "0.94" # Enable Gemini (default is False) | Can be False or True -GEMINI_ENABLED = False +GEMINI_ENABLED = "False" #Set to "True" if you are using a Gemini model # Enable Google Cloud logs (default is False) | Can be False or True GCP_LOG_METRICS_ENABLED = False 
NUMBER_OF_CHUNKS_TO_COMBINE = 6 @@ -19,7 +19,7 @@ LANGCHAIN_API_KEY = "" LANGCHAIN_PROJECT = "" LANGCHAIN_TRACING_V2 = "" LANGCHAIN_ENDPOINT = "" -GCS_FILE_CACHE = "" #save the file into GCS or local, SHould be True or False +GCS_FILE_CACHE = "" #save the file into GCS or local, Should be True or False NEO4J_USER_AGENT="" ENABLE_USER_AGENT = "" LLM_MODEL_CONFIG_model_version="" @@ -28,30 +28,26 @@ DUPLICATE_SCORE_VALUE =0.97 DUPLICATE_TEXT_DISTANCE =3 DEFAULT_DIFFBOT_CHAT_MODEL="openai_gpt_4o" #whichever model specified here , need to add config for that model in below format) #examples -LLM_MODEL_CONFIG_openai_gpt_3.5="gpt-3.5-turbo-0125,openai_api_key" -LLM_MODEL_CONFIG_openai_gpt_4o_mini="gpt-4o-mini-2024-07-18,openai_api_key" -LLM_MODEL_CONFIG_openai_gpt_4o="gpt-4o-2024-11-20,openai_api_key" -LLM_MODEL_CONFIG_openai_gpt_4.1_mini="gpt-4.1-mini,openai_api_key" -LLM_MODEL_CONFIG_openai_gpt_4.1="gpt-4.1,openai_api_key" -LLM_MODEL_CONFIG_openai_gpt_o3_mini="o3-mini-2025-01-31,openai_api_key" -LLM_MODEL_CONFIG_gemini_1.5_pro="gemini-1.5-pro-002" -LLM_MODEL_CONFIG_gemini_1.5_flash="gemini-1.5-flash-002" -LLM_MODEL_CONFIG_gemini_2.0_flash="gemini-2.0-flash-001" +LLM_MODEL_CONFIG_openai_gpt_5.1="gpt-5.1,openai-key" +LLM_MODEL_CONFIG_openai_gpt_5_mini="gpt-5-mini,openai-key" +LLM_MODEL_CONFIG_openai_gpt_4.1="gpt-4.1,openai-key" +LLM_MODEL_CONFIG_openai_gpt_4.1_mini="gpt-4.1-mini,openai-key" +LLM_MODEL_CONFIG_gemini_2.5_flash="gemini-2.5-flash" LLM_MODEL_CONFIG_gemini_2.5_pro="gemini-2.5-pro" LLM_MODEL_CONFIG_diffbot="diffbot,diffbot_api_key" -LLM_MODEL_CONFIG_azure_ai_gpt_35="azure_deployment_name,azure_endpoint or base_url,azure_api_key,api_version" +LLM_MODEL_CONFIG_groq_llama3.1_8b="llama-3.1-8b-instant,base_url,groq_api_key" +LLM_MODEL_CONFIG_anthropic_claude_4.5_sonnet="claude-sonnet-4-5-20250929,anthropic_api_key" +LLM_MODEL_CONFIG_llama4_maverick="Llama-4-Maverick-17B-128E-Instruct-FP8,https://api.llama.com/compat/v1/,llama_api_key" LLM_MODEL_CONFIG_azure_ai_gpt_4o="gpt-4o,https://YOUR-ENDPOINT.openai.azure.com/,azure_api_key,api_version" -LLM_MODEL_CONFIG_groq_llama3_70b="model_name,base_url,groq_api_key" -LLM_MODEL_CONFIG_anthropic_claude_4_sonnet="model_name,anthropic_api_key" #model_name="claude-sonnet-4-20250514" -LLM_MODEL_CONFIG_fireworks_llama4_maverick="model_name,fireworks_api_key" +LLM_MODEL_CONFIG_fireworks_qwen3_30b="accounts/fireworks/models/qwen3-30b-a3b,fireworks_api_key" +LLM_MODEL_CONFIG_fireworks_gpt_oss="accounts/fireworks/models/gpt-oss-120b,fireworks_api_key" +LLM_MODEL_CONFIG_fireworks_deepseek_v3="accounts/fireworks/models/deepseek-v3p1,fireworks_api_key" +LLM_MODEL_CONFIG_bedrock_nova_micro_v1="amazon.nova-micro-v1:0,aws_access_key,aws_secret_key,region_name" +LLM_MODEL_CONFIG_bedrock_nova_lite_v1="amazon.nova-lite-v1:0,aws_access_key,aws_secret_key,region_name" +LLM_MODEL_CONFIG_bedrock_nova_pro_v1="amazon.nova-pro-v1:0,aws_access_key,aws_secret_key,region_name" LLM_MODEL_CONFIG_ollama_llama3="llama3_model_name,model_local_url" YOUTUBE_TRANSCRIPT_PROXY="https://user:pass@domain:port" EFFECTIVE_SEARCH_RATIO=5 -GRAPH_CLEANUP_MODEL="openai_gpt_4o" -BEDROCK_EMBEDDING_MODEL="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.titan-embed-text-v1" -LLM_MODEL_CONFIG_bedrock_nova_micro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-micro-v1:0" -LLM_MODEL_CONFIG_bedrock_nova_lite_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-lite-v1:0" 
-LLM_MODEL_CONFIG_bedrock_nova_pro_v1="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.nova-pro-v1:0" -LLM_MODEL_CONFIG_fireworks_deepseek_r1="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-r1" -LLM_MODEL_CONFIG_fireworks_deepseek_v3="model_name,fireworks_api_key" #model_name="accounts/fireworks/models/deepseek-v3" +GRAPH_CLEANUP_MODEL="openai_gpt_5_mini" +BEDROCK_EMBEDDING_MODEL="model_name,aws_access_key,aws_secret_key,region_name" #model_name="amazon.titan-embed-text-v2.0" MAX_TOKEN_CHUNK_SIZE=2000 #Max token used to process/extract the file content. \ No newline at end of file diff --git a/backend/score.py b/backend/score.py index 11209a0b4..bd419f793 100644 --- a/backend/score.py +++ b/backend/score.py @@ -112,9 +112,9 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send): ) app.add_middleware(SessionMiddleware, secret_key=os.urandom(24)) -is_gemini_enabled = os.environ.get("GEMINI_ENABLED", "False").lower() in ("true", "1", "yes") -if is_gemini_enabled: - add_routes(app,ChatVertexAI(), path="/vertexai") +# is_gemini_enabled = os.environ.get("GEMINI_ENABLED", "False").lower() in ("true", "1", "yes") +# if is_gemini_enabled: +# add_routes(app,ChatVertexAI(), path="/vertexai") app.add_api_route("/health", health([healthy_condition, healthy])) diff --git a/backend/src/communities.py b/backend/src/communities.py index 0ecf493cc..62195e6e7 100644 --- a/backend/src/communities.py +++ b/backend/src/communities.py @@ -14,7 +14,7 @@ MAX_WORKERS = 10 MAX_COMMUNITY_LEVELS = 3 MIN_COMMUNITY_SIZE = 1 -COMMUNITY_CREATION_DEFAULT_MODEL = "openai_gpt_4o" +COMMUNITY_CREATION_DEFAULT_MODEL = "openai_gpt_4.1" CREATE_COMMUNITY_GRAPH_PROJECTION = """ diff --git a/backend/src/graphDB_dataAccess.py b/backend/src/graphDB_dataAccess.py index 77cc9e592..9ea646f35 100644 --- a/backend/src/graphDB_dataAccess.py +++ b/backend/src/graphDB_dataAccess.py @@ -239,7 +239,6 @@ def connection_check_and_get_vector_dimensions(self,database): embedding_model = os.getenv('EMBEDDING_MODEL') embeddings, application_dimension = load_embedding_model(embedding_model) - logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}') gds_status = self.check_gds_version() write_access = self.check_account_access(database=database) diff --git a/backend/src/llm.py b/backend/src/llm.py index 854e5926f..0cdf6413c 100644 --- a/backend/src/llm.py +++ b/backend/src/llm.py @@ -50,7 +50,7 @@ def get_llm(model: str): ) elif "openai" in model: model_name, api_key = env_value.split(",") - if "o3-mini" in model: + if "mini" in model: llm= ChatOpenAI( api_key=api_key, model=model_name) @@ -189,17 +189,14 @@ async def get_graph_document_list( else: node_properties = ["description"] relationship_properties = ["description"] - TOOL_SUPPORTED_MODELS = {"qwen3", "deepseek"} model_name = get_llm_model_name(llm) - ignore_tool_usage = not any(pattern in model_name for pattern in TOOL_SUPPORTED_MODELS) - logging.info(f"Keeping ignore tool usage parameter as {ignore_tool_usage}") llm_transformer = LLMGraphTransformer( llm=llm, node_properties=node_properties, relationship_properties=relationship_properties, allowed_nodes=allowedNodes, allowed_relationships=allowedRelationship, - ignore_tool_usage=ignore_tool_usage, + ignore_tool_usage=True, additional_instructions=ADDITIONAL_INSTRUCTIONS+ (additional_instructions if additional_instructions else "") ) diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py index 0865c5ad3..9535234e7 100644 --- 
a/backend/src/post_processing.py +++ b/backend/src/post_processing.py @@ -204,7 +204,7 @@ def graph_schema_consolidation(graph): messages=[("system", GRAPH_CLEANUP_PROMPT), ("human", "{input}")], partial_variables={"format_instructions": parser.get_format_instructions()} ) - graph_cleanup_model = os.getenv("GRAPH_CLEANUP_MODEL", 'openai_gpt_4o') + graph_cleanup_model = os.getenv("GRAPH_CLEANUP_MODEL") llm, _ = get_llm(graph_cleanup_model) chain = prompt | llm | parser diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py index 13beafdb6..3737ed3fa 100644 --- a/backend/src/shared/common_fn.py +++ b/backend/src/shared/common_fn.py @@ -113,13 +113,13 @@ def load_embedding_model(embedding_model_name: str): logging.info(f"Embedding: Using OpenAI Embeddings , Dimension:{dimension}") elif embedding_model_name == "vertexai": embeddings = VertexAIEmbeddings( - model="textembedding-gecko@003" + model="gemini-embedding-001" ) - dimension = 768 + dimension = 3072 logging.info(f"Embedding: Using Vertex AI Embeddings , Dimension:{dimension}") elif embedding_model_name == "titan": embeddings = get_bedrock_embeddings() - dimension = 1536 + dimension = 1024 logging.info(f"Embedding: Using bedrock titan Embeddings , Dimension:{dimension}") else: # embeddings = HuggingFaceEmbeddings(model_name="./local_model") diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py index f80ed2f0c..838feab5c 100644 --- a/backend/test_integrationqa.py +++ b/backend/test_integrationqa.py @@ -8,6 +8,12 @@ from dotenv import load_dotenv from src.main import * from src.QA_integration import QA_RAG +from pathlib import Path +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from tqdm import tqdm +from typing import Callable, List, Dict, Any, Tuple +import pandas as pd # Load environment variables load_dotenv() @@ -18,9 +24,12 @@ # Logging configuration logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") # Directory Paths -BASE_DIR = os.path.dirname(__file__) -CHUNK_DIR = os.path.join(BASE_DIR, "chunks") -MERGED_DIR = os.path.join(BASE_DIR, "merged_files") +BASE_DIR = Path(__file__).parent +CHUNK_DIR = BASE_DIR / "chunks" +MERGED_DIR = BASE_DIR / "merged_files" +RESULTS_DIR = BASE_DIR / "test_results" +RESULTS_DIR.mkdir(exist_ok=True) + # Initialize Neo4j connection graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) @@ -37,18 +46,32 @@ def create_source_node_local(graph, model, file_name): graphDB_data_Access.create_source_node(source_node) return source_node +def save_result_csv(data: Dict[str, Any], filename: str): + file_path = RESULTS_DIR / filename + df = pd.DataFrame([data]) + if not file_path.exists(): + df.to_csv(file_path, index=False) + else: + df.to_csv(file_path, mode='a', header=False, index=False) + +def save_result_json(data: Any, filename: str): + file_path = RESULTS_DIR / filename + tmp_path = file_path.with_suffix('.tmp') + with open(tmp_path, "w") as f: + json.dump(data, f, indent=4) + tmp_path.rename(file_path) def test_graph_from_file_local(model_name): """Tests graph creation from a local file.""" try: file_name = 'About Amazon.pdf' - merged_file_path = os.path.join(MERGED_DIR, file_name) + merged_file_path = MERGED_DIR / file_name shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf', merged_file_path) graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) create_source_node_local(graph, model_name, file_name) result = 
asyncio.run( extract_graph_from_file_local_file( - URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', '',100,20,1, None,'' + URI, USERNAME, PASSWORD, DATABASE, model_name, str(merged_file_path), file_name, '', '',100,20,1, None,'' ) ) logging.info(f"Local file test result: {result}") @@ -98,12 +121,13 @@ def test_graph_from_youtube_video(model_name): def test_graph_website(model_name): """Tests graph creation from a Website page.""" try: - source_url = 'https://www.cloudskillsboost.google/' + source_url = 'https://www.scrapethissite.com/pages/simple/' + file_name = 'Countries of the World: A Simple Example | Scrape This Site | A public sandbox for learning web scraping-simple' graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) create_source_node_graph_web_url(graph, model_name, source_url, "web-url") result = asyncio.run( extract_graph_from_web_page( - URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, "Google Cloud Skills Boost-www", '', '',100,20,1, None,'' + URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, file_name, '', '', 100, 20, 1, None, '' ) ) logging.info(f"Web URL test result: {result}") @@ -119,7 +143,6 @@ def test_chatbot_qna(model_name, mode='vector'): try: graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE) result = QA_RAG(graph, model_name, 'Tell me about Amazon', '[]', 1, mode) - # assert len(result['message']) > 20 logging.info(f"Chatbot QnA test passed for mode: {mode}") final_result = {'model_name':model_name,'mode':mode,'result':result} return final_result @@ -155,7 +178,7 @@ def test_populate_graph_schema_from_text(model_name): """Tests schema population from text.""" try: schema_text = "Amazon was founded on July 5, 1994, by Jeff Bezos in Bellevue, Washington." 
- result_schema = populate_graph_schema_from_text(schema_text, model_name, True) + result_schema = populate_graph_schema_from_text(schema_text, model_name, True, False) logging.info(f"Schema test result: {result_schema}") return result_schema except Exception as e: @@ -163,7 +186,6 @@ def test_populate_graph_schema_from_text(model_name): return {"status": "Failed", "error": str(e)} def get_duplicate_nodes(): - #graph = create_graph_database_connection(uri, userName, password, database) graphDb_data_Access = graphDBdataAccess(graph) nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list() if total_nodes['total']>0: @@ -190,92 +212,174 @@ def flatten_extract_dataframe(df: pd.DataFrame): flat_df = pd.DataFrame(rows) return flat_df -def run_tests(): - """Runs all integration tests and logs results.""" - extract_list = [] - extract_error_list = [] - chatbot_list = [] - chatbot_error_list = [] - other_api_list = [] - models = ['openai_gpt_4o','openai_gpt_4o_mini','openai_gpt_4.1','openai_gpt_4.1_mini','gemini_2.0_flash','fireworks_llama4_maverick','bedrock_nova_pro_v1'] - chatbot_modes = [ - "vector", - "graph+vector", - "fulltext", - "graph+vector+fulltext", - "entity search+vector" - ] - for model_name in models: - logging.info(f"Starting tests for model: {model_name}") - # Run each test independently to capture all errors - for test_func, test_args in [ - (test_graph_from_file_local, [model_name]), - (test_graph_from_wikipedia, [model_name]), - (test_graph_from_youtube_video,[model_name]), - (test_graph_website,[model_name]), - ]: - try: - result = test_func(*test_args) - if isinstance(result, dict) and result.get("status") == "Failed": - extract_error_list.append((model_name, test_func.__name__, result.get("error", "Unknown error"))) - else: - extract_list.append(result) - except Exception as e: - logging.error(f"Error in {test_func.__name__} for {model_name}: {e}") - extract_error_list.append((model_name, test_func.__name__, str(e))) - # Run all chatbot QnA modes - for mode in chatbot_modes: - try: - result = test_chatbot_qna(model_name,mode=mode) - if isinstance(result, dict) and result.get("status") == "Failed": - chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", result.get("error", "Unknown error"))) - else: - chatbot_list.append(result) - except Exception as e: - logging.error(f"Error in test_chatbot_qna ({mode}) for {model_name}: {e}") - chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", str(e))) - - try: +def run_model_tests(model_name: str, chatbot_modes: List[str]) -> Dict[str, Any]: + """ + Runs all test functions for a single model, saving results incrementally. + Returns a summary dict for reporting. 
+ """ + test_funcs: List[Tuple[Callable, List[Any], str]] = [ + (test_graph_from_file_local, [model_name], f"Extract_Integration_TestResult_{model_name}.csv"), + (test_graph_from_wikipedia, [model_name], f"Extract_Integration_TestResult_{model_name}.csv"), + (test_graph_from_youtube_video, [model_name], f"Extract_Integration_TestResult_{model_name}.csv"), + (test_graph_website, [model_name], f"Extract_Integration_TestResult_{model_name}.csv"), + ] + extract_error_list = [] + chatbot_error_list = [] + other_api_list = [] + test_results = [] + with tqdm(total=len(test_funcs), desc=f"Model: {model_name}", position=1, leave=False) as test_bar: + for test_func, test_args, result_file in test_funcs: + start_time = time.time() + try: + result = test_func(*test_args) + elapsed = time.time() - start_time + logging.info(f"{test_func.__name__} for {model_name} completed in {elapsed:.2f} seconds.") + result_with_time = result.copy() if isinstance(result, dict) else {"result": result} + result_with_time["time_taken_sec"] = round(elapsed, 2) + result_with_time["test_function"] = test_func.__name__ + save_result_csv(result_with_time, result_file) + test_results.append(result_with_time) + if isinstance(result, dict) and result.get("status") == "Failed": + extract_error_list.append((model_name, test_func.__name__, result.get("error", "Unknown error"), round(elapsed, 2))) + except Exception as e: + elapsed = time.time() - start_time + logging.error(f"Error in {test_func.__name__} for {model_name}: {e} (Time taken: {elapsed:.2f}s)") + extract_error_list.append((model_name, test_func.__name__, str(e), round(elapsed, 2))) + save_result_csv({"model": model_name, "function": test_func.__name__, "error": str(e), "time_taken_sec": round(elapsed, 2)}, result_file) + test_bar.update(1) + # Chatbot tests + with tqdm(total=len(chatbot_modes), desc=f"Chatbot: {model_name}", position=2, leave=False) as chatbot_bar: + for mode in chatbot_modes: + start_time = time.time() + try: + result = test_chatbot_qna(model_name, mode=mode) + elapsed = time.time() - start_time + logging.info(f"test_chatbot_qna ({mode}) for {model_name} completed in {elapsed:.2f} seconds.") + result_with_time = result.copy() if isinstance(result, dict) else {"result": result} + result_with_time["time_taken_sec"] = round(elapsed, 2) + result_with_time["mode"] = mode + save_result_csv(result_with_time, f"chatbot_Integration_TestResult_{model_name}.csv") + test_results.append(result_with_time) + if isinstance(result, dict) and result.get("status") == "Failed": + chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", result.get("error", "Unknown error"), round(elapsed, 2))) + except Exception as e: + elapsed = time.time() - start_time + logging.error(f"Error in test_chatbot_qna ({mode}) for {model_name}: {e} (Time taken: {elapsed:.2f}s)") + chatbot_error_list.append((model_name, f"test_chatbot_qna ({mode})", str(e), round(elapsed, 2))) + save_result_csv({"model": model_name, "function": "test_chatbot_qna", "mode": mode, "error": str(e), "time_taken_sec": round(elapsed, 2)}, f"chatbot_Integration_TestResult_{model_name}.csv") + chatbot_bar.update(1) + # Schema test + start_time = time.time() + try: schema_result = test_populate_graph_schema_from_text(model_name) - other_api_list.append({f"{model_name}":schema_result}) - except Exception as e: - logging.error(f"Error in test_populate_graph_schema_from_text for {model_name}: {e}") - other_api_list.append({f"{model_name}":str(e)}) - # Handle disconnected nodes separately - try: - dis_elementid, 
dis_status = get_disconnected_nodes() - delete_status = delete_disconnected_nodes([dis_elementid]) if dis_elementid else "No disconnected nodes found" - except Exception as e: - dis_status, delete_status = "Error fetching nodes", "Error deleting nodes" - logging.error(f"Error handling disconnected nodes: {e}") - - try: - dup = get_duplicate_nodes() - except Exception as e: - dup = "Error getting duplicate nodes" - logging.error(f"Error getting duplicate nodes: {e}") - # Convert results to DataFrame - df_extract = pd.DataFrame(extract_list) - df_extract['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_extract.to_csv(f"test_results/Extract_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) + elapsed = time.time() - start_time + logging.info(f"test_populate_graph_schema_from_text for {model_name} completed in {elapsed:.2f} seconds.") + schema_result_with_time = schema_result.copy() if isinstance(schema_result, dict) else {"result": schema_result} + schema_result_with_time["time_taken_sec"] = round(elapsed, 2) + save_result_json(schema_result_with_time, f"schema_result_{model_name}.json") + other_api_list.append({f"{model_name}": schema_result_with_time}) + except Exception as e: + elapsed = time.time() - start_time + logging.error(f"Error in test_populate_graph_schema_from_text for {model_name}: {e} (Time taken: {elapsed:.2f}s)") + other_api_list.append({f"{model_name}": str(e)}) + save_result_json({"model": model_name, "error": str(e), "time_taken_sec": round(elapsed, 2)}, f"schema_result_{model_name}.json") + return { + "model": model_name, + "extract_errors": extract_error_list, + "chatbot_errors": chatbot_error_list, + "other_api": other_api_list, + "test_results": test_results + } - df_chatbot = pd.DataFrame(chatbot_list) - df_chatbot['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_chatbot.to_csv(f"test_results/chatbot_Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) +def run_tests_sequential(models: List[str], chatbot_modes: List[str]) -> None: + """ + Runs all model tests sequentially, without progress bars, and generates a summary report. 
+ """ + all_summaries = [] + for idx, model in enumerate(models): + logging.info(f"Running tests for model {idx+1}/{len(models)}: {model}") + summary = run_model_tests(model, chatbot_modes) + all_summaries.append(summary) + # Handle disconnected nodes and duplicates (single-threaded, after all models) + start_time = time.time() + try: + dis_elementid, dis_status = get_disconnected_nodes() + delete_status = delete_disconnected_nodes([dis_elementid]) if dis_elementid else "No disconnected nodes found" + elapsed = time.time() - start_time + save_result_json({"disconnected_nodes": dis_status, "delete_status": delete_status, "time_taken_sec": round(elapsed, 2)}, "disconnected_nodes.json") + except Exception as e: + elapsed = time.time() - start_time + save_result_json({"error": str(e), "time_taken_sec": round(elapsed, 2)}, "disconnected_nodes.json") + start_time = time.time() + try: + dup = get_duplicate_nodes() + elapsed = time.time() - start_time + save_result_json({"duplicate_nodes": dup, "time_taken_sec": round(elapsed, 2)}, "duplicate_nodes.json") + except Exception as e: + elapsed = time.time() - start_time + save_result_json({"error": str(e), "time_taken_sec": round(elapsed, 2)}, "duplicate_nodes.json") + # Save errors incrementally + for summary in all_summaries: + if summary["extract_errors"]: + df_errors = pd.DataFrame(summary["extract_errors"], columns=['Model', 'Function', 'Error', 'TimeTakenSec']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(RESULTS_DIR / f"Extract_Error_details.csv", mode='a', header=not (RESULTS_DIR / f"Extract_Error_details.csv").exists(), index=False) + if summary["chatbot_errors"]: + df_errors = pd.DataFrame(summary["chatbot_errors"], columns=['Model', 'Function', 'Error', 'TimeTakenSec']) + df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') + df_errors.to_csv(RESULTS_DIR / f"chatbot_Error_details.csv", mode='a', header=not (RESULTS_DIR / f"chatbot_Error_details.csv").exists(), index=False) + # Generate summary report + generate_summary_report(all_summaries, RESULTS_DIR / "summary_report.md") + logging.info("All tests completed.") - other_api_dict = {'disconnected_nodes':dis_status,'delete_disconnected_nodes' : delete_status,'get_duplicate_nodes':dup,'test_populate_graph_schema_from_text':other_api_list} - with open(f"test_results/other_api_results_{dt.now().strftime('%Y%m%d_%H%M%S')}.txt", "w") as file: - file.write(json.dumps(other_api_dict, indent=4)) - # Save errors - if extract_error_list: - df_errors = pd.DataFrame(extract_error_list, columns=['Model', 'Function', 'Error']) - df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_errors.to_csv(f"test_results/Extract_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - if chatbot_error_list: - df_errors = pd.DataFrame(chatbot_error_list, columns=['Model', 'Function', 'Error']) - df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d') - df_errors.to_csv(f"test_results/chatbot_Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False) - logging.info("All tests completed.") +def generate_summary_report(summaries: List[Dict[str, Any]], report_path: Path) -> None: + """ + Generates a Markdown summary report from all model test summaries. 
+ """ + lines = ["# Integration Test Summary Report\n"] + for summary in summaries: + lines.append(f"## Model: {summary['model']}\n") + lines.append("### Test Results\n") + for result in summary["test_results"]: + status = result.get("status", "Success") + func = result.get("test_function", result.get("mode", "")) + time_taken = result.get("time_taken_sec", "") + lines.append(f"- **{func}**: {status} (Time: {time_taken}s)") + if summary["extract_errors"]: + lines.append("\n### Extract Errors\n") + for err in summary["extract_errors"]: + lines.append(f"- {err}") + if summary["chatbot_errors"]: + lines.append("\n### Chatbot Errors\n") + for err in summary["chatbot_errors"]: + lines.append(f"- {err}") + lines.append("\n---\n") + with open(report_path, "w") as f: + f.write("\n".join(lines)) +# Usage in main if __name__ == "__main__": - run_tests() + models = [ + 'openai_gpt_5.1', + 'openai_gpt_5_mini', + 'openai_gpt_4.1', + 'openai_gpt_4.1_mini', + 'gemini_2.5_flash', + 'gemini_2.5_pro', + 'groq_llama3.1_8b', + 'anthropic_claude_4.5_sonnet', + 'llama4_maverick', + 'fireworks_gpt_oss', + 'fireworks_deepseek_v3', + 'bedrock_nova_micro_v1', + 'bedrock_nova_lite_v1', + 'bedrock_nova_pro_v1' + ] + chatbot_modes = [ + "vector", + "graph+vector", + "fulltext", + "graph+vector+fulltext", + "entity search+vector" + ] + run_tests_sequential(models, chatbot_modes) diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx index e9f961ac7..afa6a92f4 100644 --- a/frontend/src/components/ChatBot/Chatbot.tsx +++ b/frontend/src/components/ChatBot/Chatbot.tsx @@ -444,7 +444,7 @@ const Chatbot: FC = (props) => { status={connectionStatus ? 'online' : 'offline'} type='image' htmlAttributes={{ - shape: 'square' + shape: 'square', }} /> ) : ( @@ -456,7 +456,7 @@ const Chatbot: FC = (props) => { status={connectionStatus ? 'online' : 'offline'} type='image' htmlAttributes={{ - shape: 'square' + shape: 'square', }} /> )} diff --git a/frontend/src/components/ChatBot/ChunkInfo.tsx b/frontend/src/components/ChatBot/ChunkInfo.tsx index e68361135..782c6a82b 100644 --- a/frontend/src/components/ChatBot/ChunkInfo.tsx +++ b/frontend/src/components/ChatBot/ChunkInfo.tsx @@ -88,7 +88,7 @@ const ChunkInfo: FC = ({ loading, chunks, mode }) => { href={generateYouTubeLink(chunk?.url, chunk?.start_time)} type={'external'} htmlAttributes={{ - target: '_blank' + target: '_blank', }} > = ({ loading, chunks, mode }) => { <>
- + {chunk?.url}
diff --git a/frontend/src/components/ChatBot/SourcesInfo.tsx b/frontend/src/components/ChatBot/SourcesInfo.tsx index 9e8472321..2de2c9d98 100644 --- a/frontend/src/components/ChatBot/SourcesInfo.tsx +++ b/frontend/src/components/ChatBot/SourcesInfo.tsx @@ -71,9 +71,13 @@ const SourcesInfo: FC = ({ loading, mode, chunks, sources }) => { {isAllowedHost(link, ['wikipedia.org']) && (
Wikipedia Logo - + = ({ loading, mode, chunks, sources }) => { <>
youtube-source-logo - + = ({ loading, mode, chunks, sources }) => { !isAllowedHost(link, ['storage.googleapis.com', 'wikipedia.org', 'www.youtube.com']) && (
- + {link}
diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx index dbfbbc69e..b4ee14ff3 100644 --- a/frontend/src/components/FileTable.tsx +++ b/frontend/src/components/FileTable.tsx @@ -387,9 +387,13 @@ const FileTable: ForwardRefRenderFunction = (props, re return ( - + {info.row.original.fileSource} diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx index c75d0195f..0df748050 100644 --- a/frontend/src/components/Graph/GraphViewModal.tsx +++ b/frontend/src/components/Graph/GraphViewModal.tsx @@ -165,7 +165,7 @@ const GraphViewModal: React.FunctionComponent = ({ } if (!result.data.data) { - throw new Error(`No data in response. ${ result.data.error || 'Please try again.'}`); + throw new Error(`No data in response. ${result.data.error || 'Please try again.'}`); } const { nodes = [], relationships = [] } = result.data.data; diff --git a/frontend/src/components/User/Profile.tsx b/frontend/src/components/User/Profile.tsx index 19830c7d6..67d817ba2 100644 --- a/frontend/src/components/User/Profile.tsx +++ b/frontend/src/components/User/Profile.tsx @@ -38,7 +38,7 @@ export default function Profile() { size='large' type='letters' htmlAttributes={{ - shape: 'square' + shape: 'square', }} />
diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts index 98b4bb281..386f4213b 100644 --- a/frontend/src/utils/Constants.ts +++ b/frontend/src/utils/Constants.ts @@ -8,79 +8,65 @@ export const APP_SOURCES = ? (process.env.VITE_REACT_APP_SOURCES?.split(',') as string[]) : ['s3', 'local', 'wiki', 'youtube', 'web']; -export const llms = process.env?.VITE_LLM_MODELS?.trim() - ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) - : [ - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'openai_gpt_4.1', - 'openai_gpt_4.1_mini', - 'openai_gpt_o3_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'gemini_2.0_flash', - 'gemini_2.5_pro', - 'diffbot', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'ollama_llama3', - 'groq_llama3_70b', - 'anthropic_claude_4_sonnet', - 'fireworks_llama4_maverick', - 'fireworks_llama4_scout', - 'fireworks_qwen72b_instruct', - 'bedrock_nova_micro_v1', - 'bedrock_nova_lite_v1', - 'bedrock_nova_pro_v1', - 'fireworks_deepseek_r1', - 'fireworks_deepseek_v3', - 'llama4_maverick', - 'fireworks_qwen3_30b', - 'fireworks_qwen3_235b', - ]; +export const llms = + process.env?.VITE_LLM_MODELS?.trim() != '' + ? (process.env.VITE_LLM_MODELS?.split(',') as string[]) + : [ + 'openai_gpt_5.1', + 'openai_gpt_5_mini', + 'gemini_2.5_flash', + 'gemini_2.5_pro', + 'diffbot', + 'groq_llama3.1_8b', + 'anthropic_claude_4.5_sonnet', + 'anthropic_claude_4.5_haiku', + 'llama4_maverick', + 'bedrock_nova_micro_v1', + 'bedrock_nova_lite_v1', + 'bedrock_nova_pro_v1', + 'fireworks_deepseek_v3', + 'fireworks_qwen3_30b', + 'fireworks_gpt_oss', + ]; export const supportedLLmsForRagas = [ - 'openai_gpt_4', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'openai_gpt_4.1', - 'openai_gpt_4.1_mini', - 'gemini_1.5_pro', - 'gemini_1.5_flash', - 'gemini_2.0_flash', + 'openai_gpt_5.1', + 'openai_gpt_5_mini', + 'gemini_2.5_flash', 'gemini_2.5_pro', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'groq_llama3_70b', - 'anthropic_claude_4_sonnet', - 'fireworks_llama4_maverick', - 'fireworks_llama4_scout', - 'openai_gpt_o3_mini', + 'diffbot', + 'groq_llama3.1_8b', + 'anthropic_claude_4.5_sonnet', + 'anthropic_claude_4.5_haiku', 'llama4_maverick', + 'bedrock_nova_micro_v1', + 'bedrock_nova_lite_v1', + 'bedrock_nova_pro_v1', + 'fireworks_deepseek_v3', 'fireworks_qwen3_30b', - 'fireworks_qwen3_235b', + 'fireworks_gpt_oss', ]; export const supportedLLmsForGroundTruthMetrics = [ - 'openai_gpt_4', - 'openai_gpt_4o', - 'openai_gpt_4o_mini', - 'openai_gpt_4.1', - 'openai_gpt_4.1_mini', - 'azure_ai_gpt_35', - 'azure_ai_gpt_4o', - 'groq_llama3_70b', - 'anthropic_claude_4_sonnet', - 'fireworks_llama4_maverick', - 'fireworks_llama4_scout', - 'openai_gpt_o3_mini', + 'openai_gpt_5.1', + 'openai_gpt_5_mini', + 'gemini_2.5_flash', + 'gemini_2.5_pro', + 'diffbot', + 'groq_llama3.1_8b', + 'anthropic_claude_4.5_sonnet', + 'anthropic_claude_4.5_haiku', 'llama4_maverick', + 'bedrock_nova_micro_v1', + 'bedrock_nova_lite_v1', + 'bedrock_nova_pro_v1', + 'fireworks_deepseek_v3', 'fireworks_qwen3_30b', - 'fireworks_qwen3_235b', + 'fireworks_gpt_oss', ]; export const prodllms = process.env.VITE_LLM_MODELS_PROD?.trim() != '' ? 
(process.env.VITE_LLM_MODELS_PROD?.split(',') as string[]) - : ['openai_gpt_4o', 'openai_gpt_4o_mini', 'diffbot', 'gemini_2.0_flash']; + : ['openai_gpt_5_mini', 'diffbot', 'gemini_2.5_flash', 'anthropic_claude_4.5_haiku']; export const chatModeLables = { vector: 'vector', diff --git a/frontend/src/utils/Utils.ts b/frontend/src/utils/Utils.ts index 3f70d78b0..ef3969ec8 100644 --- a/frontend/src/utils/Utils.ts +++ b/frontend/src/utils/Utils.ts @@ -43,12 +43,18 @@ export const wikiValidation = (url: string) => { return url.trim() != '' && /https:\/\/([a-zA-Z]{2,3})\.wikipedia\.org\/wiki\/(.*)/gm.test(url) != false; }; export const webLinkValidation = (url: string) => { - return (url.trim() != '' && /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_.~#?&//=]*)/g.test(url) != false); + return ( + url.trim() != '' && + /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_.~#?&//=]*)/g.test(url) != false + ); }; export const youtubeLinkValidation = (url: string) => { - return (url.trim() != '' && /^(?:https?:\/\/)?(?:www\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$/.test( - url - ) != false); + return ( + url.trim() != '' && + /^(?:https?:\/\/)?(?:www\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$/.test( + url + ) != false + ); }; // Status indicator icons to status column export const statusCheck = (status: string) => {
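
Note on the LLM_MODEL_CONFIG_* convention used in backend/example.env and backend/src/llm.py above: each entry's value is a comma-separated list whose first field is the provider's model name, followed by credentials and, for some providers, an endpoint or region. The sketch below is illustrative only; the helper name read_model_config and the sample values are not part of the codebase, and get_llm() in backend/src/llm.py performs the equivalent split directly with env_value.split(",").

import os

def read_model_config(model_id: str) -> list[str]:
    """Return the comma-separated fields configured for a model id,
    e.g. 'openai_gpt_4.1' -> ['gpt-4.1', 'openai_api_key']."""
    env_value = os.environ.get(f"LLM_MODEL_CONFIG_{model_id}", "")
    if not env_value:
        raise ValueError(f"LLM_MODEL_CONFIG_{model_id} is not set")
    # Fields keep their positional meaning: OpenAI entries are "model,api_key";
    # Bedrock entries are "model,aws_access_key,aws_secret_key,region_name".
    return [field.strip() for field in env_value.split(",")]

if __name__ == "__main__":
    # Placeholder value taken from example.env, set here only for the demo.
    os.environ["LLM_MODEL_CONFIG_openai_gpt_4.1"] = "gpt-4.1,openai_api_key"
    model_name, api_key = read_model_config("openai_gpt_4.1")
    print(model_name, api_key)  # gpt-4.1 openai_api_key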