-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_custom_query.py
More file actions
121 lines (100 loc) · 4.09 KB
/
test_custom_query.py
File metadata and controls
121 lines (100 loc) · 4.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
"""
Interactive test script for custom queries using the RAG pipeline.
"""
import os
from llmrag.ingestion.ingest_html import ingest_html_file
from llmrag.embeddings import SentenceTransformersEmbedder
from llmrag.retrievers import ChromaVectorStore
from llmrag.models.fake_llm import FakeLLM
from llmrag.pipelines.rag_pipeline import RAGPipeline
def setup_pipeline(
    html_file: str = "tests/ipcc/wg1/chapter04/html_with_ids.html",
    collection_name: str = "ipcc_chapter4_custom",
):
    """Ingest an HTML file and build a RAG pipeline over the resulting collection.

    The defaults reproduce the IPCC chapter 4 setup this script was written
    for, so calling ``setup_pipeline()`` with no arguments behaves as before.

    Args:
        html_file: Path of the HTML document to ingest. The default is a
            relative path, so it is resolved against the current working
            directory.
        collection_name: Collection name handed to both the ingestion step
            and the vector store.

    Returns:
        The configured ``RAGPipeline``, or ``None`` when the HTML file is
        missing, ingestion fails, or pipeline construction fails. Failures
        are reported on stdout instead of being raised.
    """
    if not os.path.exists(html_file):
        print(f"Error: HTML file not found at {html_file}")
        return None

    print("Setting up RAG pipeline...")

    # Ingest the HTML file. Only the ingestion call sits inside the ``try`` so
    # the handler reports ingestion problems and nothing else.
    try:
        ingest_html_file(html_file, collection_name=collection_name)
    except Exception as e:
        # Broad on purpose: this is an interactive script boundary, so any
        # failure is reported to the user and signalled through ``None``.
        print(f"❌ HTML ingestion failed: {e}")
        return None
    print("✅ HTML ingestion completed")

    # Set up the pipeline: embedder -> vector store -> (fake) LLM -> pipeline.
    try:
        embedder = SentenceTransformersEmbedder()
        retriever = ChromaVectorStore(embedder=embedder, collection_name=collection_name)
        llm = FakeLLM()
        pipeline = RAGPipeline(vector_store=retriever, model=llm)
    except Exception as e:
        print(f"❌ Pipeline setup failed: {e}")
        return None

    print("✅ RAG pipeline setup completed")
    return pipeline
def test_query(pipeline, query):
    """Run one query through the pipeline and print a summary of the result.

    Args:
        pipeline: Object exposing ``run(query)``. The value it returns is
            indexed with the keys ``'answer'``, ``'context'`` and
            ``'paragraph_ids'``; each context entry is read through its
            ``page_content`` attribute.
        query: The query string to send to the pipeline.

    Returns:
        None. Everything is reported on stdout. Any exception raised while
        running the query or formatting the result is caught and printed as
        a "Query failed" line, so the caller's loop keeps going.
    """
    print(f"\n🔍 Query: {query}")
    print("-" * 50)

    try:
        result = pipeline.run(query)

        print(f"📝 Answer: {result['answer']}")

        context = result['context']
        print(f"📊 Number of context documents: {len(context)}")

        if result['paragraph_ids']:
            print(f"🏷️ Paragraph IDs: {result['paragraph_ids']}")
        else:
            print("🏷️ No paragraph IDs found")

        # Show a preview of the context documents (first two only), each
        # truncated to 100 characters with an ellipsis when longer.
        print("\n📄 Context Preview:")
        for i, doc in enumerate(context[:2], 1):
            preview = doc.page_content[:100] + "..." if len(doc.page_content) > 100 else doc.page_content
            print(f" {i}. {preview}")

        if len(context) > 2:
            print(f" ... and {len(context) - 2} more documents")

    except Exception as e:
        print(f"❌ Query failed: {e}")


# This is a helper that needs explicit arguments, not a pytest test. Because
# its name starts with ``test_``, pytest's default discovery would collect it
# from a ``test_*.py`` module and fail looking for ``pipeline`` / ``query``
# fixtures. Opting out keeps the public name unchanged for existing callers.
test_query.__test__ = False
def interactive_mode():
    """Run an interactive prompt loop that sends typed queries to the pipeline.

    Builds the pipeline with ``setup_pipeline()`` and returns immediately if
    that fails. Otherwise it reads queries from stdin until the user quits:

    * ``quit`` / ``exit`` / ``q`` (case-insensitive) ends the session.
    * ``help`` lists example queries.
    * An empty line prints a reminder and prompts again.
    * Anything else is passed to ``test_query``.

    Ctrl-C and end-of-input (Ctrl-D, or stdin being a closed or exhausted
    pipe) both end the session cleanly.

    Returns:
        None.
    """
    pipeline = setup_pipeline()
    # setup_pipeline signals failure with None; compare explicitly so a valid
    # pipeline object that happens to be falsy is not mistaken for a failure.
    if pipeline is None:
        return

    print("\n" + "=" * 60)
    print("🎯 INTERACTIVE QUERY TESTING")
    print("=" * 60)
    print("Type your queries about climate change, IPCC scenarios, etc.")
    print("Type 'quit' or 'exit' to stop")
    print("Type 'help' for example queries")
    print("-" * 60)

    example_queries = [
        "What are the main climate scenarios used in IPCC projections?",
        "How do CMIP6 models differ from CMIP5?",
        "What is the projected temperature increase by 2100?",
        "How do climate models handle uncertainty?",
        "What are the Shared Socioeconomic Pathways?",
        "How does the Arctic sea ice change in projections?",
        "What is the difference between near-term and long-term projections?",
    ]

    while True:
        try:
            query = input("\n❓ Enter your query: ").strip()
            command = query.lower()

            if command in ('quit', 'exit', 'q'):
                print("👋 Goodbye!")
                break
            elif command == 'help':
                print("\n💡 Example queries:")
                for i, example in enumerate(example_queries, 1):
                    print(f" {i}. {example}")
                continue
            elif not query:
                print("Please enter a query or type 'help' for examples")
                continue

            test_query(pipeline, query)

        except (KeyboardInterrupt, EOFError):
            # EOFError must be handled here: once stdin is closed, input()
            # raises it on every call, and the generic handler below would
            # swallow it and spin forever printing errors.
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            # Keep the session alive on unexpected errors from a single query.
            print(f"❌ Error: {e}")
# Start the interactive session only when this file is executed as a script;
# importing the module defines the functions without running anything.
if __name__ == "__main__":
    interactive_mode()