-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsingle_stream.py
More file actions
77 lines (61 loc) · 3.08 KB
/
single_stream.py
File metadata and controls
77 lines (61 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from src.support_ticket.intelligent_system.run import Train
from src.support_ticket.rag.query import QueryTicket
from src.support_ticket.utils.data_preparation import prepare_training_data, process_ticket
from src.support_ticket.ticket_ranking.ticket_ranking import RankingTickets
from src.support_ticket.utils.utils import load_config
from src.support_ticket.utils.utils import get_logger
from src.support_ticket.utils.utils import load_json
from src.support_ticket.utils.utils import save_json
from pathlib import Path
import requests
from huggingface_hub import configure_http_backend
def backend_factory() -> requests.Session:
    """Build the HTTP session that huggingface_hub will use for downloads.

    NOTE(security): TLS certificate verification is disabled on this
    session, presumably to work around a corporate proxy or self-signed
    certificate. This exposes all hub traffic to man-in-the-middle
    attacks — confirm it is intentional before shipping.
    """
    http_session = requests.Session()
    # Skip TLS certificate checks (see security note above).
    http_session.verify = False
    return http_session


# Route all huggingface_hub HTTP traffic through the session built above.
configure_http_backend(backend_factory=backend_factory)
def single_stream():
    """Run the end-to-end support-ticket pipeline for a single incoming ticket.

    Steps:
      1. Prepare training data if any expected data file is missing.
      2. Optionally train the classification model (``Train.train_model`` flag).
      3. Predict the ticket's category with the trained model.
      4. Query the RAG vector store for similar tickets, ingesting data
         first if the store is empty.
      5. Re-rank the retrieved tickets and save the combined result as JSON.

    Reads all paths and flags from ``config.yaml``; writes the final output
    to ``config["Data"]["final_output"]``. Returns ``None``.
    """
    config_path = "config.yaml"
    config = load_config(config_path)
    logger = get_logger(config)

    # Regenerate the training splits if any of the prepared files are missing.
    if (
        not Path(config["Data"]["training_data"]).exists() or
        not Path(config["Data"]["ticket_train"]).exists() or
        not Path(config["Data"]["ticket_test"]).exists()
    ):
        prepare_training_data(config)

    trainer = Train(config, logger)
    if config["Train"]["train_model"]:
        trainer.run_train()

    input_query_path = config["Data"]["input_query_path"]  # single ticket.
    ticket_data = load_json(input_query_path)
    df_ticket = process_ticket(ticket_data)
    incoming_ticket = df_ticket.to_dict(orient='records')

    logger.info("Running Deep Learning Model to predict Category ...")
    # Only the human-readable category is used downstream; discard the label id.
    _, predicted_category = trainer.run_inference(incoming_ticket)

    # Initialize the RAG system; ingest data first if the store is empty.
    query = QueryTicket(config, logger)
    if not query.vectorstore.vector_db_exists():
        # Fixed message: the pipeline is run right here, and "chormadb" -> "chromadb".
        print("Vector store is empty. Running the RAG pipeline to create chromadb and ingest data.")
        logger.info("Vector store is empty. Running the RAG pipeline to create chromadb and ingest data.")
        query.rag_pipeline()
        print("Vector Database has been ingested.")
        logger.info("Vector Database has been ingested.")
    else:
        print("Vector store already initialized with data.")
        logger.info("Vector store already initialized with data.")

    print("Querying Vector Database now.")
    logger.info("Querying Vector Database now.")

    # Query for tickets similar to the incoming one, scoped to its predicted category.
    category = predicted_category[0]
    queried_tickets = query.query_ticket(ticket_data[0], category)

    # Re-rank the retrieved tickets with the custom ranking algorithm.
    ranking_ticket = RankingTickets()
    ranking_ticket.extract_relevant_metadata(queried_tickets)
    ticket_re_rank = ranking_ticket.ranking_algorithm()
    sorted_tickets = ranking_ticket.sort_tickets(queried_tickets, ticket_re_rank)

    # Tag each record so the output distinguishes the query from its matches.
    incoming_ticket[0]["type"] = "input_query"
    sorted_tickets = [{**ticket, "type": "queried_ticket"} for ticket in sorted_tickets]
    result_ticket = incoming_ticket + sorted_tickets
    save_json(result_ticket, config["Data"]["final_output"])