-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRAG_Prototype.py
More file actions
127 lines (98 loc) · 2.98 KB
/
RAG_Prototype.py
File metadata and controls
127 lines (98 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
from dotenv import load_dotenv
load_dotenv()
# document loader
from langchain_community.document_loaders import PyPDFLoader
from pathlib import Path
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
#db import
from qdrant_client import QdrantClient
from langchain_qdrant import QdrantVectorStore
# Resolve the PDF that sits next to this script.
pdf_path = Path(__file__).parent / "nodejs.pdf"

# Load the PDF: one Document per page.
# str() keeps compatibility with older langchain releases whose
# PyPDFLoader expects a string path rather than a pathlib.Path.
loader = PyPDFLoader(str(pdf_path))
docs = loader.load()

# Splitter: overlapping character chunks so each embedding carries
# enough surrounding context for retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk
    chunk_overlap=200,  # characters shared between neighbouring chunks
)

# Split the loaded pages into chunks.
split_docs = text_splitter.split_documents(documents=docs)
# Embedder: turns text chunks into OpenAI embedding vectors.
# The api_key argument is deliberately omitted so the client reads
# OPENAI_API_KEY from the environment (loaded above via load_dotenv()).
embedder = OpenAIEmbeddings(
    model="text-embedding-3-large",
)

# One-time ingestion into Qdrant. Set to True on the first run (or after
# replacing the PDF); leave False afterwards so re-runs don't re-embed
# and duplicate every chunk in the collection.
RUN_INGESTION = False

if RUN_INGESTION:
    vector_store = QdrantVectorStore.from_documents(
        documents=[],
        url="http://localhost:6333/",
        collection_name="langchain_RAG",
        embedding=embedder,
    )
    # Feed the split chunks into the vector store.
    vector_store.add_documents(documents=split_docs)

print("Ingestion Done")  # fixed typo: was "Injestion Done"
# Retriever: attach to the already-populated Qdrant collection.
# (Original chained "retriver = vector_store = ..."; both names are kept
# so any code referencing either one keeps working.)
vector_store = QdrantVectorStore.from_existing_collection(
    url="http://localhost:6333/",
    collection_name="langchain_RAG",
    embedding=embedder,
)
retriver = vector_store  # NOTE: spelling kept — the chat loop below uses "retriver"

# Chat model, initialised once outside the loop; temperature=0 keeps the
# answers deterministic and tightly grounded in the retrieved context.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)
# Interactive loop: retrieve -> prompt -> answer, until the user types 'exit'.
while True:
    # 1. Get the user's question.
    user_q = input("\nAsk a question about your PDF (or type 'exit' to quit): ")
    if user_q.lower() == 'exit':
        break

    # 2. Retrieve the chunks most similar to the question.
    relevant_chunks = retriver.similarity_search(query=user_q)

    # 3. Build the system prompt from the retrieved text. Joining each
    #    chunk's page_content gives the model clean prose instead of the
    #    raw Document-repr list that f-stringing the list itself produces.
    context = "\n\n".join(chunk.page_content for chunk in relevant_chunks)
    SYSTEM_PROMPT = f"""
    You are a helpful AI Assistant who responds based on the available context.
    Context:
    {context}
    """

    # 4. Message list for the LLM: grounding context plus the question.
    messages = [
        ("system", SYSTEM_PROMPT),
        ("human", user_q),
    ]

    # 5. Call the model and get the response.
    print("Sending context and question to the AI...")
    ai_message = llm.invoke(messages)

    # 6. Print only the text of the reply (.content), not the whole message object.
    print("\nAssistant:")
    print(ai_message.content)