-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchatbot.py
More file actions
84 lines (66 loc) · 3.09 KB
/
chatbot.py
File metadata and controls
84 lines (66 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PDFPlumberLoader
# Load environment variables from a local .env file, then hand the Google
# API key to the Gemini SDK. (The original had a stray `os.getenv(...)` call
# whose result was discarded — a no-op, removed here.)
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
def get_text_chunks(text):
    """Split raw book text into overlapping chunks suitable for embedding.

    Args:
        text: Full text extracted from the source document.

    Returns:
        A list of string chunks (10,000 chars each, 1,000-char overlap so
        sentences spanning a boundary stay retrievable).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
def get_vector_store(text_chunks, index_path="faiss_index"):
    """Embed text chunks with Gemini embeddings and persist a FAISS index.

    Args:
        text_chunks: List of text chunks to embed.
        index_path: Directory to save the index under. Defaults to
            "faiss_index", the same path `user_input` loads from, so
            existing callers are unaffected.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
def get_conversational_chain(docs, user_question):
    """Answer a question with Gemini, using retrieved documents as context.

    Builds an LCEL pipeline (prompt -> model -> string parser) and invokes
    it once. (Dead commented-out `load_qa_chain` code removed; "Provide you
    response" typo in the prompt fixed.)

    Args:
        docs: Documents retrieved from the vector store; rendered into the
            prompt's {context} slot.
        user_question: The user's question, rendered into {question}.

    Returns:
        The model's answer as a plain string.
    """
    prompt_template = """
You are a book chatbot that uses information from the book data in the context and provide useful answers to the user.
Your answers should be professional like you are an expert in this field. Do not mention the word context.
Provide your response in points. Always go in detail not only from the context but also your knowledge about the book. \n\n
Context:\n {context}\n
Question: \n{question}\n
"""
    # Moderate temperature: some creativity, but answers stay grounded.
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.6)
    prompt = ChatPromptTemplate.from_template(prompt_template)
    chain = prompt | model | StrOutputParser()
    return chain.invoke({"context": docs, "question": user_question})
def user_input(user_question):
    """Answer a question against the locally saved FAISS index.

    Loads the persisted index, retrieves the chunks most similar to the
    question, and delegates answer generation to the LLM chain.

    Args:
        user_question: The question to answer.

    Returns:
        The chatbot's answer as a string.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # The index is trusted local data we wrote ourselves, hence the
    # explicit opt-in to pickle deserialization.
    index = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    matches = index.similarity_search(user_question)
    return get_conversational_chain(matches, user_question)
def extract_text_from_documents(documents):
    """Concatenate the page content of every document, one page per line.

    Args:
        documents: Iterable of objects exposing a `page_content` string
            (e.g. LangChain Documents).

    Returns:
        A single string; each page's content is followed by a newline
        (so a non-empty result always ends with "\\n").
    """
    return "".join(f"{doc.page_content}\n" for doc in documents)
def io(question):
    """Build the FAISS index on first use, then answer `question` from it.

    NOTE(review): the name `io` shadows the stdlib `io` module; kept
    unchanged for backward compatibility with existing callers, but a
    rename (e.g. `answer_question`) is recommended.

    Args:
        question: The user's question. Falsy values skip answering.

    Returns:
        The chatbot's answer string, or None when `question` is empty.
    """
    if not os.path.isdir('faiss_index'):
        # First run: extract the book's text, chunk it, embed it, and
        # persist the index so later calls skip this expensive step.
        file_path = "Atomic Habits by James Clear (PDF) PDFDrive ( PDFDrive ).pdf"
        loader = PDFPlumberLoader(file_path)
        docs = loader.load_and_split()
        get_vector_store(get_text_chunks(extract_text_from_documents(docs)))
    if question:
        return user_input(question)