-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchatbot.py
More file actions
84 lines (66 loc) · 3.09 KB
/
chatbot.py
File metadata and controls
84 lines (66 loc) · 3.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import PDFPlumberLoader
# Load environment variables from a local .env file, then hand the Google
# API key to the Gemini SDK. (The original had a stray `os.getenv(...)` call
# whose result was discarded — a no-op, removed here.)
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
def get_text_chunks(text):
    """Split raw book text into overlapping chunks suitable for embedding.

    Args:
        text: Full text extracted from the source document.

    Returns:
        A list of string chunks (10,000 chars each, 1,000-char overlap so
        sentences spanning a boundary stay retrievable).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
def get_vector_store(text_chunks, index_path="faiss_index"):
    """Embed text chunks with Gemini embeddings and persist a FAISS index.

    Args:
        text_chunks: List of text chunks to embed.
        index_path: Directory to save the index under. Defaults to
            "faiss_index", the same path `user_input` loads from, so
            existing callers are unaffected.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)
def get_conversational_chain(docs, user_question):
    """Answer a question with Gemini, using retrieved documents as context.

    Builds an LCEL pipeline (prompt -> model -> string parser) and invokes
    it once. (Dead commented-out `load_qa_chain` code removed; "Provide you
    response" typo in the prompt fixed.)

    Args:
        docs: Documents retrieved from the vector store; rendered into the
            prompt's {context} slot.
        user_question: The user's question, rendered into {question}.

    Returns:
        The model's answer as a plain string.
    """
    prompt_template = """
You are a book chatbot that uses information from the book data in the context and provide useful answers to the user.
Your answers should be professional like you are an expert in this field. Do not mention the word context.
Provide your response in points. Always go in detail not only from the context but also your knowledge about the book. \n\n
Context:\n {context}\n
Question: \n{question}\n
"""
    # Moderate temperature: some creativity, but answers stay grounded.
    model = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.6)
    prompt = ChatPromptTemplate.from_template(prompt_template)
    chain = prompt | model | StrOutputParser()
    return chain.invoke({"context": docs, "question": user_question})
def user_input(user_question):
    """Answer a question against the locally saved FAISS index.

    Loads the persisted index, retrieves the chunks most similar to the
    question, and delegates answer generation to the LLM chain.

    Args:
        user_question: The question to answer.

    Returns:
        The chatbot's answer as a string.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # The index is trusted local data we wrote ourselves, hence the
    # explicit opt-in to pickle deserialization.
    index = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    matches = index.similarity_search(user_question)
    return get_conversational_chain(matches, user_question)
def extract_text_from_documents(documents):
    """Concatenate the page content of every document, one page per line.

    Args:
        documents: Iterable of objects exposing a `page_content` string
            (e.g. LangChain Documents).

    Returns:
        A single string; each page's content is followed by a newline
        (so a non-empty result always ends with "\\n").
    """
    return "".join(f"{doc.page_content}\n" for doc in documents)
def io(question):
    """Build the FAISS index on first use, then answer `question` from it.

    NOTE(review): the name `io` shadows the stdlib `io` module; kept
    unchanged for backward compatibility with existing callers, but a
    rename (e.g. `answer_question`) is recommended.

    Args:
        question: The user's question. Falsy values skip answering.

    Returns:
        The chatbot's answer string, or None when `question` is empty.
    """
    if not os.path.isdir('faiss_index'):
        # First run: extract the book's text, chunk it, embed it, and
        # persist the index so later calls skip this expensive step.
        file_path = "Atomic Habits by James Clear (PDF) PDFDrive ( PDFDrive ).pdf"
        loader = PDFPlumberLoader(file_path)
        docs = loader.load_and_split()
        get_vector_store(get_text_chunks(extract_text_from_documents(docs)))
    if question:
        return user_input(question)