-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload.py
More file actions
32 lines (24 loc) · 820 Bytes
/
load.py
File metadata and controls
32 lines (24 loc) · 820 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Load Data #
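# Note: Run this script only once. Each run embeds and inserts every document again, so re-running it will likely leave duplicate entries in the collection.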
# Import Packages
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_community.document_loaders import DirectoryLoader
from mongo import collection
import nltk
import env as e
# NLTK Resources
# "punkt" holds the Punkt sentence-tokenizer models; "punkt_tab" is the
# variant that newer NLTK releases look up. Both are fetched, in this order,
# before any files are loaded.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)

# OpenAI API Key (read from the project's env module)
openai_api_key = e.openai_api_key

# Directory Loader: matches ./*.txt under ./data and shows a progress bar
loader = DirectoryLoader(
    path="./data",
    glob="./*.txt",
    show_progress=True,
)

# Loaded Documents
data = loader.load()

# Embedding Client (authenticated with the key above)
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Vector Store: embeds the loaded documents and writes them to the
# collection imported from the project's mongo module
vectorStore = MongoDBAtlasVectorSearch.from_documents(
    documents=data,
    embedding=embeddings,
    collection=collection,
)