-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimplementVectorDB.py
More file actions
28 lines (22 loc) · 902 Bytes
/
implementVectorDB.py
File metadata and controls
28 lines (22 loc) · 902 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import ollama
from src.loadingDataset import load_multiple_files
# Model identifier passed to ollama.embed() when embedding chunks.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
# Language-model identifier; not referenced in the visible part of this module
# (presumably consumed by importers — TODO confirm).
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'
# In-memory vector store: a list of (chunk, embedding) tuples appended by
# add_chunk_to_database() and emptied by reset_vector_db().
VECTOR_DB = []
def add_chunk_to_database(chunk):
    """Embed ``chunk`` and store it in the in-memory vector database.

    The chunk is sent to ``ollama.embed`` using ``EMBEDDING_MODEL``; the first
    entry of the returned ``'embeddings'`` list is paired with the chunk and
    appended to ``VECTOR_DB`` as a ``(chunk, embedding)`` tuple.

    Args:
        chunk: The input to embed (assumed to be a single text string —
            TODO confirm against callers).
    """
    response = ollama.embed(model=EMBEDDING_MODEL, input=chunk)
    # Only the first embedding of the response is kept.
    vector = response['embeddings'][0]
    entry = (chunk, vector)
    VECTOR_DB.append(entry)
# Build the vector database from the already-loaded dataset.
def create_vector_db_from_dataset(chunks):
    """Add pre-split chunks to the vector database.

    Every chunk is embedded and stored through ``add_chunk_to_database``, in
    iteration order. Chunks must already be split before being passed in,
    e.g. by ``load_multiple_files()``.

    Args:
        chunks: Iterable of chunks to embed and store.

    Returns:
        None. The entries are appended to the module-level store as a side
        effect of ``add_chunk_to_database``.
    """
    for chunk in chunks:
        add_chunk_to_database(chunk)
def reset_vector_db():
    """Remove every entry from the in-memory vector database.

    The list is emptied in place, so any other reference to ``VECTOR_DB``
    (for example one obtained via ``from module import VECTOR_DB``) observes
    the reset as well. No ``global`` declaration is needed because the name
    is never rebound, only mutated.
    """
    VECTOR_DB.clear()