-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
131 lines (108 loc) · 3.41 KB
/
utils.py
File metadata and controls
131 lines (108 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Utilities for Vertex AI
"""
# Standard-library utilities
import time
from typing import List
from typing import Optional
# LangChain and pydantic
import langchain
from pydantic import BaseModel
# Printed once at import time, just for documentation purposes
print(f"LangChain version: {langchain.__version__}")
# Vertex AI SDK and the LangChain wrappers around it
from google.cloud import aiplatform
from langchain.chat_models import ChatVertexAI
from langchain.embeddings import VertexAIEmbeddings
from langchain.llms import VertexAI
from langchain.schema import HumanMessage, SystemMessage
# Printed once at import time, just for documentation purposes
print(f"Vertex AI SDK version: {aiplatform.__version__}")
# Initialize Vertex AI
# NOTE(review): PROJECT_ID is an empty string here - presumably it is meant to
# be filled in with a Google Cloud project id before use; confirm.
PROJECT_ID = ""
LOCATION = "us-central1"
import vertexai
# Runs at import time, so every object created later in this module is built
# after the SDK has been initialized with the project/location above.
vertexai.init(project=PROJECT_ID, location=LOCATION)
# Utility functions for Embeddings API with rate limiting
def rate_limit(max_per_minute):
    """Generator that paces its caller to ``max_per_minute`` resumptions per minute.

    The first ``next()`` prints "Waiting", records a timestamp and yields
    without sleeping. Every later ``next()`` measures the time spent since the
    previous yield and, if that is shorter than ``60 / max_per_minute``
    seconds, prints a "." and sleeps for the remainder before yielding again.

    Args:
        max_per_minute: Target number of resumptions per minute. The body only
            runs on the first ``next()``, so a value of 0 raises
            ZeroDivisionError there rather than at call time.

    Yields:
        None, once per pacing step. The generator never finishes on its own.
    """
    min_interval = 60 / max_per_minute
    print("Waiting")
    while True:
        started = time.time()
        yield
        # Only the part of the interval the caller has not already used up
        # between two resumptions is slept.
        remaining = max(0, min_interval - (time.time() - started))
        if remaining > 0:
            print(".", end="")
            time.sleep(remaining)
# Configurations for Vertex AI Embeddings
class CustomVertexAIEmbeddings(VertexAIEmbeddings, BaseModel):
    """Vertex AI embeddings wrapper that sends texts in rate-limited batches."""

    # Passed to rate_limit() to pace successive embedding requests.
    requests_per_minute: int
    # Number of texts sent in a single get_embeddings() call.
    num_instances_per_batch: int

    # Overriding embed_documents method
    def embed_documents(self, texts: List[str]):
        """Embed ``texts`` batch by batch, pausing between requests.

        Args:
            texts: The documents to embed.

        Returns:
            A list holding the ``values`` attribute of every embedding result,
            in the order the results were returned. Empty when ``texts`` is
            empty.

        Raises:
            ValueError: If there is at least one text to embed and
                ``num_instances_per_batch`` is less than 1. Such a batch size
                would never consume the pending texts, so the request loop
                could not terminate.
        """
        docs = list(texts)
        if not docs:
            return []

        batch_size = self.num_instances_per_batch
        if batch_size < 1:
            raise ValueError(
                "num_instances_per_batch must be at least 1, "
                f"got {batch_size}"
            )

        limiter = rate_limit(self.requests_per_minute)
        results = []
        # Working in batches because the API accepts maximum 5
        # documents per request to get embeddings
        for start in range(0, len(docs), batch_size):
            batch = docs[start:start + batch_size]
            results.extend(self.client.get_embeddings(batch))
            # Advance the limiter after every request (including the last one)
            # so the pacing between calls is the same as before.
            next(limiter)
        return [r.values for r in results]
# Utility functions for Embeddings API with rate limiting
def rate_limit(max_per_minute):
    """Generator that paces its caller to ``max_per_minute`` resumptions per minute.

    The first ``next()`` prints "Waiting", records a timestamp and yields
    without sleeping. Every later ``next()`` measures the time spent since the
    previous yield and, if that is shorter than ``60 / max_per_minute``
    seconds, prints a "." and sleeps for the remainder before yielding again.

    Args:
        max_per_minute: Target number of resumptions per minute. The body only
            runs on the first ``next()``, so a value of 0 raises
            ZeroDivisionError there rather than at call time.

    Yields:
        None, once per pacing step. The generator never finishes on its own.
    """
    min_interval = 60 / max_per_minute
    print("Waiting")
    while True:
        started = time.time()
        yield
        # Only the part of the interval the caller has not already used up
        # between two resumptions is slept.
        remaining = max(0, min_interval - (time.time() - started))
        if remaining > 0:
            print(".", end="")
            time.sleep(remaining)
class CustomVertexAIEmbeddings(VertexAIEmbeddings, BaseModel):
    """Vertex AI embeddings wrapper that sends texts in rate-limited batches."""

    # Passed to rate_limit() to pace successive embedding requests.
    requests_per_minute: int
    # Number of texts sent in a single get_embeddings() call.
    num_instances_per_batch: int

    # Overriding embed_documents method
    def embed_documents(self, texts: List[str]):
        """Embed ``texts`` batch by batch, pausing between requests.

        Args:
            texts: The documents to embed.

        Returns:
            A list holding the ``values`` attribute of every embedding result,
            in the order the results were returned. Empty when ``texts`` is
            empty.

        Raises:
            ValueError: If there is at least one text to embed and
                ``num_instances_per_batch`` is less than 1. Such a batch size
                would never consume the pending texts, so the request loop
                could not terminate.
        """
        docs = list(texts)
        if not docs:
            return []

        batch_size = self.num_instances_per_batch
        if batch_size < 1:
            raise ValueError(
                "num_instances_per_batch must be at least 1, "
                f"got {batch_size}"
            )

        limiter = rate_limit(self.requests_per_minute)
        results = []
        # Working in batches because the API accepts maximum 5
        # documents per request to get embeddings
        for start in range(0, len(docs), batch_size):
            batch = docs[start:start + batch_size]
            results.extend(self.client.get_embeddings(batch))
            # Advance the limiter after every request (including the last one)
            # so the pacing between calls is the same as before.
            next(limiter)
        return [r.values for r in results]
# LLM model
# Module-level text model handle, created once at import time with the
# generation settings listed below.
llm = VertexAI(
model_name="text-bison@001",
max_output_tokens=512,
temperature=0.1,
top_p=0.8,
top_k=40,
verbose=True,
)
# Chat
# Constructed with no arguments, i.e. with the wrapper's own defaults.
chat = ChatVertexAI()
# Embedding
# Handed to rate_limit() via requests_per_minute: 60 / 15 gives a 4 second
# pacing interval between embedding requests.
EMBEDDING_QPM = 15
# Number of texts embed_documents() sends per get_embeddings() call.
EMBEDDING_NUM_BATCH = 2
embeddings = CustomVertexAIEmbeddings(
requests_per_minute=EMBEDDING_QPM,
num_instances_per_batch=EMBEDDING_NUM_BATCH,
)