From 206fd5735007673252f7b995cac273b8962b2157 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 01:33:46 -0800 Subject: [PATCH 01/17] Change api key to be recieved as input if not found in .env file --- app.py | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- chat_rover.py | 9 ++++----- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 11b6adc..7228cf8 100644 --- a/app.py +++ b/app.py @@ -1,8 +1,11 @@ +from openai import OpenAI from chat_rover import ChatRover import streamlit as st from github_scraper import GitHubScraper import time import random +import os +from dotenv import load_dotenv AVATAR_IMAGE = 'https://raw.githubusercontent.com/Marcozc19/RepoRover/main/images/rover3.png' USER_IMAGE = "https://raw.githubusercontent.com/Marcozc19/RepoRover/main/images/moon.png" @@ -24,11 +27,30 @@ "My creators built me during a Large Language Model hackathon in 2023." ] + +# Tests API Key +def is_valid_key(api_key): + try: + client = OpenAI(api_key=api_key) + response = client.chat.completions.create( + model="gpt-3.5-turbo-1106", + messages=[{'role': 'user', 'content': "Hello, world!"}], + max_tokens=5 + ) + # print(response) + # print(response.choices) + # print('choices' in response) + return response.choices is not None + except Exception as e: + print(e) + return False + + # Updates rover based on URL def update_url(url): gitHubScraper = GitHubScraper(url) st.session_state.repo_name = gitHubScraper.get_repo_name() - st.session_state.chat_rover = ChatRover(gitHubScraper) + st.session_state.chat_rover = ChatRover(gitHubScraper, st.session_state.api_key) # Get the Rover if it exists @@ -39,6 +61,27 @@ def update_url(url): # Title for the app st.title("RepoRover") + +# Get API Key +if 'api_key' not in st.session_state or st.session_state.api_key is None: + # use .env key if there is .env + load_dotenv() + if "OPENAI_API_KEY" in os.environ: + api_key = os.environ["OPENAI_API_KEY"] + if is_valid_key(api_key): + st.session_state.api_key = api_key + st.success("API Key loaded from .env file") + else: + st.error("Invalid API Key from .env file") + else: + api_key = st.text_input("Enter your OpenAI API key", type="password") + if st.button('Submit'): + if is_valid_key(api_key): + st.session_state.api_key = api_key + st.success("API Key accepted.") + else: + st.error("Invalid API Key.") + # Input box repo_url = st.text_input("Enter a Repo URL") @@ -47,7 +90,7 @@ def update_url(url): if repo_url: random_fact = random.choice(fun_facts) st.info(f"Fun Fact: {random_fact}") - with st.spinner(f"Analyzing repository terrain... Please wait..."): + with st.spinner("Analyzing repository terrain... Please wait..."): update_url(repo_url) st.session_state.messages = [] st.success(f"New world discovered! Welcome to {st.session_state.repo_name}!") @@ -70,7 +113,7 @@ def update_url(url): st.session_state.messages.append({"role": "user", "content": prompt}) # start the spinner - spinner = st.spinner(f"Engaging in digital deep thought...") + spinner = st.spinner("Engaging in digital deep thought...") spinner.__enter__() first_chunk_received = False diff --git a/chat_rover.py b/chat_rover.py index bdec6c6..990ef1f 100644 --- a/chat_rover.py +++ b/chat_rover.py @@ -8,18 +8,17 @@ from langchain.schema.document import Document from langchain_community.chat_models import ChatOpenAI from langchain.prompts import PromptTemplate -from langchain.text_splitter import CharacterTextSplitter from langchain.chains import LLMChain import tiktoken # load env -load_dotenv() +# load_dotenv() class ChatRover(): - def __init__(self, gitHubScraper): - api_key = os.getenv('OPENAI_API_KEY') + def __init__(self, gitHubScraper, api_key): + # api_key = os.getenv('OPENAI_API_KEY') self.client = OpenAI(api_key=api_key) self.gitHubScraper = gitHubScraper @@ -156,4 +155,4 @@ def run_chat(self, user_input): yield response_chunk response += response_chunk - self.update_history("assistant", response) \ No newline at end of file + self.update_history("assistant", response) From 4172d05476fd34d0e447c06a15a63a743ccf966d Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 01:38:46 -0800 Subject: [PATCH 02/17] added dictionary for loaded file conents to scraper to reduce api calls --- github_scraper.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/github_scraper.py b/github_scraper.py index eb76fd9..b021389 100644 --- a/github_scraper.py +++ b/github_scraper.py @@ -15,14 +15,15 @@ def __init__(self, github_url, branch=None, condensed=False): self.root_readme = "" self.file_paths = [] self.set_files(condensed) + self.file_contents = {} # Getters def get_repo_name(self): return self.repo - + def get_file_paths(self): return self.file_paths - + def get_readme(self): return self.root_readme @@ -64,8 +65,10 @@ def set_files(self, condensed=False): else: print("Error:", response.status_code, response.text) - def get_file_raw(self, file_path): + if file_path in self.file_contents: + return self.file_contents[file_path] + url = f'https://api.github.com/repos/{self.owner}/{self.repo}/contents/{file_path}?ref={self.branch}' headers = {'Accept': 'application/vnd.github.v3.raw'} From c4550f32ad4aa379b53aec2114cff2cafbf9366d Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 19:23:04 -0800 Subject: [PATCH 03/17] update requirements and remove redundant imports. --- chat_rover.py | 5 ----- github_scraper.py | 9 --------- requirements.txt | 3 ++- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/chat_rover.py b/chat_rover.py index 990ef1f..fd95cad 100644 --- a/chat_rover.py +++ b/chat_rover.py @@ -1,6 +1,4 @@ from openai import OpenAI -import os -from dotenv import load_dotenv from langchain.text_splitter import CharacterTextSplitter from langchain.embeddings import OpenAIEmbeddings @@ -11,9 +9,6 @@ from langchain.chains import LLMChain import tiktoken -# load env -# load_dotenv() - class ChatRover(): diff --git a/github_scraper.py b/github_scraper.py index b021389..f754732 100644 --- a/github_scraper.py +++ b/github_scraper.py @@ -101,12 +101,3 @@ def _condense_file_structure(self, file_paths): formatted_structure += " " * current_depth + path_segments[-1] + "\n" return formatted_structure - - -if __name__ == "__main__": - # Replace with your GitHub URL - github_url = 'https://github.com/Stability-AI/generative-models' - - scraper = GitHubScraper(github_url) - print(scraper.root_readme) - print(scraper.file_paths) diff --git a/requirements.txt b/requirements.txt index 827acdc..2377ac4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ altair==4 openai numpy tiktoken -langchain \ No newline at end of file +langchain +python-dotenv \ No newline at end of file From 31ad6459a504771b2139232ae37704e34bba3654 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 19:31:38 -0800 Subject: [PATCH 04/17] reorder scraper attribute initialization --- github_scraper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/github_scraper.py b/github_scraper.py index f754732..621133d 100644 --- a/github_scraper.py +++ b/github_scraper.py @@ -12,10 +12,11 @@ def __init__(self, github_url, branch=None, condensed=False): self.owner, self.repo = self.get_github_repo_info() self.branch = self.get_default_branch() if branch is None else branch + self.file_contents = {} self.root_readme = "" self.file_paths = [] self.set_files(condensed) - self.file_contents = {} + # Getters def get_repo_name(self): From 07c5207ffb1e100b595faefa01d91a4a5e58bc12 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 20:39:47 -0800 Subject: [PATCH 05/17] add api key param to OpenAIEmbeddings to handle if not in env --- chat_rover.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chat_rover.py b/chat_rover.py index fd95cad..7adc2ab 100644 --- a/chat_rover.py +++ b/chat_rover.py @@ -14,6 +14,7 @@ class ChatRover(): def __init__(self, gitHubScraper, api_key): # api_key = os.getenv('OPENAI_API_KEY') + self.api_key = api_key self.client = OpenAI(api_key=api_key) self.gitHubScraper = gitHubScraper @@ -44,7 +45,7 @@ def create_file_vector(self): print("Creating file vector...") split_data = [Document(page_content=file) for file in files] - embeddings = OpenAIEmbeddings() + embeddings = OpenAIEmbeddings(openai_api_key=self.api_key) vectorstore = FAISS.from_documents(split_data, embedding=embeddings) print("File vector complete!") return vectorstore From d08d3a08084ad3ad909b9eecc666adf67c4f90b6 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 20:44:26 -0800 Subject: [PATCH 06/17] add api key param to OpenAIEmbeddings to handle if not in env --- chat_rover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chat_rover.py b/chat_rover.py index 7adc2ab..ec91933 100644 --- a/chat_rover.py +++ b/chat_rover.py @@ -60,7 +60,7 @@ def create_readme_vector(self): text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=200) split_data = [Document(page_content=chunk) for chunk in text_splitter.split_text(data)] - embeddings = OpenAIEmbeddings() + embeddings = OpenAIEmbeddings(openai_api_key=self.api_key) vectorstore = FAISS.from_documents(split_data, embedding=embeddings) print("Readme vector complete!") return vectorstore From b734d352eeae493b8ab03462512cb98b50c696fa Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 20:49:07 -0800 Subject: [PATCH 07/17] add FAISS to requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2377ac4..9937746 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ openai numpy tiktoken langchain +faiss-gpu python-dotenv \ No newline at end of file From aaf676b46949953e18d7f8577f8c407f399fd669 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 20:51:41 -0800 Subject: [PATCH 08/17] update FAISS to cpu version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9937746..a619523 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,5 @@ openai numpy tiktoken langchain -faiss-gpu +faiss-cpu python-dotenv \ No newline at end of file From 9fd4d19b97fc7bd499cac81019fac6576d05bf86 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sat, 6 Jan 2024 20:55:01 -0800 Subject: [PATCH 09/17] add open ai key to chat open ai form langchain to handle if not in env --- chat_rover.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chat_rover.py b/chat_rover.py index ec91933..980aeee 100644 --- a/chat_rover.py +++ b/chat_rover.py @@ -66,7 +66,7 @@ def create_readme_vector(self): return vectorstore def code_summary(self, file_path, query): - llm = ChatOpenAI(temperature=0.3, model_name=self.model) + llm = ChatOpenAI(temperature=0.3, model_name=self.model, openai_api_key=self.api_key) custom_prompt = """ Provide a clear and concise summary on the code that you will be given. You should reference specific parts of the code. Be technical. Your summary will be used by another LLM to explain specific parts of the user query. Focus on those parts that are most relevant to the user query. Do not speak to or address the user. Limit your response to 150 words. From e9991674e0ad41e1f09d3a7db2d1b116dc941f96 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Sun, 7 Jan 2024 02:02:59 -0800 Subject: [PATCH 10/17] set langchain version to 0.0.353 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a619523..dc0ad71 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ altair==4 openai numpy tiktoken -langchain +langchain==0.0.353 faiss-cpu python-dotenv \ No newline at end of file From 81c4af8ee9f825e8c3c821e06e8d47e5a1faba87 Mon Sep 17 00:00:00 2001 From: Maxim Date: Sun, 7 Jan 2024 16:47:40 -0500 Subject: [PATCH 11/17] removing periods from file ext --- github_scraper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/github_scraper.py b/github_scraper.py index 621133d..dc9219b 100644 --- a/github_scraper.py +++ b/github_scraper.py @@ -1,8 +1,8 @@ import requests -IGNORE_EXTS = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg', '.mp4', '.mp3', - '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.tar', '.gz', '.rar', - '.7z', '.exe', '.dll', '.jar', '.war', '.class'] +IGNORE_EXTS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg', 'mp4', 'mp3', + 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'zip', 'tar', 'gz', 'rar', + '7z', 'exe', 'dll', 'jar', 'war', 'class'] class GitHubScraper: From 4ade0bd9eebfe21a4e1c7ce76aa4f4fcf696c5c8 Mon Sep 17 00:00:00 2001 From: Maxim <81264561+MaximClouser@users.noreply.github.com> Date: Sun, 7 Jan 2024 16:51:51 -0500 Subject: [PATCH 12/17] Added Dev Container Folder --- .devcontainer/devcontainer.json | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..8491816 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", + "customizations": { + "codespaces": { + "openFiles": [ + "README.md", + "app.py" + ] + }, + "vscode": { + "settings": {}, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y Date: Sun, 7 Jan 2024 16:54:07 -0500 Subject: [PATCH 13/17] removing container --- .devcontainer/devcontainer.json | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index 8491816..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "Python 3", - // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile - "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", - "customizations": { - "codespaces": { - "openFiles": [ - "README.md", - "app.py" - ] - }, - "vscode": { - "settings": {}, - "extensions": [ - "ms-python.python", - "ms-python.vscode-pylance" - ] - } - }, - "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y Date: Mon, 25 Mar 2024 14:30:30 -0400 Subject: [PATCH 14/17] add key load from st.secrets for use on deploy app --- .gitignore | 5 ++++- app.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5a2384a..a6922df 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,7 @@ aiconfig.log file_structure.txt #readme -readme.txt \ No newline at end of file +readme.txt + +# secrets +secrets.toml \ No newline at end of file diff --git a/app.py b/app.py index 7228cf8..7a23d64 100644 --- a/app.py +++ b/app.py @@ -73,6 +73,13 @@ def update_url(url): st.success("API Key loaded from .env file") else: st.error("Invalid API Key from .env file") + elif "OPENAI_API_KEY" in st.secrets: + api_key = st.secrets["OPENAI_API_KEY"] + if is_valid_key(api_key): + st.session_state.api_key = api_key + st.success("API Key loaded from secrets file") + else: + st.error("Invalid API Key from sectets file") else: api_key = st.text_input("Enter your OpenAI API key", type="password") if st.button('Submit'): From 28a2b67f71090c018cfa96b2e697b0e06eb6f936 Mon Sep 17 00:00:00 2001 From: Brendan Woodward <73412688+bwoody13@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:46:56 -0400 Subject: [PATCH 15/17] Added Dev Container Folder --- .devcontainer/devcontainer.json | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..8491816 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,33 @@ +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye", + "customizations": { + "codespaces": { + "openFiles": [ + "README.md", + "app.py" + ] + }, + "vscode": { + "settings": {}, + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y Date: Mon, 25 Mar 2024 15:49:40 -0400 Subject: [PATCH 16/17] update to inclide github token for api requests --- app.py | 3 +-- github_scraper.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 7a23d64..ccc8134 100644 --- a/app.py +++ b/app.py @@ -48,7 +48,7 @@ def is_valid_key(api_key): # Updates rover based on URL def update_url(url): - gitHubScraper = GitHubScraper(url) + gitHubScraper = GitHubScraper(url, token=st.secrets["GITHUB_TOKEN"]) st.session_state.repo_name = gitHubScraper.get_repo_name() st.session_state.chat_rover = ChatRover(gitHubScraper, st.session_state.api_key) @@ -61,7 +61,6 @@ def update_url(url): # Title for the app st.title("RepoRover") - # Get API Key if 'api_key' not in st.session_state or st.session_state.api_key is None: # use .env key if there is .env diff --git a/github_scraper.py b/github_scraper.py index dc9219b..d104437 100644 --- a/github_scraper.py +++ b/github_scraper.py @@ -7,8 +7,10 @@ class GitHubScraper: - def __init__(self, github_url, branch=None, condensed=False): + def __init__(self, github_url, token=None, branch=None, condensed=False): self.github_url = github_url + self.token = token + self.header = {'Authorization': f'token {self.token}'} if self.token else {} self.owner, self.repo = self.get_github_repo_info() self.branch = self.get_default_branch() if branch is None else branch @@ -16,7 +18,6 @@ def __init__(self, github_url, branch=None, condensed=False): self.root_readme = "" self.file_paths = [] self.set_files(condensed) - # Getters def get_repo_name(self): @@ -36,7 +37,7 @@ def get_github_repo_info(self): def get_default_branch(self): url = f"https://api.github.com/repos/{self.owner}/{self.repo}" - response = requests.get(url) + response = requests.get(url, headers=self.header) if response.status_code == 200: data = response.json() return data.get('default_branch', 'master') @@ -46,7 +47,7 @@ def get_default_branch(self): def set_files(self, condensed=False): url = f"https://api.github.com/repos/{self.owner}/{self.repo}/git/trees/{self.branch}?recursive=1" - response = requests.get(url) + response = requests.get(url, headers=self.header) if response.status_code == 200: data = response.json() @@ -58,7 +59,7 @@ def set_files(self, condensed=False): if file_extension not in IGNORE_EXTS: if file_name == 'readme.md': # must use correct casing to get file - self.root_readme = self.get_file_raw(file['path']) + self.root_readme = self.get_file_raw(file['path']) files.append(file['path']) if condensed: files = self._condense_file_structure(files) @@ -71,7 +72,8 @@ def get_file_raw(self, file_path): return self.file_contents[file_path] url = f'https://api.github.com/repos/{self.owner}/{self.repo}/contents/{file_path}?ref={self.branch}' - headers = {'Accept': 'application/vnd.github.v3.raw'} + headers = self.header.copy() + headers['Accept'] = 'application/vnd.github.v3.raw' response = requests.get(url, headers=headers) From 6c2d8d45d6c7ab27ca9e4cc856923c6d53603e90 Mon Sep 17 00:00:00 2001 From: bwoody13 Date: Thu, 4 Apr 2024 14:53:40 -0400 Subject: [PATCH 17/17] udpate requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4defe61..fbf21f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,8 @@ numpy tiktoken langchain==0.0.353 langchain_openai -langchain_community +langchain_community==0.0.9 langchain_core +langsmith==0.0.77 faiss-cpu python-dotenv \ No newline at end of file