33 changes: 33 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,33 @@
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
"customizations": {
"codespaces": {
"openFiles": [
"README.md",
"app.py"
]
},
"vscode": {
"settings": {},
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
]
}
},
"updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
"postAttachCommand": {
"server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
},
"portsAttributes": {
"8501": {
"label": "Application",
"onAutoForward": "openPreview"
}
},
"forwardPorts": [
8501
]
}
5 changes: 4 additions & 1 deletion .gitignore
@@ -15,4 +15,7 @@ aiconfig.log
file_structure.txt

#readme
readme.txt
readme.txt

# secrets
secrets.toml
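
The newly ignored secrets.toml is Streamlit's local secrets store, which the app reads via st.secrets (see app.py below). A minimal sketch of what the untracked .streamlit/secrets.toml might contain (both values are placeholders):

# .streamlit/secrets.toml
OPENAI_API_KEY = "sk-your-key-here"
GITHUB_TOKEN = "ghp_your-token-here"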
57 changes: 53 additions & 4 deletions app.py
@@ -1,8 +1,11 @@
from openai import OpenAI
from chat_rover import ChatRover
import streamlit as st
from github_scraper import GitHubScraper
import time
import random
import os
from dotenv import load_dotenv

AVATAR_IMAGE = 'https://raw.githubusercontent.com/Marcozc19/RepoRover/main/images/rover3.png'
USER_IMAGE = "https://raw.githubusercontent.com/Marcozc19/RepoRover/main/images/moon.png"
@@ -24,11 +27,30 @@
"My creators built me during a Large Language Model hackathon in 2023."
]


# Tests API Key
def is_valid_key(api_key):
try:
client = OpenAI(api_key=api_key)
response = client.chat.completions.create(
model="gpt-3.5-turbo-1106",
messages=[{'role': 'user', 'content': "Hello, world!"}],
max_tokens=5
)
return response.choices is not None
except Exception as e:
print(e)
return False


# Updates rover based on URL
def update_url(url):
gitHubScraper = GitHubScraper(url)
gitHubScraper = GitHubScraper(url, token=st.secrets["GITHUB_TOKEN"])
st.session_state.repo_name = gitHubScraper.get_repo_name()
st.session_state.chat_rover = ChatRover(gitHubScraper)
st.session_state.chat_rover = ChatRover(gitHubScraper, st.session_state.api_key)


# Get the Rover if it exists
@@ -39,6 +61,33 @@ def update_url(url):
# Title for the app
st.title("RepoRover")

# Get API Key
if 'api_key' not in st.session_state or st.session_state.api_key is None:
# Use the key from a .env file if one is present
load_dotenv()
if "OPENAI_API_KEY" in os.environ:
api_key = os.environ["OPENAI_API_KEY"]
if is_valid_key(api_key):
st.session_state.api_key = api_key
st.success("API Key loaded from .env file")
else:
st.error("Invalid API Key from .env file")
elif "OPENAI_API_KEY" in st.secrets:
api_key = st.secrets["OPENAI_API_KEY"]
if is_valid_key(api_key):
st.session_state.api_key = api_key
st.success("API Key loaded from secrets file")
else:
st.error("Invalid API Key from sectets file")
else:
api_key = st.text_input("Enter your OpenAI API key", type="password")
if st.button('Submit'):
if is_valid_key(api_key):
st.session_state.api_key = api_key
st.success("API Key accepted.")
else:
st.error("Invalid API Key.")

# Input box
repo_url = st.text_input("Enter a Repo URL")

@@ -47,7 +96,7 @@ def update_url(url):
if repo_url:
random_fact = random.choice(fun_facts)
st.info(f"Fun Fact: {random_fact}")
with st.spinner(f"Analyzing repository terrain... Please wait..."):
with st.spinner("Analyzing repository terrain... Please wait..."):
update_url(repo_url)
st.session_state.messages = []
st.success(f"New world discovered! Welcome to {st.session_state.repo_name}!")
@@ -70,7 +119,7 @@ def update_url(url):
st.session_state.messages.append({"role": "user", "content": prompt})

# start the spinner
spinner = st.spinner(f"Engaging in digital deep thought...")
spinner = st.spinner("Engaging in digital deep thought...")
spinner.__enter__()

first_chunk_received = False
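Taken together, the new startup code resolves the OpenAI key in a fixed order: the current session, then a local .env file, then Streamlit secrets, then manual input. A condensed sketch of that fallback chain (not the literal app code):

import os
import streamlit as st
from dotenv import load_dotenv

def resolve_api_key():
    # A key already stored in this session wins.
    if st.session_state.get("api_key"):
        return st.session_state.api_key
    # Next, a local .env file, then Streamlit's secrets store.
    load_dotenv()
    if "OPENAI_API_KEY" in os.environ:
        return os.environ["OPENAI_API_KEY"]
    if "OPENAI_API_KEY" in st.secrets:
        return st.secrets["OPENAI_API_KEY"]
    # Otherwise fall back to manual input in the UI.
    return st.text_input("Enter your OpenAI API key", type="password")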
47 changes: 14 additions & 33 deletions chat_rover.py
@@ -1,26 +1,20 @@
from openai import OpenAI
import os
from dotenv import load_dotenv

from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema.document import Document
from langchain_community.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

import tiktoken

# load env
load_dotenv()


class ChatRover():

def __init__(self, gitHubScraper):
self.api_key = os.getenv('OPENAI_API_KEY')
self.client = OpenAI(api_key=self.api_key)
def __init__(self, gitHubScraper, api_key):
self.api_key = api_key
self.client = OpenAI(api_key=api_key)

self.gitHubScraper = gitHubScraper

@@ -50,7 +44,7 @@ def create_file_vector(self):
print("Creating file vector...")
split_data = [Document(page_content=file) for file in files]

embeddings = OpenAIEmbeddings()
embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)
vectorstore = FAISS.from_documents(split_data, embedding=embeddings)
print("File vector complete!")
return vectorstore
@@ -65,21 +59,17 @@ def create_readme_vector(self):
text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=200)
split_data = [Document(page_content=chunk) for chunk in text_splitter.split_text(data)]

embeddings = OpenAIEmbeddings()
embeddings = OpenAIEmbeddings(openai_api_key=self.api_key)
vectorstore = FAISS.from_documents(split_data, embedding=embeddings)
print("Readme vector complete!")
return vectorstore

# Returns a summary of the contents of file_path that is relevant
# to the user query
def code_summary(self, file_path, query):
custom_prompt = """
Provide a clear and concise summary on the code that you will be given as it relates to a user query.
You should reference specific parts of the code.
Be technical. Your summary will be used by another LLM to explain specific parts of the code.
Focus on those parts that are most relevant to the user query, the user may ask for specific code snippets which you will provide.
Do not speak to or address the user.
Limit your response to 200 words.
Provide a clear and concise summary of the code that you will be given as it relates to a user query. You should reference specific parts of the code. Be technical. Your summary will be used by another LLM to explain specific parts of the code. Focus on the parts that are most relevant to the user query; the user may ask for specific code snippets, which you should provide. Do not speak to or address the user. Limit your response to 200 words.

Code: {code}
User Query: {query}
"""
@@ -115,16 +105,7 @@ def retrieve_context(self, query):
i += 1

role_prompt = f"""
As 'RepoRover', you are a specialized AI expert on the '{self.repo}' repository.
Your expertise includes detailed knowledge of the repository's structure,
critical portions of the README, and summaries of key files based on user queries.
You do not have to use the summaries of files if they are not relevant.
If they are relevant, feel free to copy them verbatum or you may choose to extract
parts of them to best answer the user.
Below is the relevant file structure, selected README excerpts, and summaries of important files.
Using this information, please provide precise answers to the following question,
referencing specific files or sections when useful.
You are responding directly to the user. Only address the user in your response.
As 'RepoRover', you are a specialized AI expert on the '{self.repo}' repository. Your expertise includes detailed knowledge of the repository's structure, critical portions of the README, and summaries of key files based on user queries. You do not have to use the summaries of files if they are not relevant. If they are relevant, feel free to copy them verbatim, or you may choose to extract parts of them to best answer the user. Below is the relevant file structure, selected README excerpts, and summaries of important files. Using this information, please provide precise answers to the following question, referencing specific files or sections when useful. You may also provide code snippets and refer to functions in the files. You are responding directly to the user. Only address the user (whom you should call '{self.repo} Explorer') in your response.

README.md portion: '{readme_response}'
Comma-separated file structure: '{file_response}'
@@ -172,5 +153,5 @@ def run_chat(self, user_input):
response_chunk = chunk.choices[0].delta.content
yield response_chunk
response += response_chunk
self.update_history("assistant", response)

self.update_history("assistant", response)
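
With the constructor now taking the key explicitly instead of reading it from the environment, wiring the pieces together looks roughly like this (the URL, token, and key are placeholders):

from github_scraper import GitHubScraper
from chat_rover import ChatRover

scraper = GitHubScraper("https://github.com/owner/repo", token="ghp_...")
rover = ChatRover(scraper, "sk-...")

# run_chat streams the answer in chunks
for chunk in rover.run_chat("What does this repository do?"):
    print(chunk, end="")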
37 changes: 17 additions & 20 deletions github_scraper.py
@@ -1,28 +1,31 @@
import requests

IGNORE_EXTS = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp', '.svg', '.mp4', '.mp3',
'.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip', '.tar', '.gz', '.rar',
'.7z', '.exe', '.dll', '.jar', '.war', '.class']
IGNORE_EXTS = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp', 'svg', 'mp4', 'mp3',
'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'zip', 'tar', 'gz', 'rar',
'7z', 'exe', 'dll', 'jar', 'war', 'class']


class GitHubScraper:

def __init__(self, github_url, branch=None, condensed=False):
def __init__(self, github_url, token=None, branch=None, condensed=False):
self.github_url = github_url
self.token = token
self.header = {'Authorization': f'token {self.token}'} if self.token else {}
self.owner, self.repo = self.get_github_repo_info()
self.branch = self.get_default_branch() if branch is None else branch

self.file_contents = {}
self.root_readme = ""
self.file_paths = []
self.set_files(condensed)

# Getters
def get_repo_name(self):
return self.repo

def get_file_paths(self):
return self.file_paths

def get_readme(self):
return self.root_readme

@@ -34,7 +37,7 @@ def get_github_repo_info(self):

def get_default_branch(self):
url = f"https://api.github.com/repos/{self.owner}/{self.repo}"
response = requests.get(url)
response = requests.get(url, headers=self.header)
if response.status_code == 200:
data = response.json()
return data.get('default_branch', 'master')
@@ -44,7 +47,7 @@ def get_default_branch(self):

def set_files(self, condensed=False):
url = f"https://api.github.com/repos/{self.owner}/{self.repo}/git/trees/{self.branch}?recursive=1"
response = requests.get(url)
response = requests.get(url, headers=self.header)

if response.status_code == 200:
data = response.json()
@@ -56,18 +59,21 @@ def set_files(self, condensed=False):
if file_extension not in IGNORE_EXTS:
if file_name == 'readme.md':
# must use the path's original casing to fetch the file
self.root_readme = self.get_file_raw(file['path'])
self.root_readme = self.get_file_raw(file['path'])
files.append(file['path'])
if condensed:
files = self._condense_file_structure(files)
self.file_paths = files
else:
print("Error:", response.status_code, response.text)


def get_file_raw(self, file_path):
if file_path in self.file_contents:
return self.file_contents[file_path]

url = f'https://api.github.com/repos/{self.owner}/{self.repo}/contents/{file_path}?ref={self.branch}'
headers = {'Accept': 'application/vnd.github.v3.raw'}
headers = self.header.copy()
headers['Accept'] = 'application/vnd.github.v3.raw'

response = requests.get(url, headers=headers)

@@ -98,12 +104,3 @@ def _condense_file_structure(self, file_paths):
formatted_structure += " " * current_depth + path_segments[-1] + "\n"

return formatted_structure


if __name__ == "__main__":
# Replace with your GitHub URL
github_url = 'https://github.com/Stability-AI/generative-models'

scraper = GitHubScraper(github_url)
print(scraper.root_readme)
print(scraper.file_paths)
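
With the inline __main__ demo removed, the same smoke test can be run from a separate script; a sketch using the repo URL the old demo pointed at (pass a token to raise GitHub's rate limit):

from github_scraper import GitHubScraper

# Unauthenticated requests count against GitHub's low anonymous rate limit.
scraper = GitHubScraper('https://github.com/Stability-AI/generative-models')
print(scraper.get_repo_name())
print(scraper.get_readme())
print(scraper.get_file_paths())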
10 changes: 8 additions & 2 deletions requirements.txt
@@ -1,6 +1,12 @@
streamlit
altair==4
altair
openai
numpy
tiktoken
langchain
langchain==0.0.353
langchain_openai
langchain_community==0.0.9
langchain_core
langsmith==0.0.77
faiss-cpu
python-dotenv