53 changes: 46 additions & 7 deletions orionagent/agents.py
@@ -1,11 +1,13 @@
import ast
import textwrap
import json
import os
import re

# import openai
from openai import OpenAI

from orionagent.rag.embeddings import find_similar
from rag.embeddings import find_similar, load_embeddings

TEMPLATE = """
Orion is a python library for time series anomaly detection.
@@ -44,6 +46,8 @@ def __init__(self, model: str = 'gpt-4o-mini', client: OpenAI = None, system_pro
self.client = client
self.system_prompt = system_prompt

self.rag_table = load_embeddings(path='../embeddings.pkl')

print('What can I help you with today?')

def run(self, prompt: str) -> str:
@@ -57,7 +61,8 @@ def run(self, prompt: str) -> str:
return response.choices[0].message.content

def run_rag(self, user_task: str) -> str:
similar_documents = find_similar(user_task, k=3)

similar_documents = find_similar(user_task, table=self.rag_table, openai_client = self.client, k=3)
documents = [
f'DOCUMENT #{i+1}: {doc}' for i, doc in enumerate(similar_documents)
]
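
A minimal usage sketch of the new wiring, assuming the pickled embeddings exist at '../embeddings.pkl' relative to the working directory and the OpenAI client is authenticated; the task string is only illustrative:

from openai import OpenAI
from agents import OpenAILLM

llm = OpenAILLM(client=OpenAI())  # load_embeddings() runs once here and stores the RAG table on the instance
response = llm.run_rag('How do I detect anomalies in a CSV of timestamps and values?')
print(response)
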
@@ -83,10 +88,42 @@ def __init__(self, llm: LLM, name: str):
self.name = name

def ask_user(self, question: str) -> str:
print('Asking user...')
# print('Asking user...')
response = input(question)
return response



def normalize_generated_code(code: str) -> str:
# Remove ```python or ``` at the start, and closing ``
code = re.sub(r'^```(?:python)?\s*|\s*```$', '', code.strip(), flags=re.MULTILINE)

# Replace common invisible unicode with plain space
code = code.replace('\u00A0', ' ') # non-breaking space
code = code.replace('\u200B', '') # zero-width space
code = code.replace('\ufeff', '') # BOM

# Normalize line endings and remove leading/trailing space
code = code.replace('\r\n', '\n').replace('\r', '\n')
code = textwrap.dedent(code)
code = code.strip()

# Remove leading blank lines
code = re.sub(r'^\s*\n', '', code, flags=re.MULTILINE)

# Make sure it ends with a newline (optional)
if not code.endswith('\n'):
code += '\n'


return code
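
A quick, hypothetical illustration of what the cleanup handles: a fenced snippet containing a non-breaking space comes back as plain, dedented, newline-terminated source.

raw = "```python\nx\u00a0= 1\nprint(x)\n```"
cleaned = normalize_generated_code(raw)
print(repr(cleaned))  # 'x = 1\nprint(x)\n'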

def is_code_valid(code: str) -> bool:
try:
ast.parse(code)
return True
except SyntaxError as e:
print(f"SyntaxError: {e}")
return False
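
The helper only reports whether the snippet parses; for example:

print(is_code_valid('x = 1'))  # True
print(is_code_valid('x ='))    # prints the SyntaxError message and returns False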

def execute_code(code: str, variable_name: str) -> bool:
"""Execute code block.
@@ -108,20 +145,22 @@ def execute_code(code: str, variable_name: str) -> bool:
If the code is not executable.
"""
try:
ast.parse(code) # check syntax, throws a SyntaxError

code = normalize_generated_code(code)
is_code_valid(code)
# Execute the code and capture the output
print('trying')
exec_globals = {}
exec_locals = {}
exec(code, exec_globals, exec_locals)

print('executed successfully')

result = exec_locals.get(variable_name, None)
return True, result

except Exception as ex:
print(f'error {ex}')
return False
return False, None
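
A minimal sketch of the updated contract, assuming a well-formed snippet that defines the requested variable; the function now returns a (status, result) pair rather than the bare bool in its annotation:

status, result = execute_code("anomalies = [1, 2, 3]", 'anomalies')
print(status)  # True
print(result)  # [1, 2, 3]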


if __name__ == '__main__':
31 changes: 26 additions & 5 deletions orionagent/app.py
@@ -5,7 +5,10 @@
import streamlit as st

from plot import generate_time_series_chart, plot_dataframe
from agents import execute_code
from agents import execute_code, OpenAILLM

import asyncio
asyncio.set_event_loop(asyncio.new_event_loop())

st.title("Orion Agent")

@@ -74,10 +77,28 @@
response = f"The number of rows in the uploaded CSV file is {len(df)}"
st.write(response)

elif 'code' in prompt.lower():
response = "Executing code"
x = execute_code("x = 'hello'", 'x')
st.write(x)
elif 'detect anomalies' in prompt.lower():
anomaly_detector = OpenAILLM(client=client)

if uploaded_file:
prompt += f'Load the dataframe using csv file : {uploaded_file} '

st.write('Generating code to perform rag task')
response_code = anomaly_detector.run_rag(prompt)
with st.expander("View generated code and execution status"):
st.write(response_code)

st.write('Executing code')
status, anomalies = execute_code(response_code, 'anomalies')
st.write("execution success", status)

if len(anomalies):
st.write('Detected anomalies')
st.write(anomalies)
else:
st.write('No anomalies detected')
response = anomalies


else:
stream = client.chat.completions.create(
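
Outside Streamlit, the same generate-then-execute flow might look roughly like the sketch below; the CSV path is hypothetical and the generated code is assumed to define an 'anomalies' variable, as the branch above expects:

from openai import OpenAI
from agents import OpenAILLM, execute_code

detector = OpenAILLM(client=OpenAI())
generated = detector.run_rag('detect anomalies in data/sample.csv')  # hypothetical file path
status, anomalies = execute_code(generated, 'anomalies')
if status and anomalies is not None and len(anomalies):
    print(anomalies)
else:
    print('No anomalies detected')
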
14 changes: 8 additions & 6 deletions orionagent/rag/embeddings.py
@@ -1,5 +1,7 @@
import os
import pickle
import streamlit as st


import tiktoken
import numpy as np
@@ -48,7 +50,7 @@ def generate_embeddings(openai_key, path='orion_contents/', model="text-embeddin
continue

name = file.split('/')[-1]
print(file)
# print(file)
with open(os.path.join(root, file), 'r') as f:
data = f.read()

@@ -94,28 +96,28 @@ def format_into_table(embeddings):


def load_embeddings(path='embeddings.pkl'):
with open('embeddings.pkl', 'rb') as f:
with open(path, 'rb') as f:
embeddings = pickle.load(f)

table = format_into_table(embeddings)
return table


def find_similar(text, table, openai_key, distance=CosineSimilarity, k=5):
def find_similar(text, openai_client, table=None, distance=CosineSimilarity, k=5):
"""Return top similar documents."""

sim = distance(dim=0)
sim_table = table.copy()

client = OpenAI(api_key=openai_key)
embedding = get_embedding(text, client)
embedding = get_embedding(text, openai_client)

embedding = torch.tensor(embedding.data[0].embedding)
sim_table['similarity'] = sim_table['embedding'].apply(lambda x: sim(torch.tensor(x), embedding))
sim_table.sort_values('similarity', ascending=False)
topk = sim_table.iloc[:k]['text']
for i, document in enumerate(topk):
print(f'DOCUMENT #{i+1}:\n')
print(document)
# print(document)

return topk
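
A minimal call sketch for the new signature, where the caller supplies a preloaded table and an existing OpenAI client instead of a raw API key; it assumes embeddings.pkl was produced by generate_embeddings and that OPENAI_API_KEY is set in the environment:

from openai import OpenAI
from rag.embeddings import load_embeddings, find_similar

client = OpenAI()
table = load_embeddings(path='embeddings.pkl')
top_docs = find_similar('How do I fit an Orion pipeline?', openai_client=client, table=table, k=3)
for doc in top_docs:
    print(doc[:80])  # first 80 characters of each retrieved document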
