|
1 | 1 | --- |
2 | 2 | title: Assistants |
3 | | ---- |
| 3 | +--- |
| 4 | + |
| 5 | +# Building Local AI Assistants |
| 6 | + |
| 7 | +While Cortex doesn't yet support the full OpenAI Assistants API, we can build assistant-like functionality |
| 8 | +using the chat completions API. Here's how to create persistent, specialized assistants locally. |
| 9 | + |
| 10 | +## Get Started |
| 11 | + |
| 12 | +First, start the model:
| 13 | + |
| 14 | +```sh |
| 15 | +cortex run -d llama3.1:8b-gguf-q4-km |
| 16 | +``` |
| 17 | + |
| 18 | +Set up your Python environment: |
| 19 | + |
| 20 | +```bash |
| 21 | +mkdir assistant-test |
| 22 | +cd assistant-test |
| 23 | +python -m venv .venv |
| 24 | +source .venv/bin/activate |
| 25 | +pip install openai |
| 26 | +``` |
| 27 | + |
| 28 | +## Creating an Assistant |
| 29 | + |
| 30 | +Here's how to create an assistant-like experience using chat completions: |
| 31 | + |
| 32 | +```python |
| 33 | +from openai import OpenAI |
| 34 | +from typing import List, Dict |
| 35 | + |
class LocalAssistant:
    """A minimal assistant built on the chat completions API.

    Keeps a running conversation history so follow-up questions retain
    context, and prepends the system instructions on every request.
    """

    def __init__(self, name: str, instructions: str):
        self.client = OpenAI(
            base_url="http://localhost:39281/v1",
            api_key="not-needed"
        )
        self.name = name
        self.instructions = instructions
        self.conversation_history: List[Dict] = []

    def add_message(self, content: str, role: str = "user") -> str:
        """Send one message and return the assistant's reply as text."""
        # Record the incoming message first so it is part of the request.
        self.conversation_history.append({"role": role, "content": content})

        # Every request re-sends the instructions plus the full history,
        # because the chat completions endpoint itself keeps no state.
        request_messages = [{"role": "system", "content": self.instructions}]
        request_messages.extend(self.conversation_history)

        reply = self.client.chat.completions.create(
            model="llama3.1:8b-gguf-q4-km",
            messages=request_messages,
        ).choices[0].message.content

        # Remember the reply so the next turn sees it as context.
        self.conversation_history.append({"role": "assistant", "content": reply})
        return reply
| 67 | + |
# Build an assistant specialised for programming questions.
coding_assistant = LocalAssistant(
    name="Code Buddy",
    instructions="""You are a helpful coding assistant who:
    - Explains concepts with practical examples
    - Provides working code snippets
    - Points out potential pitfalls
    - Keeps responses concise but informative"""
)

# The history lives inside the object, so each question below
# automatically sees the answers that came before it.
for question in (
    "Can you explain Python list comprehensions with examples?",
    "Can you show a more complex example with filtering?",
):
    print(coding_assistant.add_message(question))
| 85 | +``` |
| 86 | + |
| 87 | +## Specialized Assistants |
| 88 | + |
| 89 | +You can create different types of assistants by changing the instructions: |
| 90 | + |
| 91 | +```python |
# Two more personas built from the same class - only the system
# instructions differ.

# A step-by-step math tutor.
math_tutor = LocalAssistant(
    instructions="""You are a patient math tutor who:
    - Breaks down problems step by step
    - Uses clear explanations
    - Provides practice problems
    - Encourages understanding over memorization""",
    name="Math Buddy",
)

# An editing companion for prose.
writing_assistant = LocalAssistant(
    instructions="""You are a writing assistant who:
    - Helps improve clarity and structure
    - Suggests better word choices
    - Maintains the author's voice
    - Explains the reasoning behind suggestions""",
    name="Writing Buddy",
)
| 111 | +``` |
| 112 | + |
| 113 | +## Working with Context |
| 114 | + |
| 115 | +Here's how to create an assistant that can work with context: |
| 116 | + |
| 117 | +```python |
class ContextAwareAssistant(LocalAssistant):
    """An assistant whose system prompt carries a fixed piece of
    reference material (e.g. code to review) on every request."""

    def __init__(self, name: str, instructions: str, context: str):
        super().__init__(name, instructions)
        self.context = context

    def add_message(self, content: str, role: str = "user") -> str:
        """Send a message; the stored context rides along in the system prompt."""
        system_prompt = f"{self.instructions}\n\nContext:\n{self.context}"
        request_messages = (
            [{"role": "system", "content": system_prompt}]
            + self.conversation_history
            + [{"role": role, "content": content}]
        )

        reply = self.client.chat.completions.create(
            model="llama3.1:8b-gguf-q4-km",
            messages=request_messages,
        ).choices[0].message.content

        # History is updated only after the request returns.
        self.conversation_history.append({"role": role, "content": content})
        self.conversation_history.append({"role": "assistant", "content": reply})
        return reply
| 141 | + |
# A snippet we want reviewed, handed to the assistant as context.
code_context = """
def calculate_average(numbers):
    total = 0
    for num in numbers:
        total += num
    return total / len(numbers)
"""

# The reviewer sees the snippet on every request, so questions about
# it can be asked naturally in conversation.
code_reviewer = ContextAwareAssistant(
    instructions="You are a helpful code reviewer. Suggest improvements while being constructive.",
    context=code_context,
    name="Code Reviewer",
)

print(code_reviewer.add_message("Can you review this code and suggest improvements?"))
| 159 | +``` |
| 160 | + |
| 161 | +## Pro Tips |
| 162 | + |
| 163 | +- Keep the conversation history focused - clear it when starting a new topic |
| 164 | +- Use specific instructions to get better responses |
| 165 | +- Consider using temperature and max_tokens parameters for different use cases |
| 166 | +- Remember that the chat completions API is stateless - each request must include any conversation history you want the model to see
| 167 | + |
| 168 | +## Memory Management |
| 169 | + |
| 170 | +For longer conversations, you might want to limit the history: |
| 171 | + |
| 172 | +```python |
def trim_conversation_history(self, max_messages: int = 10):
    """Drop all but the most recent *max_messages* history entries.

    The system instructions are NOT stored in ``conversation_history``
    (they are re-added on every request by ``add_message``), so there
    is no system message to preserve here - we simply keep the newest
    messages. Note: trimming may leave the history starting with an
    assistant message, which is harmless for chat completions.
    """
    if len(self.conversation_history) > max_messages:
        # Keep only the last max_messages entries.
        self.conversation_history = self.conversation_history[-max_messages:]
| 177 | +``` |
| 178 | + |
| 179 | +That's it! While we don't have the full Assistants API yet, we can still create powerful assistant-like |
| 180 | +experiences using the chat completions API. The best part? It's all running locally on your machine. |
0 commit comments