From 58105eedecdd9c0be2df68b6b9558e943f55b101 Mon Sep 17 00:00:00 2001 From: John Woods Date: Wed, 12 Nov 2025 17:47:50 -0700 Subject: [PATCH 1/7] Add .env support --- rcg/README.md | 2 +- rcg/llm.py | 4 ++-- rcg/main.py | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/rcg/README.md b/rcg/README.md index 0f70c31b..801b5bd1 100644 --- a/rcg/README.md +++ b/rcg/README.md @@ -31,7 +31,7 @@ Unity executes → Camera updates → Gradio shows result ```bash # Clone and install dependencies cd rcg -pip install gradio pillow openai +pip install gradio pillow openai python-dotenv # Install pyrcareworld cd ../pyrcareworld diff --git a/rcg/llm.py b/rcg/llm.py index 62814611..2f3546cd 100644 --- a/rcg/llm.py +++ b/rcg/llm.py @@ -29,8 +29,8 @@ class LLMConfig: # OpenAI API settings (using custom Qwen3 API endpoint) API_KEY = os.getenv("OPENAI_API_KEY", "") - BASE_URL = os.getenv("OPENAI_BASE_URL", "") - MODEL = os.getenv("OPENAI_MODEL", "") + BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") + MODEL = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo") # Temperature and other params TEMPERATURE = 0.7 diff --git a/rcg/main.py b/rcg/main.py index ffc8be9c..4faec524 100644 --- a/rcg/main.py +++ b/rcg/main.py @@ -10,6 +10,7 @@ import sys import argparse from pathlib import Path +from dotenv import load_dotenv # Add project root to path _PROJECT_ROOT = Path(__file__).parent.parent @@ -157,6 +158,9 @@ def parse_args(): def main(): """Main entry point.""" + # Load environment variables + load_dotenv() + # Parse arguments args = parse_args() use_gradio = not args.no_gradio From 3506e51006e851363235c84b86016acd85220fc6 Mon Sep 17 00:00:00 2001 From: John Woods Date: Thu, 20 Nov 2025 11:00:52 -0700 Subject: [PATCH 2/7] Added camera control --- rcg/gradio_ui.py | 169 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 151 insertions(+), 18 deletions(-) diff --git a/rcg/gradio_ui.py b/rcg/gradio_ui.py index 65fdde15..436c720d 100644 --- a/rcg/gradio_ui.py +++ b/rcg/gradio_ui.py @@ -81,7 +81,7 @@ def camera_loop(): _latest_frame = np.array(image) # ~10 FPS = ~0.1 second delay - time.sleep(0.1) + # time.sleep(0.01) except Exception as e: print(f"[Camera Feed Error] {e}") @@ -112,7 +112,7 @@ def get_latest_frame(): # ============================================================================ -# Control Button Functions +# Movement Control Button Functions # ============================================================================ def move_up() -> str: @@ -158,7 +158,7 @@ def move_left() -> str: try: with _unity_lock: # Thread safety _global_robot.IKTargetDoMove( - position=[-MOVEMENT_DISTANCE, 0, 0], + position=[MOVEMENT_DISTANCE, 0, 0], duration=1.0, speed_based=False, relative=True @@ -177,7 +177,7 @@ def move_right() -> str: try: with _unity_lock: # Thread safety _global_robot.IKTargetDoMove( - position=[MOVEMENT_DISTANCE, 0, 0], + position=[-MOVEMENT_DISTANCE, 0, 0], duration=1.0, speed_based=False, relative=True @@ -216,6 +216,110 @@ def release_action() -> str: traceback.print_exc() return f"✗ Error: {str(e)}" +# ============================================================================ +# Camera Control Button Functions +# ============================================================================ + +def move_camera_up() -> str: + """Move camera up by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[0, MOVEMENT_DISTANCE, 0], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera up {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + + +def move_camera_down() -> str: + """Move camera down by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[0, -MOVEMENT_DISTANCE, 0], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera down {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + + +def move_camera_left() -> str: + """Move camera left by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[MOVEMENT_DISTANCE, 0, 0], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera left {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + + +def move_camera_right() -> str: + """Move camera right by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[-MOVEMENT_DISTANCE, 0, 0], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera right {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + +def move_camera_forward() -> str: + """Move camera forward by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[0, 0, -MOVEMENT_DISTANCE], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera forward {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + + +def move_camera_back() -> str: + """Move camera back by configured distance.""" + try: + with _unity_lock: # Thread safety + _global_camera.DoMove( + position=[0, 0, MOVEMENT_DISTANCE], + duration=1.0, + speed_based=False, + relative=True + ) + return f"✓ Moved camera back {int(MOVEMENT_DISTANCE*100)}cm" + except Exception as e: + import traceback + traceback.print_exc() + return f"✗ Error: {str(e)}" + # ============================================================================ # Chat Interface Functions @@ -323,12 +427,42 @@ def create_interface() -> gr.Blocks: # Title gr.Markdown("# RCareGen") - # Top: Camera Feed + # Top: Camera Feed and controls with gr.Row(): - camera_feed = gr.Image( - label="Robot Camera Feed (~10 FPS)", - type="numpy" - ) + with gr.Column(scale=1): + with gr.Group(): + gr.Markdown(f"**Camera Controls** ({int(MOVEMENT_DISTANCE*100)}cm increments)") + + # 3x3 Grid for directional controls with equal-width columns + # Row 1: Empty, Up, Empty + with gr.Row(): + gr.HTML("
") # Empty spacer + camera_up = gr.Button("⬆️ Up", variant="primary", elem_classes=["direction-btn"]) + gr.HTML("
") # Empty spacer + + # Row 2: Left, Empty, Right + with gr.Row(): + camera_left = gr.Button("⬅️ Left", variant="primary", elem_classes=["direction-btn"]) + gr.HTML("") # Empty spacer + camera_right = gr.Button("➡️ Right", variant="primary", elem_classes=["direction-btn"]) + + # Row 3: Empty, Down, Empty + with gr.Row(): + gr.HTML("") # Empty spacer + camera_down = gr.Button("⬇️ Down", variant="primary", elem_classes=["direction-btn"]) + gr.HTML("") # Empty spacer + + # Row 4: Forward, Back + with gr.Row(): + camera_forward = gr.Button("Forward", variant="primary", elem_classes=["direction-btn"]) + camera_back = gr.Button("Back", variant="primary", elem_classes=["direction-btn"]) + + # Right Panel: Camera Feed + with gr.Column(scale=2): + camera_feed = gr.Image( + label="Robot Camera Feed (~10 FPS)", + type="numpy" + ) # Timer for periodic camera updates (Gradio 4.0 style) timer = gr.Timer(value=0.1, active=True) @@ -346,29 +480,20 @@ def create_interface() -> gr.Blocks: # 3x3 Grid for directional controls with equal-width columns # Row 1: Empty, Up, Empty with gr.Row(): - with gr.Column(scale=1): gr.HTML("") # Empty spacer - with gr.Column(scale=1): btn_up = gr.Button("⬆️ Up", variant="primary", elem_classes=["direction-btn"]) - with gr.Column(scale=1): gr.HTML("") # Empty spacer # Row 2: Left, Empty, Right with gr.Row(): - with gr.Column(scale=1): btn_left = gr.Button("⬅️ Left", variant="primary", elem_classes=["direction-btn"]) - with gr.Column(scale=1): gr.HTML("") # Empty spacer - with gr.Column(scale=1): btn_right = gr.Button("➡️ Right", variant="primary", elem_classes=["direction-btn"]) # Row 3: Empty, Down, Empty with gr.Row(): - with gr.Column(scale=1): gr.HTML("") # Empty spacer - with gr.Column(scale=1): btn_down = gr.Button("⬇️ Down", variant="primary", elem_classes=["direction-btn"]) - with gr.Column(scale=1): gr.HTML("") # Empty spacer with gr.Group(): @@ -435,6 +560,14 @@ def create_interface() -> gr.Blocks: outputs=camera_feed ) + # Camera movement button handlers + camera_up.click(fn=move_camera_up, inputs=None, outputs=btn_status) + camera_down.click(fn=move_camera_down, inputs=None, outputs=btn_status) + camera_left.click(fn=move_camera_left, inputs=None, outputs=btn_status) + camera_right.click(fn=move_camera_right, inputs=None, outputs=btn_status) + camera_forward.click(fn=move_camera_forward, inputs=None, outputs=btn_status) + camera_back.click(fn=move_camera_back, inputs=None, outputs=btn_status) + # Movement button handlers btn_up.click(fn=move_up, inputs=None, outputs=btn_status) btn_down.click(fn=move_down, inputs=None, outputs=btn_status) From 505fefe902899f8a015374fb5d5808fb04efcd4b Mon Sep 17 00:00:00 2001 From: John Woods Date: Thu, 20 Nov 2025 18:50:05 -0700 Subject: [PATCH 3/7] Added multi function support --- rcg/gradio_ui.py | 12 +-- rcg/llm.py | 190 +++++++++++++++++------------------------------ rcg/prompt.py | 16 +++- 3 files changed, 88 insertions(+), 130 deletions(-) diff --git a/rcg/gradio_ui.py b/rcg/gradio_ui.py index 436c720d..8f17ee8e 100644 --- a/rcg/gradio_ui.py +++ b/rcg/gradio_ui.py @@ -349,13 +349,13 @@ def process_chat_message(message: str, history: List[dict]) -> Tuple[List[dict], # Add function call info if available if result.get("function_called"): - func_result = result.get("function_result", {}) - func_name = result["function_called"] + for i, func_name in enumerate(result.get("function_called")): + func_result = result.get("function_result")[i] - if func_result.get("success"): - response_parts.append(f"**[Function: {func_name}]** ✓ {func_result.get('message', 'Done')}") - else: - response_parts.append(f"**[Function: {func_name}]** ✗ {func_result.get('message', 'Failed')}") + if func_result.get("success"): + response_parts.append(f"**[Function: {func_name}]** ✓ {func_result.get('message', 'Done')}") + else: + response_parts.append(f"**[Function: {func_name}]** ✗ {func_result.get('message', 'Failed')}") # Add LLM response response_parts.append(result["llm_response"]) diff --git a/rcg/llm.py b/rcg/llm.py index 8328f595..448b0b61 100644 --- a/rcg/llm.py +++ b/rcg/llm.py @@ -8,7 +8,7 @@ # Import prompts from prompt.py from rcg.prompt import ( SYSTEM_PROMPT, - FUNCTION_SCHEMAS, + TOOL_SCHEMAS, get_error_message, get_success_message ) @@ -22,6 +22,9 @@ OpenAI = None print("[Warning] OpenAI package not installed. Install with: pip install openai") +# Load environment variables +from dotenv import load_dotenv +load_dotenv() # ============================================================================ # Configuration @@ -763,53 +766,63 @@ def process_command(self, user_input: str) -> Dict[str, Any]: response = client.chat.completions.create( model=LLMConfig.MODEL, messages=self.conversation_history, - functions=FUNCTION_SCHEMAS, - function_call="auto", + tools=TOOL_SCHEMAS, + tool_choice="auto", + parallel_tool_calls=True, temperature=LLMConfig.TEMPERATURE ) message = response.choices[0].message + print(message) # Check for function call - if hasattr(message, 'function_call') and message.function_call: - function_name = message.function_call.name - function_args = json.loads(message.function_call.arguments) - - # Log function call - if self.enable_logging: - self._write_log(f"FUNCTION CALL: {function_name}") - self._write_log(f"Arguments: {json.dumps(function_args, indent=2, ensure_ascii=False)}") - self._write_log("") + if hasattr(message, 'tool_calls') and message.tool_calls: + function_names = [] + function_args_list = [] + function_results = [] + + for call in message.tool_calls: + function_name = call.function.name + function_names.append(function_name) + function_args = json.loads(call.function.arguments) + function_args_list.append(function_args) + + # Log function call + if self.enable_logging: + self._write_log(f"FUNCTION CALL: {function_name}") + self._write_log(f"Arguments: {json.dumps(function_args, indent=2, ensure_ascii=False)}") + self._write_log("") - if LLMConfig.SHOW_FUNCTION_CALLS: - print(f"\n[LLM] Calling function: {function_name}") - print(f"[LLM] Arguments: {json.dumps(function_args, indent=2)}") + if LLMConfig.SHOW_FUNCTION_CALLS: + print(f"\n[LLM] Calling function: {function_name}") + print(f"[LLM] Arguments: {json.dumps(function_args, indent=2)}") - # Execute function - function_result = execute_function(function_name, function_args) + # Execute function + function_result = execute_function(function_name, function_args) + function_results.append(function_result if function_result else {}) - # Log function result - if self.enable_logging: - self._write_log(f"FUNCTION RESULT:") - self._write_log(f" Success: {function_result.get('success', False)}") - self._write_log(f" Message: {function_result.get('message', 'N/A')}") - if function_result.get('data'): - data_str = json.dumps(function_result['data'], indent=2, ensure_ascii=False) - self._write_log(f" Data: {data_str}") - self._write_log("") - - # Add to history - self.conversation_history.append({ - "role": "assistant", - "content": None, - "function_call": {"name": function_name, "arguments": json.dumps(function_args)} - }) - - self.conversation_history.append({ - "role": "function", - "name": function_name, - "content": json.dumps(function_result) - }) + # Log function result + if self.enable_logging: + self._write_log(f"FUNCTION RESULT:") + self._write_log(f" Success: {function_result.get('success', False)}") + self._write_log(f" Message: {function_result.get('message', 'N/A')}") + if function_result.get('data'): + data_str = json.dumps(function_result['data'], indent=2, ensure_ascii=False) + self._write_log(f" Data: {data_str}") + self._write_log("") + + # Add to history + self.conversation_history.append({ + "role": "assistant", + "content": None, + "function_call": {"name": function_name, "arguments": json.dumps(function_args)} + }) + + self.conversation_history.append({ + "role": "function", + "name": function_name, + "content": json.dumps(function_result) + }) # Get final response final_response = client.chat.completions.create( @@ -829,95 +842,26 @@ def process_command(self, user_input: str) -> Dict[str, Any]: return { "success": True, - "function_called": function_name, - "function_args": function_args, - "function_result": function_result, + "function_called": function_names, + "function_args": function_args_list, + "function_result": function_results, "llm_response": final_message } else: - # No standard function call - try manual parsing - assistant_message = message.content - - # Try to parse manual function call from text - parsed = self._parse_manual_function_call(assistant_message) - - if parsed: - # Found manual function call! - function_name, function_args = parsed + # No function call at all + self.conversation_history.append({"role": "assistant", "content": assistant_message}) - # Log manual function call - if self.enable_logging: - self._write_log(f"MANUAL FUNCTION CALL DETECTED: {function_name}") - self._write_log(f"Arguments: {json.dumps(function_args, indent=2, ensure_ascii=False)}") - self._write_log("") - - if LLMConfig.SHOW_FUNCTION_CALLS: - print(f"\n[LLM] Manual function call: {function_name}") - print(f"[LLM] Arguments: {json.dumps(function_args, indent=2)}") - - # Execute function - function_result = execute_function(function_name, function_args) - - # Log function result - if self.enable_logging: - self._write_log(f"FUNCTION RESULT:") - self._write_log(f" Success: {function_result.get('success', False)}") - self._write_log(f" Message: {function_result.get('message', 'N/A')}") - if function_result.get('data'): - data_str = json.dumps(function_result['data'], indent=2, ensure_ascii=False) - self._write_log(f" Data: {data_str}") - self._write_log("") - - # Add to history - self.conversation_history.append({"role": "assistant", "content": assistant_message}) - - # Create user message with function result - result_message = f"Function {function_name} returned: {json.dumps(function_result)}" - self.conversation_history.append({"role": "user", "content": result_message}) - - # Get final response - final_response = client.chat.completions.create( - model=LLMConfig.MODEL, - messages=self.conversation_history, - temperature=LLMConfig.TEMPERATURE - ) - - final_message = final_response.choices[0].message.content - - # Remove tags from final message - import re - final_message = re.sub(r'.*?', '', final_message, flags=re.DOTALL).strip() - - self.conversation_history.append({"role": "assistant", "content": final_message}) - - # Log LLM response - if self.enable_logging: - self._write_log(f"LLM RESPONSE:") - self._write_log(final_message) - self._write_log("") - - return { - "success": True, - "function_called": function_name, - "function_args": function_args, - "function_result": function_result, - "llm_response": final_message - } - else: - # No function call at all - self.conversation_history.append({"role": "assistant", "content": assistant_message}) - - # Log LLM response - if self.enable_logging: - self._write_log(f"LLM RESPONSE (no function call):") - self._write_log(assistant_message) - self._write_log("") + # Log LLM response + if self.enable_logging: + self._write_log(f"LLM RESPONSE (no function call):") + self._write_log(assistant_message) + self._write_log("") - return { - "success": True, - "function_called": None, - "llm_response": assistant_message - } + return { + "success": True, + "function_called": None, + "llm_response": assistant_message + } except Exception as e: return { diff --git a/rcg/prompt.py b/rcg/prompt.py index 2fe39aad..71cca18d 100644 --- a/rcg/prompt.py +++ b/rcg/prompt.py @@ -15,7 +15,7 @@ # System Prompt # ============================================================================ -SYSTEM_PROMPT = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. +SYSTEM_PROMPT_FULL = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. ## ⚠️ CRITICAL: Coordinate System Unity uses: **X = left/right, Y = UP/DOWN (vertical), Z = forward/back** @@ -125,6 +125,18 @@ 6. **NEVER use Z-axis for up/down movement! Always use Y-axis!** """ +SYSTEM_PROMPT = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. + +## ⚠️ CRITICAL: Coordinate System +Unity uses: **X = left/right, Y = UP/DOWN (vertical), Z = forward/back** +- Move UP → increase Y (y > 0) +- Move DOWN → decrease Y (y < 0) +- Move LEFT → decrease X (x < 0) +- Move RIGHT → increase X (x > 0) +- Move FORWARD → increase Z (z > 0) +- Move BACKWARD → decrease Z (z < 0) +""" + # ============================================================================ # Function Schemas for OpenAI Function Calling @@ -243,6 +255,8 @@ } ] +TOOL_SCHEMAS = [{"type": "function", "function": f} for f in FUNCTION_SCHEMAS] + # ============================================================================ # User Prompt Templates From 2d800c7c4ec41ae59858f4bb455402b5f9b7df8f Mon Sep 17 00:00:00 2001 From: John Woods Date: Thu, 20 Nov 2025 20:18:53 -0700 Subject: [PATCH 4/7] Fixed no tool call bug --- rcg/llm.py | 7 +++---- rcg/prompt.py | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rcg/llm.py b/rcg/llm.py index 448b0b61..b9b120ff 100644 --- a/rcg/llm.py +++ b/rcg/llm.py @@ -773,7 +773,6 @@ def process_command(self, user_input: str) -> Dict[str, Any]: ) message = response.choices[0].message - print(message) # Check for function call if hasattr(message, 'tool_calls') and message.tool_calls: @@ -849,18 +848,18 @@ def process_command(self, user_input: str) -> Dict[str, Any]: } else: # No function call at all - self.conversation_history.append({"role": "assistant", "content": assistant_message}) + self.conversation_history.append({"role": "assistant", "content": message.content}) # Log LLM response if self.enable_logging: self._write_log(f"LLM RESPONSE (no function call):") - self._write_log(assistant_message) + self._write_log(message.content) self._write_log("") return { "success": True, "function_called": None, - "llm_response": assistant_message + "llm_response": message.content } except Exception as e: diff --git a/rcg/prompt.py b/rcg/prompt.py index 71cca18d..ae723dec 100644 --- a/rcg/prompt.py +++ b/rcg/prompt.py @@ -126,6 +126,7 @@ """ SYSTEM_PROMPT = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. +Think step by step about what functions you need to call to complete the user's request. ## ⚠️ CRITICAL: Coordinate System Unity uses: **X = left/right, Y = UP/DOWN (vertical), Z = forward/back** From 804a0095fd23608f18020265c88cc47810623e24 Mon Sep 17 00:00:00 2001 From: John Woods Date: Thu, 20 Nov 2025 20:19:17 -0700 Subject: [PATCH 5/7] Switch to gpt-5.1 --- rcg/llm.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/rcg/llm.py b/rcg/llm.py index b9b120ff..af467c07 100644 --- a/rcg/llm.py +++ b/rcg/llm.py @@ -39,7 +39,7 @@ class LLMConfig: BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") # Use models supportting function calling - MODEL = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo") + MODEL = os.getenv("OPENAI_MODEL", "gpt-5.1") # Temperature and other params TEMPERATURE = 0.7 @@ -814,11 +814,17 @@ def process_command(self, user_input: str) -> Dict[str, Any]: self.conversation_history.append({ "role": "assistant", "content": None, - "function_call": {"name": function_name, "arguments": json.dumps(function_args)} + "tool_calls": [{ + "function": {"name": function_name, "arguments": json.dumps(function_args)}, + "type": call.type, + "id": call.id + }] }) self.conversation_history.append({ - "role": "function", + "tool_call_id": call.id, + "role": "tool", + "type": "function_tool_output", "name": function_name, "content": json.dumps(function_result) }) From 27114d67f8d16c5b1be2658ef1b966af1c6ce9f1 Mon Sep 17 00:00:00 2001 From: John Woods Date: Mon, 1 Dec 2025 19:23:17 -0700 Subject: [PATCH 6/7] Some prompting experiments --- rcg/README.md | 2 +- rcg/prompt.py | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/rcg/README.md b/rcg/README.md index 28557c09..d0869369 100644 --- a/rcg/README.md +++ b/rcg/README.md @@ -6,7 +6,7 @@ LLM-based robot control interface for RCareWorld simulation environment. ```bash # Install dependencies -pip install gradio pillow openai python-dotenv +pip install gradio==5.* pillow openai python-dotenv # Set API key (if using OpenAI) export OPENAI_API_KEY="your-key" diff --git a/rcg/prompt.py b/rcg/prompt.py index ae723dec..c52b5e69 100644 --- a/rcg/prompt.py +++ b/rcg/prompt.py @@ -126,7 +126,20 @@ """ SYSTEM_PROMPT = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. -Think step by step about what functions you need to call to complete the user's request. +Remember, you are an agent - please keep going until the user's +query is completely resolved, before ending your turn and yielding +back to the user. Decompose the user's query into all required +sub-requests, and confirm that each is completed. Do not stop +after completing only part of the request. Only terminate your +turn when you are sure that the problem is solved. You must be +prepared to answer multiple queries and only finish the call once +the user has confirmed they're done. + +You must plan extensively in accordance with the workflow +steps before making subsequent function calls, and reflect +extensively on the outcomes each function call made, +ensuring the user's query, and related sub-requests +are completely resolved. ## ⚠️ CRITICAL: Coordinate System Unity uses: **X = left/right, Y = UP/DOWN (vertical), Z = forward/back** From 02c6d338892b7737220ef7499851064feae3d9b2 Mon Sep 17 00:00:00 2001 From: John Woods Date: Thu, 11 Dec 2025 16:08:37 -0700 Subject: [PATCH 7/7] Add token generation limit --- rcg/llm.py | 8 +++++--- rcg/prompt.py | 16 ++-------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/rcg/llm.py b/rcg/llm.py index af467c07..e0e6f8ac 100644 --- a/rcg/llm.py +++ b/rcg/llm.py @@ -43,7 +43,7 @@ class LLMConfig: # Temperature and other params TEMPERATURE = 0.7 - MAX_TOKENS = None # None = no limit + MAX_TOKENS = 2048 # None = no limit # Verbose logging VERBOSE = True @@ -769,7 +769,8 @@ def process_command(self, user_input: str) -> Dict[str, Any]: tools=TOOL_SCHEMAS, tool_choice="auto", parallel_tool_calls=True, - temperature=LLMConfig.TEMPERATURE + temperature=LLMConfig.TEMPERATURE, + max_completion_tokens=LLMConfig.MAX_TOKENS ) message = response.choices[0].message @@ -833,7 +834,8 @@ def process_command(self, user_input: str) -> Dict[str, Any]: final_response = client.chat.completions.create( model=LLMConfig.MODEL, messages=self.conversation_history, - temperature=LLMConfig.TEMPERATURE + temperature=LLMConfig.TEMPERATURE, + max_completion_tokens=LLMConfig.MAX_TOKENS ) final_message = final_response.choices[0].message.content diff --git a/rcg/prompt.py b/rcg/prompt.py index c52b5e69..6f29897e 100644 --- a/rcg/prompt.py +++ b/rcg/prompt.py @@ -126,20 +126,8 @@ """ SYSTEM_PROMPT = """You control a Kinova Gen3 robotic arm in Unity. Be concise and direct. -Remember, you are an agent - please keep going until the user's -query is completely resolved, before ending your turn and yielding -back to the user. Decompose the user's query into all required -sub-requests, and confirm that each is completed. Do not stop -after completing only part of the request. Only terminate your -turn when you are sure that the problem is solved. You must be -prepared to answer multiple queries and only finish the call once -the user has confirmed they're done. - -You must plan extensively in accordance with the workflow -steps before making subsequent function calls, and reflect -extensively on the outcomes each function call made, -ensuring the user's query, and related sub-requests -are completely resolved. +Think step by step about what functions you need to call to fully complete the user's request. +Ensure that you are calling all functions necessary to achieve the desired outcome. ## ⚠️ CRITICAL: Coordinate System Unity uses: **X = left/right, Y = UP/DOWN (vertical), Z = forward/back**