diff --git a/examples/README.md b/examples/README.md index 6d76132..2f5b433 100644 --- a/examples/README.md +++ b/examples/README.md @@ -31,6 +31,34 @@ python3 examples/euler_number_usage.py - See [`docs/EULER_PRECISION_IMPACT_ANALYSIS.md`](../docs/EULER_PRECISION_IMPACT_ANALYSIS.md) for analysis of where this precision issue could manifest across the SpiralSafe ecosystem - Use `tools/scan_euler_precision.py` to scan for hardcoded approximations in your repositories +### import_traces_demo.py + +Demonstrates how to safely import trace data from JSON files with proper error handling for missing required fields. + +**Key Points:** +- ✓ Validates required fields: `trace_id`, `state`, `input`, `output` +- ✓ Provides clear error messages for missing/invalid data +- ✓ Handles FileNotFoundError, JSONDecodeError, ValueError +- ✓ Supports both single trace objects and arrays + +**Usage:** +```bash +python3 examples/import_traces_demo.py +``` + +**Topics Covered:** +- Safe JSON import with validation +- Error handling for missing fields +- Defensive programming patterns +- Clear error reporting + +**Related Tests:** +- `tests/test_import_traces.py` (pytest version) +- `tests/test_import_traces_simple.py` (standalone version) + +**Implementation:** +- See `project-book.ipynb` for the full `import_traces()` function + --- ## 🎯 Purpose diff --git a/examples/import_traces_demo.py b/examples/import_traces_demo.py new file mode 100644 index 0000000..6f43ca7 --- /dev/null +++ b/examples/import_traces_demo.py @@ -0,0 +1,184 @@ +""" +Demonstration of the import_traces() function. + +This script shows how to use the import_traces() function with various scenarios. +""" +import json +import tempfile +from pathlib import Path + + +# The import_traces function (from project-book.ipynb) +def import_traces(json_file_path): + """ + Import trace data from a JSON file with proper error handling. + + See project-book.ipynb for full implementation. 
def import_traces(json_file_path):
    """
    Import trace records from a JSON file, validating every record.

    The file must hold either one trace object or a list of them. Each
    trace must be a JSON object whose ``trace_id``, ``state``, ``input``
    and ``output`` keys are present and non-null; any additional keys are
    passed through untouched.

    Args:
        json_file_path: Path to the JSON file containing trace data.

    Returns:
        List of trace dictionaries in file order. A single top-level
        object is returned as a one-element list.

    Raises:
        FileNotFoundError: If the path does not exist or is a directory.
        json.JSONDecodeError: If the file is not valid JSON. The parser's
            own error is attached as ``__cause__``.
        ValueError: If the top-level value is neither a list nor an
            object, if any trace is invalid, or if the list is empty.
    """
    trace_path = Path(json_file_path)
    # A directory "exists" but can never be a trace file; reject it here so
    # callers get the documented exception instead of an OS-specific one
    # raised by open().
    if not trace_path.exists() or trace_path.is_dir():
        raise FileNotFoundError(f"Trace file not found: {json_file_path}")

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError appends the "line/column/char" suffix to whatever
        # message it is given, so pass the bare parser message (e.msg) and
        # not str(e); otherwise the position is reported twice.
        raise json.JSONDecodeError(
            f"Invalid JSON in file {json_file_path}: {e.msg}",
            e.doc,
            e.pos,
        ) from e

    # Normalise to a list so a lone trace object is handled like a batch.
    if isinstance(data, dict):
        data = [data]
    elif not isinstance(data, list):
        raise ValueError(
            f"Expected JSON to contain a list or dict, got {type(data).__name__}"
        )

    required_fields = ('trace_id', 'state', 'input', 'output')
    validated_traces = []
    errors = []

    for idx, trace in enumerate(data):
        if not isinstance(trace, dict):
            errors.append(f"Trace at index {idx} is not a dictionary: {type(trace).__name__}")
            continue

        missing_fields = [name for name in required_fields if name not in trace]
        if missing_fields:
            errors.append(
                f"Trace at index {idx} is missing required fields: {', '.join(missing_fields)}. "
                f"Available fields: {', '.join(trace) or 'none'}. "
                f"Required fields are: {', '.join(required_fields)}"
            )
            continue

        none_fields = [name for name in required_fields if trace[name] is None]
        if none_fields:
            errors.append(
                f"Trace at index {idx} has null values for required fields: {', '.join(none_fields)}"
            )
            continue

        validated_traces.append(trace)

    # One bad trace rejects the whole file; report at most five problems so
    # the message stays readable for large inputs.
    if errors:
        error_summary = f"Found {len(errors)} invalid trace(s) in {json_file_path}:\n"
        error_summary += "\n".join(f" - {err}" for err in errors[:5])
        if len(errors) > 5:
            error_summary += f"\n ... and {len(errors) - 5} more error(s)"
        raise ValueError(error_summary)

    # Only reachable with no errors, i.e. the file held an empty list.
    if not validated_traces:
        raise ValueError(f"No valid traces found in {json_file_path}")

    return validated_traces
def demo():
    """
    Run four import_traces() scenarios and print the outcome of each.

    Scenarios: a valid two-trace file, a trace missing required fields, a
    path that does not exist, and a file containing malformed JSON. All
    fixture files are created inside a temporary directory that is removed
    when the function returns. Nothing is returned; results go to stdout.
    """
    print("🔍 import_traces() Function Demonstration")
    print("=" * 60)

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir)

        # Demo 1: Valid trace import
        print("\n📋 Demo 1: Importing valid trace data")
        print("-" * 60)
        valid_file = tmp_path / "valid_traces.json"
        valid_data = [
            {
                "trace_id": "trace_001",
                "state": "completed",
                "input": {"query": "What is AI?"},
                "output": {"response": "AI stands for Artificial Intelligence"},
            },
            {
                "trace_id": "trace_002",
                "state": "pending",
                "input": {"query": "How does ML work?"},
                "output": {},
            },
        ]
        # import_traces() reads UTF-8, so write with the same encoding
        # instead of the platform default.
        valid_file.write_text(json.dumps(valid_data, indent=2), encoding="utf-8")

        try:
            traces = import_traces(str(valid_file))
            print(f"✅ Successfully imported {len(traces)} traces:")
            for trace in traces:
                print(f" - Trace {trace['trace_id']}: {trace['state']}")
        except Exception as e:
            print(f"❌ Error: {e}")

        # Demo 2: Missing required field
        print("\n📋 Demo 2: Handling missing required fields")
        print("-" * 60)
        invalid_file = tmp_path / "invalid_traces.json"
        invalid_data = [{
            "trace_id": "trace_003",
            "state": "completed",
            # 'input' and 'output' are deliberately left out
        }]
        invalid_file.write_text(json.dumps(invalid_data, indent=2), encoding="utf-8")

        try:
            import_traces(str(invalid_file))
            print("❌ Should have raised an error!")
        except ValueError as e:
            print("✅ Caught error as expected:")
            # The full message lists every field; show only the start.
            print(f" {str(e)[:200]}...")
        except Exception as e:
            print(f"❌ Unexpected error: {e}")

        # Demo 3: File not found
        print("\n📋 Demo 3: Handling file not found")
        print("-" * 60)
        try:
            # Probe inside the fresh temp directory so the path is
            # guaranteed absent regardless of the current working directory.
            import_traces(str(tmp_path / "nonexistent_file.json"))
            print("❌ Should have raised FileNotFoundError!")
        except FileNotFoundError as e:
            print("✅ Caught error as expected:")
            print(f" {e}")
        except Exception as e:
            print(f"❌ Unexpected error: {e}")

        # Demo 4: Invalid JSON
        print("\n📋 Demo 4: Handling invalid JSON")
        print("-" * 60)
        malformed_file = tmp_path / "malformed.json"
        malformed_file.write_text("{not valid json", encoding="utf-8")

        try:
            import_traces(str(malformed_file))
            print("❌ Should have raised JSONDecodeError!")
        except json.JSONDecodeError:
            print("✅ Caught error as expected:")
            print(" Invalid JSON detected")
        except Exception as e:
            print(f"❌ Unexpected error: {e}")

    print("\n" + "=" * 60)
    print("✨ Demonstration complete!")
    print("\nKey Features:")
    print(" • Validates required fields: trace_id, state, input, output")
    print(" • Provides clear error messages for missing/invalid data")
    print(" • Handles FileNotFoundError, JSONDecodeError, ValueError")
    print(" • Supports both single trace objects and arrays")
    print(" • Allows additional fields beyond required ones")


if __name__ == "__main__":
    demo()
{\n \"session_id\": self.session_id,\n \"timestamp\": self.timestamp,\n \"task\": self.task,\n \"context\": self.context.to_dict(),\n \"system_prompt\": self.system_prompt,\n \"log_entries\": self.log_entries\n }\n \n with open(log_file, \"w\", encoding=\"utf-8\") as f:\n json.dump(session_data, f, indent=2)\n \n return str(log_file)\n\n\ndef create_wave_session(task: Optional[str] = None) -> WaveSession:\n \"\"\"\n Create a new Wave session with captured context.\n \n Args:\n task: Optional task description for the session\n \n Returns:\n WaveSession object\n \"\"\"\n ctx = get_wave_context()\n session_id = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n \n session = WaveSession(\n session_id=session_id,\n timestamp=ctx.timestamp,\n context=ctx,\n system_prompt=generate_system_prompt(ctx),\n task=task\n )\n \n session.add_log(\"session_start\", f\"Session created: {session_id}\")\n if task:\n session.add_log(\"task\", task)\n \n return session\n\n\n# Create a demo session\ndemo_session = create_wave_session(\"Explore Wave Toolkit Project Book\")\nprint(f\"\ud83d\udcdd Session Created: {demo_session.session_id}\")\nprint(f\" Task: {demo_session.task}\")\nprint(f\" Log Entries: {len(demo_session.log_entries)}\")" }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def import_traces(json_file_path: str) -> List[Dict[str, Any]]:", + " \"\"\"", + " Import trace data from a JSON file with proper error handling.", + " ", + " Handles JSON files containing trace data with required fields:", + " - trace_id: Unique identifier for the trace", + " - state: Current state of the trace", + " - input: Input data for the trace", + " - output: Output data from the trace", + " ", + " Args:", + " json_file_path: Path to the JSON file containing trace data", + " ", + " Returns:", + " List of trace dictionaries with validated fields", + " ", + " Raises:", + " FileNotFoundError: If the JSON file doesn't exist", + " 
json.JSONDecodeError: If the file contains invalid JSON", + " ValueError: If required fields are missing or invalid", + " ", + " Example:", + " >>> traces = import_traces(\"traces.json\")", + " >>> for trace in traces:", + " ... print(f\"Trace {trace['trace_id']}: {trace['state']}\")", + " \"\"\"", + " # Check if file exists", + " if not Path(json_file_path).exists():", + " raise FileNotFoundError(f\"Trace file not found: {json_file_path}\")", + " ", + " # Read and parse JSON", + " try:", + " with open(json_file_path, 'r', encoding='utf-8') as f:", + " data = json.load(f)", + " except json.JSONDecodeError as e:", + " raise json.JSONDecodeError(", + " f\"Invalid JSON in file {json_file_path}: {str(e)}\", ", + " e.doc, ", + " e.pos", + " )", + " ", + " # Ensure data is a list", + " if isinstance(data, dict):", + " # If it's a single trace object, wrap it in a list", + " data = [data]", + " elif not isinstance(data, list):", + " raise ValueError(", + " f\"Expected JSON to contain a list or dict, got {type(data).__name__}\"", + " )", + " ", + " # Required fields for trace data", + " required_fields = ['trace_id', 'state', 'input', 'output']", + " ", + " # Validate each trace", + " validated_traces = []", + " errors = []", + " ", + " for idx, trace in enumerate(data):", + " if not isinstance(trace, dict):", + " errors.append(f\"Trace at index {idx} is not a dictionary: {type(trace).__name__}\")", + " continue", + " ", + " # Check for missing required fields", + " missing_fields = [field for field in required_fields if field not in trace]", + " ", + " if missing_fields:", + " # Build a helpful error message", + " error_msg = (", + " f\"Trace at index {idx} is missing required fields: {', '.join(missing_fields)}. \"", + " f\"Available fields: {', '.join(trace.keys()) if trace.keys() else 'none'}. 
\"", + " f\"Required fields are: {', '.join(required_fields)}\"", + " )", + " errors.append(error_msg)", + " continue", + " ", + " # Validate that required fields are not None", + " none_fields = [field for field in required_fields if trace[field] is None]", + " if none_fields:", + " error_msg = (", + " f\"Trace at index {idx} has null values for required fields: {', '.join(none_fields)}\"", + " )", + " errors.append(error_msg)", + " continue", + " ", + " validated_traces.append(trace)", + " ", + " # If we have errors, raise a comprehensive error message", + " if errors:", + " error_summary = f\"Found {len(errors)} invalid trace(s) in {json_file_path}:\\n\"", + " error_summary += \"\\n\".join(f\" - {err}\" for err in errors[:5]) # Show first 5 errors", + " if len(errors) > 5:", + " error_summary += f\"\\n ... and {len(errors) - 5} more error(s)\"", + " raise ValueError(error_summary)", + " ", + " if not validated_traces:", + " raise ValueError(f\"No valid traces found in {json_file_path}\")", + " ", + " return validated_traces", + "", + "", + "# Example usage and test", + "def _test_import_traces():", + " \"\"\"Test the import_traces function with various scenarios.\"\"\"", + " print(\"\\n\ud83e\uddea Testing import_traces function...\")", + " ", + " # Create test directory", + " test_dir = Path(\".claude/test_traces\")", + " test_dir.mkdir(parents=True, exist_ok=True)", + " ", + " # Test 1: Valid trace data", + " valid_trace_file = test_dir / \"valid_traces.json\"", + " valid_data = [", + " {", + " \"trace_id\": \"trace_001\",", + " \"state\": \"completed\",", + " \"input\": {\"query\": \"Hello\"},", + " \"output\": {\"response\": \"Hi there!\"}", + " },", + " {", + " \"trace_id\": \"trace_002\", ", + " \"state\": \"pending\",", + " \"input\": {\"query\": \"How are you?\"},", + " \"output\": {}", + " }", + " ]", + " with open(valid_trace_file, 'w') as f:", + " json.dump(valid_data, f, indent=2)", + " ", + " try:", + " traces = 
import_traces(str(valid_trace_file))", + " print(f\"\u2705 Test 1 passed: Loaded {len(traces)} valid traces\")", + " except Exception as e:", + " print(f\"\u274c Test 1 failed: {e}\")", + " ", + " # Test 2: Missing required field", + " invalid_trace_file = test_dir / \"invalid_traces.json\"", + " invalid_data = [", + " {", + " \"trace_id\": \"trace_003\",", + " \"state\": \"completed\",", + " # Missing 'input' and 'output'", + " }", + " ]", + " with open(invalid_trace_file, 'w') as f:", + " json.dump(invalid_data, f, indent=2)", + " ", + " try:", + " traces = import_traces(str(invalid_trace_file))", + " print(f\"\u274c Test 2 failed: Should have raised ValueError for missing fields\")", + " except ValueError as e:", + " if \"missing required fields\" in str(e):", + " print(f\"\u2705 Test 2 passed: Caught missing fields error\")", + " else:", + " print(f\"\u274c Test 2 failed: Wrong error message: {e}\")", + " except Exception as e:", + " print(f\"\u274c Test 2 failed: Unexpected error: {e}\")", + " ", + " # Test 3: File not found", + " try:", + " traces = import_traces(\"nonexistent_file.json\")", + " print(f\"\u274c Test 3 failed: Should have raised FileNotFoundError\")", + " except FileNotFoundError:", + " print(f\"\u2705 Test 3 passed: Caught file not found error\")", + " except Exception as e:", + " print(f\"\u274c Test 3 failed: Unexpected error: {e}\")", + " ", + " # Test 4: Invalid JSON", + " malformed_file = test_dir / \"malformed.json\"", + " with open(malformed_file, 'w') as f:", + " f.write(\"{invalid json content\")", + " ", + " try:", + " traces = import_traces(str(malformed_file))", + " print(f\"\u274c Test 4 failed: Should have raised JSONDecodeError\")", + " except json.JSONDecodeError:", + " print(f\"\u2705 Test 4 passed: Caught invalid JSON error\")", + " except Exception as e:", + " print(f\"\u274c Test 4 failed: Unexpected error: {e}\")", + " ", + " print(\"\\n\u2728 Testing complete!\")", + "", + "# Uncomment to run tests", + "# 
"""
Tests for the import_traces function.

Tests error handling for missing required fields in trace JSON files.
"""
import json
import pytest
import tempfile
from pathlib import Path
from typing import List, Dict, Any


# Function is defined locally for testing purposes.
# The actual implementation is in project-book.ipynb.
def import_traces(json_file_path: str) -> List[Dict[str, Any]]:
    """
    Import trace records from a JSON file, validating every record.

    The file must hold either one trace object or a list of them. Each
    trace must be a JSON object with these keys present and non-null:
    - trace_id: Unique identifier for the trace
    - state: Current state of the trace
    - input: Input data for the trace
    - output: Output data from the trace
    Any additional keys are passed through untouched.

    Args:
        json_file_path: Path to the JSON file containing trace data

    Returns:
        List of trace dictionaries in file order. A single top-level
        object is returned as a one-element list.

    Raises:
        FileNotFoundError: If the path does not exist or is a directory
        json.JSONDecodeError: If the file contains invalid JSON; the
            parser's own error is attached as ``__cause__``
        ValueError: If the top-level value is neither a list nor an
            object, if any trace is invalid, or if the list is empty
    """
    trace_path = Path(json_file_path)
    # A directory "exists" but can never be a trace file; reject it here so
    # callers get the documented exception instead of an OS-specific one
    # raised by open().
    if not trace_path.exists() or trace_path.is_dir():
        raise FileNotFoundError(f"Trace file not found: {json_file_path}")

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError appends the "line/column/char" suffix to whatever
        # message it is given, so pass the bare parser message (e.msg) and
        # not str(e); otherwise the position is reported twice.
        raise json.JSONDecodeError(
            f"Invalid JSON in file {json_file_path}: {e.msg}",
            e.doc,
            e.pos,
        ) from e

    # Normalise to a list so a lone trace object is handled like a batch.
    if isinstance(data, dict):
        data = [data]
    elif not isinstance(data, list):
        raise ValueError(
            f"Expected JSON to contain a list or dict, got {type(data).__name__}"
        )

    required_fields = ('trace_id', 'state', 'input', 'output')
    validated_traces = []
    errors = []

    for idx, trace in enumerate(data):
        if not isinstance(trace, dict):
            errors.append(f"Trace at index {idx} is not a dictionary: {type(trace).__name__}")
            continue

        missing_fields = [name for name in required_fields if name not in trace]
        if missing_fields:
            errors.append(
                f"Trace at index {idx} is missing required fields: {', '.join(missing_fields)}. "
                f"Available fields: {', '.join(trace) or 'none'}. "
                f"Required fields are: {', '.join(required_fields)}"
            )
            continue

        none_fields = [name for name in required_fields if trace[name] is None]
        if none_fields:
            errors.append(
                f"Trace at index {idx} has null values for required fields: {', '.join(none_fields)}"
            )
            continue

        validated_traces.append(trace)

    # One bad trace rejects the whole file; report at most five problems so
    # the message stays readable for large inputs.
    if errors:
        error_summary = f"Found {len(errors)} invalid trace(s) in {json_file_path}:\n"
        error_summary += "\n".join(f" - {err}" for err in errors[:5])
        if len(errors) > 5:
            error_summary += f"\n ... and {len(errors) - 5} more error(s)"
        raise ValueError(error_summary)

    # Only reachable with no errors, i.e. the file held an empty list.
    if not validated_traces:
        raise ValueError(f"No valid traces found in {json_file_path}")

    return validated_traces
class TestImportTraces:
    """Pytest cases covering import_traces: happy paths and every rejection path."""

    @staticmethod
    def _dump(directory, filename, payload):
        """Write *payload* as JSON to ``directory / filename`` and return the path string."""
        target = directory / filename
        target.write_text(json.dumps(payload))
        return str(target)

    def test_valid_single_trace(self, tmp_path):
        """A lone trace object is accepted and returned as a one-element list."""
        source = self._dump(tmp_path, "single_trace.json", {
            "trace_id": "trace_001",
            "state": "completed",
            "input": {"query": "Hello"},
            "output": {"response": "Hi there!"}
        })

        loaded = import_traces(source)

        assert len(loaded) == 1
        only = loaded[0]
        assert only["trace_id"] == "trace_001"
        assert only["state"] == "completed"
        assert only["input"] == {"query": "Hello"}
        assert only["output"] == {"response": "Hi there!"}

    def test_valid_multiple_traces(self, tmp_path):
        """A list of valid traces is returned in order."""
        source = self._dump(tmp_path, "multiple_traces.json", [
            {
                "trace_id": "trace_001",
                "state": "completed",
                "input": {"query": "Hello"},
                "output": {"response": "Hi there!"}
            },
            {
                "trace_id": "trace_002",
                "state": "pending",
                "input": {"query": "How are you?"},
                "output": {}
            }
        ])

        loaded = import_traces(source)

        assert len(loaded) == 2
        assert loaded[0]["trace_id"] == "trace_001"
        assert loaded[1]["trace_id"] == "trace_002"

    def test_missing_trace_id(self, tmp_path):
        """A trace without trace_id is rejected and the field is named."""
        source = self._dump(tmp_path, "missing_trace_id.json", [{
            "state": "completed",
            "input": {"query": "Hello"},
            "output": {"response": "Hi there!"}
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "missing required fields" in message
        assert "trace_id" in message

    def test_missing_state(self, tmp_path):
        """A trace without state is rejected and the field is named."""
        source = self._dump(tmp_path, "missing_state.json", [{
            "trace_id": "trace_001",
            "input": {"query": "Hello"},
            "output": {"response": "Hi there!"}
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "missing required fields" in message
        assert "state" in message

    def test_missing_input(self, tmp_path):
        """A trace without input is rejected and the field is named."""
        source = self._dump(tmp_path, "missing_input.json", [{
            "trace_id": "trace_001",
            "state": "completed",
            "output": {"response": "Hi there!"}
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "missing required fields" in message
        assert "input" in message

    def test_missing_output(self, tmp_path):
        """A trace without output is rejected and the field is named."""
        source = self._dump(tmp_path, "missing_output.json", [{
            "trace_id": "trace_001",
            "state": "completed",
            "input": {"query": "Hello"}
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "missing required fields" in message
        assert "output" in message

    def test_missing_multiple_fields(self, tmp_path):
        """Every absent field is listed when several are missing at once."""
        # Only trace_id is supplied; state, input and output are absent.
        source = self._dump(tmp_path, "missing_multiple.json", [{
            "trace_id": "trace_001"
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "missing required fields" in message
        assert "state" in message
        assert "input" in message
        assert "output" in message

    def test_null_field_values(self, tmp_path):
        """A required field that is present but null is rejected."""
        source = self._dump(tmp_path, "null_values.json", [{
            "trace_id": "trace_001",
            "state": None,
            "input": {"query": "Hello"},
            "output": {"response": "Hi"}
        }])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        message = str(caught.value)
        assert "null values" in message
        assert "state" in message

    def test_file_not_found(self):
        """A path that does not exist raises FileNotFoundError."""
        with pytest.raises(FileNotFoundError):
            import_traces("nonexistent_file.json")

    def test_invalid_json(self, tmp_path):
        """Malformed JSON text raises json.JSONDecodeError."""
        broken = tmp_path / "invalid.json"
        broken.write_text("{invalid json content")

        with pytest.raises(json.JSONDecodeError):
            import_traces(str(broken))

    def test_non_dict_non_list_json(self, tmp_path):
        """A top-level JSON scalar is rejected."""
        source = self._dump(tmp_path, "string.json", "just a string")

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        assert "Expected JSON to contain a list or dict" in str(caught.value)

    def test_list_with_non_dict_elements(self, tmp_path):
        """A list element that is not an object is rejected."""
        source = self._dump(tmp_path, "mixed_types.json", [
            "not a dict",
            {
                "trace_id": "trace_001",
                "state": "completed",
                "input": {},
                "output": {}
            }
        ])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        assert "not a dictionary" in str(caught.value)

    def test_empty_list(self, tmp_path):
        """An empty list holds no traces and is rejected."""
        source = self._dump(tmp_path, "empty.json", [])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        assert "No valid traces found" in str(caught.value)

    def test_mixed_valid_invalid_traces(self, tmp_path):
        """One invalid trace causes the whole file to be rejected."""
        source = self._dump(tmp_path, "mixed.json", [
            {
                "trace_id": "trace_001",
                "state": "completed",
                "input": {},
                "output": {}
            },
            {
                # input and output are deliberately absent
                "trace_id": "trace_002",
                "state": "completed"
            }
        ])

        with pytest.raises(ValueError) as caught:
            import_traces(source)

        assert "invalid trace(s)" in str(caught.value)

    def test_extra_fields_allowed(self, tmp_path):
        """Keys beyond the required four are kept on the returned trace."""
        source = self._dump(tmp_path, "extra_fields.json", [{
            "trace_id": "trace_001",
            "state": "completed",
            "input": {"query": "Hello"},
            "output": {"response": "Hi"},
            "metadata": {"user": "test_user"},
            "timestamp": "2024-01-01T00:00:00Z"
        }])

        loaded = import_traces(source)

        assert len(loaded) == 1
        only = loaded[0]
        assert only["trace_id"] == "trace_001"
        assert only["metadata"] == {"user": "test_user"}
        assert only["timestamp"] == "2024-01-01T00:00:00Z"


if __name__ == "__main__":
    # Run tests with pytest
    pytest.main([__file__, "-v"])
def import_traces(json_file_path: str) -> List[Dict[str, Any]]:
    """
    Import trace records from a JSON file, validating every record.

    The file must hold either one trace object or a list of them. Each
    trace must be a JSON object with these keys present and non-null:
    - trace_id: Unique identifier for the trace
    - state: Current state of the trace
    - input: Input data for the trace
    - output: Output data from the trace
    Any additional keys are passed through untouched.

    Args:
        json_file_path: Path to the JSON file containing trace data

    Returns:
        List of trace dictionaries in file order. A single top-level
        object is returned as a one-element list.

    Raises:
        FileNotFoundError: If the path does not exist or is a directory
        json.JSONDecodeError: If the file contains invalid JSON; the
            parser's own error is attached as ``__cause__``
        ValueError: If the top-level value is neither a list nor an
            object, if any trace is invalid, or if the list is empty
    """
    trace_path = Path(json_file_path)
    # A directory "exists" but can never be a trace file; reject it here so
    # callers get the documented exception instead of an OS-specific one
    # raised by open().
    if not trace_path.exists() or trace_path.is_dir():
        raise FileNotFoundError(f"Trace file not found: {json_file_path}")

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # JSONDecodeError appends the "line/column/char" suffix to whatever
        # message it is given, so pass the bare parser message (e.msg) and
        # not str(e); otherwise the position is reported twice.
        raise json.JSONDecodeError(
            f"Invalid JSON in file {json_file_path}: {e.msg}",
            e.doc,
            e.pos,
        ) from e

    # Normalise to a list so a lone trace object is handled like a batch.
    if isinstance(data, dict):
        data = [data]
    elif not isinstance(data, list):
        raise ValueError(
            f"Expected JSON to contain a list or dict, got {type(data).__name__}"
        )

    required_fields = ('trace_id', 'state', 'input', 'output')
    validated_traces = []
    errors = []

    for idx, trace in enumerate(data):
        if not isinstance(trace, dict):
            errors.append(f"Trace at index {idx} is not a dictionary: {type(trace).__name__}")
            continue

        missing_fields = [name for name in required_fields if name not in trace]
        if missing_fields:
            errors.append(
                f"Trace at index {idx} is missing required fields: {', '.join(missing_fields)}. "
                f"Available fields: {', '.join(trace) or 'none'}. "
                f"Required fields are: {', '.join(required_fields)}"
            )
            continue

        none_fields = [name for name in required_fields if trace[name] is None]
        if none_fields:
            errors.append(
                f"Trace at index {idx} has null values for required fields: {', '.join(none_fields)}"
            )
            continue

        validated_traces.append(trace)

    # One bad trace rejects the whole file; report at most five problems so
    # the message stays readable for large inputs.
    if errors:
        error_summary = f"Found {len(errors)} invalid trace(s) in {json_file_path}:\n"
        error_summary += "\n".join(f" - {err}" for err in errors[:5])
        if len(errors) > 5:
            error_summary += f"\n ... and {len(errors) - 5} more error(s)"
        raise ValueError(error_summary)

    # Only reachable with no errors, i.e. the file held an empty list.
    if not validated_traces:
        raise ValueError(f"No valid traces found in {json_file_path}")

    return validated_traces
def _write_json(directory, filename, payload):
    """Serialize *payload* as JSON into ``directory / filename`` and return the path string."""
    target = directory / filename
    target.write_text(json.dumps(payload), encoding="utf-8")
    return str(target)


def _check_loads(label, path, predicate):
    """Print PASSED and return True when import_traces(path) succeeds and *predicate* accepts the result."""
    print(f"{label}... ", end="")
    try:
        traces = import_traces(path)
        if not predicate(traces):
            # Explicit raise rather than ``assert`` so the check still runs
            # under ``python -O``, which strips assert statements.
            raise AssertionError(f"unexpected result: {traces!r}")
    except Exception as e:
        print(f"❌ FAILED: {e}")
        return False
    print("✅ PASSED")
    return True


def _check_raises(label, path, exc_type, fragments=()):
    """Print PASSED and return True when import_traces(path) raises *exc_type* mentioning every fragment."""
    print(f"{label}... ", end="")
    try:
        import_traces(path)
    except exc_type as e:
        if all(fragment in str(e) for fragment in fragments):
            print("✅ PASSED")
            return True
        print(f"❌ FAILED: Wrong error message: {e}")
    except Exception as e:
        print(f"❌ FAILED: Unexpected error: {e}")
    else:
        print(f"❌ FAILED: Should have raised {exc_type.__name__}")
    return False


def run_tests():
    """
    Run all ten import_traces checks and print a per-test and summary report.

    Fixture files are written to a temporary directory that is removed
    before the summary is printed.

    Returns:
        True when every check passed, False otherwise.
    """
    complete = {
        "trace_id": "trace_001",
        "state": "completed",
        "input": {"query": "Hello"},
        "output": {"response": "Hi there!"},
    }
    pending = {
        "trace_id": "trace_002",
        "state": "pending",
        "input": {"query": "How are you?"},
        "output": {},
    }
    outcomes = []

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir)

        outcomes.append(_check_loads(
            "Test 1: Valid single trace",
            _write_json(tmp_path, "single_trace.json", complete),
            lambda traces: len(traces) == 1 and traces[0]["trace_id"] == "trace_001",
        ))

        outcomes.append(_check_loads(
            "Test 2: Valid multiple traces",
            _write_json(tmp_path, "multiple_traces.json", [complete, pending]),
            lambda traces: [t["trace_id"] for t in traces] == ["trace_001", "trace_002"],
        ))

        # Tests 3-6: drop one required field at a time from a valid trace.
        for number, name in enumerate(("trace_id", "state", "input", "output"), start=3):
            payload = [{key: value for key, value in complete.items() if key != name}]
            outcomes.append(_check_raises(
                f"Test {number}: Missing {name} raises ValueError",
                _write_json(tmp_path, f"missing_{name}.json", payload),
                ValueError,
                ("missing required fields", name),
            ))

        # Probe inside the fresh temp directory so the path is guaranteed
        # absent whatever the current working directory contains.
        outcomes.append(_check_raises(
            "Test 7: File not found raises FileNotFoundError",
            str(tmp_path / "nonexistent_file.json"),
            FileNotFoundError,
        ))

        malformed = tmp_path / "invalid.json"
        malformed.write_text("{invalid json content", encoding="utf-8")
        outcomes.append(_check_raises(
            "Test 8: Invalid JSON raises JSONDecodeError",
            str(malformed),
            json.JSONDecodeError,
        ))

        null_state = dict(complete, state=None, output={"response": "Hi"})
        outcomes.append(_check_raises(
            "Test 9: Null field values raises ValueError",
            _write_json(tmp_path, "null_values.json", [null_state]),
            ValueError,
            ("null values", "state"),
        ))

        with_extra = dict(complete, output={"response": "Hi"}, metadata={"user": "test_user"})
        outcomes.append(_check_loads(
            "Test 10: Extra fields are allowed",
            _write_json(tmp_path, "extra_fields.json", [with_extra]),
            lambda traces: len(traces) == 1 and traces[0]["metadata"] == {"user": "test_user"},
        ))

    passed = sum(outcomes)
    failed = len(outcomes) - passed

    print(f"\n{'='*60}")
    print(f"Test Results: {passed} passed, {failed} failed out of {passed + failed} tests")
    print('='*60)

    return failed == 0


if __name__ == "__main__":
    success = run_tests()
    # SystemExit works even when the site-provided exit() helper is absent
    # (for example under ``python -S``).
    raise SystemExit(0 if success else 1)