Enhance Elasticsearch logging to include status information

Dylan Huang · Dylan Huang · commit 4c2773442fbe · 2025-10-01T14:52:13.000-07:00
- Added a method to extract status information from log records in ElasticSearchDirectHttpHandler.
- Updated the data structure sent to Elasticsearch to include status_code, status_message, and status_details if present.
- Modified ElasticsearchIndexManager to add the new status fields (status_code, status_message, status_details) to the index mapping and to validate their mapped types.
- Implemented tests to verify logging of status information and searching by status code in Elasticsearch.
diff --git a/eval_protocol/logging/elasticsearch_direct_http_handler.py b/eval_protocol/logging/elasticsearch_direct_http_handler.py
@@ -33,6 +33,7 @@ def emit(self, record: logging.LogRecord) -> None:
             timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
 
             rollout_id = self._get_rollout_id(record)
+            status_info = self._get_status_info(record)
 
             data: Dict[str, Any] = {
                 "@timestamp": timestamp,
@@ -42,6 +43,10 @@ def emit(self, record: logging.LogRecord) -> None:
                 "rollout_id": rollout_id,
             }
 
+            # Add status information if present
+            if status_info:
+                data.update(status_info)
+
             # Schedule the HTTP request to run asynchronously
             self._schedule_async_send(data, record)
         except Exception as e:
@@ -57,6 +62,34 @@ def _get_rollout_id(self, record: logging.LogRecord) -> str:
             )
         return rollout_id
 
+    def _get_status_info(self, record: logging.LogRecord) -> Optional[Dict[str, Any]]:
+        """Extract status fields from the log record's extra data.
+
+        ``record.status`` may be an object exposing ``code`` and ``message``
+        attributes or a dict with ``code``/``message``/``details`` keys. A code that
+        has a ``.value`` attribute (e.g. an enum member) is unwrapped in both forms,
+        so the dict form yields the same raw code as the object form.
+
+        Returns:
+            Dict with ``status_code``, ``status_message`` and ``status_details``,
+            or None when no status is attached or it has neither supported shape.
+        """
+        status = getattr(record, "status", None)
+        if status is None:
+            return None
+
+        if hasattr(status, "code") and hasattr(status, "message"):
+            code, message = status.code, status.message
+            details = getattr(status, "details", [])
+        elif isinstance(status, dict):
+            code, message = status.get("code"), status.get("message")
+            details = status.get("details", [])
+        else:
+            return None
+
+        # Unwrap enum-like codes; plain values (e.g. ints) pass through unchanged.
+        return {"status_code": getattr(code, "value", code), "status_message": message, "status_details": details}
+
     def _schedule_async_send(self, data: Dict[str, Any], record: logging.LogRecord) -> None:
         """Schedule an async task to send the log data to Elasticsearch."""
         if self._executor is None:
diff --git a/eval_protocol/logging/elasticsearch_index_manager.py b/eval_protocol/logging/elasticsearch_index_manager.py
@@ -98,13 +98,13 @@ def _index_exists_with_correct_mapping(self) -> bool:
             return False
 
     def _has_correct_timestamp_mapping(self, mapping_data: Dict[str, Any]) -> bool:
-        """Check if the mapping has @timestamp as a date field and rollout_id as a keyword field.
+        """Check if the mapping has @timestamp as a date field, rollout_id as a keyword field, and status fields.
 
         Args:
             mapping_data: Elasticsearch mapping response data
 
         Returns:
-            bool: True if @timestamp is correctly mapped as date field and rollout_id as keyword field
+            bool: True if all required fields are correctly mapped
         """
         try:
             if not (
@@ -122,7 +122,12 @@ def _has_correct_timestamp_mapping(self, mapping_data: Dict[str, Any]) -> bool:
             # Check rollout_id is mapped as keyword
             rollout_id_ok = "rollout_id" in properties and properties["rollout_id"].get("type") == "keyword"
 
-            return timestamp_ok and rollout_id_ok
+            # Check status fields are mapped correctly
+            status_code_ok = "status_code" in properties and properties["status_code"].get("type") == "integer"
+            status_message_ok = "status_message" in properties and properties["status_message"].get("type") == "text"
+            status_details_ok = "status_details" in properties and properties["status_details"].get("type") == "object"
+
+            return timestamp_ok and rollout_id_ok and status_code_ok and status_message_ok and status_details_ok
         except (KeyError, TypeError):
             return False
 
@@ -140,6 +145,9 @@ def _get_logging_mapping(self) -> Dict[str, Any]:
                     "message": {"type": "text"},
                     "logger_name": {"type": "keyword"},
                     "rollout_id": {"type": "keyword"},
+                    "status_code": {"type": "integer"},
+                    "status_message": {"type": "text"},
+                    "status_details": {"type": "object"},
                 }
             }
         }
diff --git a/tests/logging/test_elasticsearch_direct_http_handler.py b/tests/logging/test_elasticsearch_direct_http_handler.py
@@ -402,3 +402,176 @@ def test_elasticsearch_direct_http_handler_search_by_rollout_id(
 
     print(f"Successfully verified search by rollout_id '{rollout_id}' found {len(hits)} log messages")
     print("Verified that search for different rollout_id returns 0 results")
+
+
+@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)")
+def test_elasticsearch_direct_http_handler_logs_status_info(
+    elasticsearch_config: ElasticSearchConfig, test_logger: logging.Logger, rollout_id: str
+):
+    """Test that ElasticsearchDirectHttpHandler logs Status class instances and can search by status code.
+
+    The search is filtered by this test's rollout_id as well as the status code, so a
+    document with the same status code from another rollout cannot be returned instead.
+    """
+    from eval_protocol import Status
+
+    # Create a Status instance
+    test_status = Status.rollout_running()
+
+    # Generate a unique test message
+    test_message = f"Status logging test message at {time.time()}"
+
+    # Log with Status instance in extra data
+    test_logger.info(test_message, extra={"status": test_status})
+
+    # Give Elasticsearch time to process the document
+    time.sleep(3)
+
+    # Query Elasticsearch to verify the document was received with status info
+    parsed_url = urlparse(elasticsearch_config.url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    search_url = f"{base_url}/{elasticsearch_config.index_name}/_search"
+
+    # Search for logs with our specific status code, restricted to this rollout;
+    # newest first, so the single returned hit is the most recent match.
+    search_query = {
+        "query": {
+            "bool": {
+                "filter": [
+                    {"term": {"status_code": test_status.code.value}},
+                    {"term": {"rollout_id": rollout_id}},
+                ]
+            }
+        },
+        "sort": [{"@timestamp": {"order": "desc"}}],
+        "size": 1,
+    }
+
+    # Execute the search
+    response = requests.post(
+        search_url,
+        headers={"Content-Type": "application/json", "Authorization": f"ApiKey {elasticsearch_config.api_key}"},
+        json=search_query,
+        verify=parsed_url.scheme == "https",
+    )
+
+    # Check for errors; print the response body first so a failure is diagnosable
+    if response.status_code != 200:
+        print(f"Elasticsearch search failed with status {response.status_code}")
+        print(f"Response: {response.text}")
+        response.raise_for_status()
+
+    search_results = response.json()
+
+    # Assert that we found our log message
+    assert "hits" in search_results, "Search response should contain 'hits'"
+    assert "total" in search_results["hits"], "Search hits should contain 'total'"
+
+    # 'total' is handled both as a dict carrying 'value' and as a plain number
+    total_hits = search_results["hits"]["total"]
+    if isinstance(total_hits, dict):
+        total_count = total_hits["value"]
+    else:
+        total_count = total_hits
+
+    assert total_count > 0, f"Expected to find at least 1 log message, but found {total_count}"
+
+    # Verify the content of the found document
+    hits = search_results["hits"]["hits"]
+    assert len(hits) > 0, "Expected at least one hit"
+
+    found_document = hits[0]["_source"]
+
+    # Verify the status fields are present and correct
+    assert "status_code" in found_document, "Expected document to contain 'status_code' field"
+    assert found_document["status_code"] == test_status.code.value, (
+        f"Expected status_code {test_status.code.value}, got {found_document['status_code']}"
+    )
+    assert "status_message" in found_document, "Expected document to contain 'status_message' field"
+    assert found_document["status_message"] == test_status.message, (
+        f"Expected status_message '{test_status.message}', got '{found_document['status_message']}'"
+    )
+    assert "status_details" in found_document, "Expected document to contain 'status_details' field"
+    assert found_document["status_details"] == test_status.details, (
+        f"Expected status_details {test_status.details}, got {found_document['status_details']}"
+    )
+
+    # Verify other expected fields are still present
+    assert found_document["message"] == test_message, (
+        f"Expected message '{test_message}', got '{found_document['message']}'"
+    )
+    assert found_document["rollout_id"] == rollout_id, (
+        f"Expected rollout_id '{rollout_id}', got '{found_document['rollout_id']}'"
+    )
+
+    print(f"Successfully verified Status logging with code {test_status.code.value} in Elasticsearch: {test_message}")
+
+
+@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Only run this test locally (skipped in CI)")
+def test_elasticsearch_direct_http_handler_search_by_status_code(
+    elasticsearch_config: ElasticSearchConfig, test_logger: logging.Logger, rollout_id: str
+):
+    """Test that logs can be searched by status code in Elasticsearch.
+
+    Logs one message per status, then searches for the RUNNING code. The query is
+    also filtered by this test's rollout_id, because every hit is asserted to belong
+    to this rollout and the index may hold RUNNING documents from other rollouts.
+    """
+    from eval_protocol.models import Status
+
+    # Create different Status instances for testing
+    statuses = [
+        Status.rollout_running(),
+        Status.eval_finished(),
+        Status.error("Test error message"),
+    ]
+
+    # Log one uniquely-worded message per status
+    for i, status in enumerate(statuses):
+        message = f"Status search test message {i} at {time.time()}"
+        test_logger.info(message, extra={"status": status})
+        time.sleep(0.1)  # Small delay to ensure different timestamps
+
+    # Give Elasticsearch time to process all documents
+    time.sleep(3)
+
+    # Query Elasticsearch to search by specific status code
+    parsed_url = urlparse(elasticsearch_config.url)
+    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+    search_url = f"{base_url}/{elasticsearch_config.index_name}/_search"
+
+    # Search for logs with RUNNING status code, restricted to this rollout
+    running_status = Status.Code.RUNNING
+    search_query = {
+        "query": {
+            "bool": {
+                "filter": [
+                    {"term": {"status_code": running_status.value}},
+                    {"term": {"rollout_id": rollout_id}},
+                ]
+            }
+        },
+        "sort": [{"@timestamp": {"order": "desc"}}],
+        "size": 10,
+    }
+
+    # Execute the search
+    response = requests.post(
+        search_url,
+        headers={"Content-Type": "application/json", "Authorization": f"ApiKey {elasticsearch_config.api_key}"},
+        json=search_query,
+        verify=parsed_url.scheme == "https",
+    )
+
+    # Check for errors; print the response body first so a failure is diagnosable
+    if response.status_code != 200:
+        print(f"Elasticsearch search failed with status {response.status_code}")
+        print(f"Response: {response.text}")
+        response.raise_for_status()
+
+    search_results = response.json()
+
+    # Assert that we found our log messages
+    assert "hits" in search_results, "Search response should contain 'hits'"
+    assert "total" in search_results["hits"], "Search hits should contain 'total'"
+
+    # 'total' is handled both as a dict carrying 'value' and as a plain number
+    total_hits = search_results["hits"]["total"]
+    if isinstance(total_hits, dict):
+        total_count = total_hits["value"]
+    else:
+        total_count = total_hits
+
+    assert total_count >= 1, f"Expected to find at least 1 log message with RUNNING status, but found {total_count}"
+
+    # Verify the content of the found documents
+    hits = search_results["hits"]["hits"]
+    assert len(hits) >= 1, f"Expected at least 1 hit, found {len(hits)}"
+
+    # Verify all found documents have the correct status code
+    for hit in hits:
+        document = hit["_source"]
+        assert document["status_code"] == running_status.value, (
+            f"Expected status_code {running_status.value}, got {document['status_code']}"
+        )
+        assert document["rollout_id"] == rollout_id, (
+            f"Expected rollout_id '{rollout_id}', got '{document['rollout_id']}'"
+        )
+
+    print(f"Successfully verified search by status code {running_status.value} found {len(hits)} log messages")