Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
[runtime]
http_show_error_body = true
log_cli_level = "WARNING"
log_format = "JSON"
log_rotate_when = "midnight"
# Default: logs are set to human readable text
# To switch to structured JSON instead, uncomment the line below (must be uppercase "JSON")
# log_format = "JSON"

[extract]
workers = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
[runtime]
http_show_error_body = true
log_cli_level = "WARNING"
log_format = "JSON"
log_rotate_when = "midnight"
# Default: logs are set to human readable text
# To switch to structured JSON instead, uncomment the line below (must be uppercase "JSON")
# log_format = "JSON"

[extract]
workers = 8
Expand Down
90 changes: 80 additions & 10 deletions src/openhound/core/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

VALID_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

VALID_FORMATS = ["json", "text"]

# This should be a complete list of default fields as part of a LogRecord
# this is used to prevent custom fields overwriting the default LogRecord entries
DEFAULT_LOG_FIELDS = [
Expand Down Expand Up @@ -155,6 +157,46 @@ def format(self, record: logging.LogRecord) -> str:
return json.dumps(log_data)


class OpenHoundTextFormatter(logging.Formatter):
"""Human-readable plain-text formatter for OpenHound file logs"""

def format(self, record: logging.LogRecord) -> str:
"""Format a log record as a single readable line with structured context.

Args:
record (logging.LogRecord): The original log record

Returns:
str: A human-readable plain-text representation of the log record
"""
timestamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")
location = f"{record.name}:{record.funcName}:{record.lineno}"
log_line = (
f"{timestamp} [{record.levelname}] {location} - {record.getMessage()}"
)

# Append any custom/extra fields (e.g. resource, phase, extension) so the
# structured context is preserved without the noise of full JSON
extras = {
key: value
for key, value in record.__dict__.items()
if (
not key.startswith("_")
and key not in DEFAULT_LOG_FIELDS
and key != "taskName"
and value is not None
)
}
if extras:
extra_str = " ".join(f"{key}={value}" for key, value in extras.items())
log_line = f"{log_line} | {extra_str}"

if record.exc_info:
log_line = f"{log_line}\n{self.formatException(record.exc_info)}"

return log_line


class OpenHoundRichFormatter(logging.Formatter):
"""Custom formatter for Rich when logging in CLI mode"""

Expand Down Expand Up @@ -182,6 +224,7 @@ def __init__(
interval: int = 1,
cli_level: str = "ERROR",
base_path: str | None = None,
log_format: str = "text",
):
self.level = level
self.name = name
Expand All @@ -192,6 +235,7 @@ def __init__(
self.backup_count = backup_count
self.interval = interval
self.cli_level = cli_level
self.log_format = log_format

self.base_path = Path(base_path) if base_path else self.default_platform_path()
self.log_file_path: Path | None = None
Expand All @@ -210,6 +254,14 @@ def _validate_level(level: str, default: str = "INFO") -> str:

return default

@staticmethod
def _validate_format(log_format: str, default: str = "text") -> str:
normalized_format = log_format.lower()
if normalized_format in VALID_FORMATS:
return normalized_format

return default

@staticmethod
def _is_valid_path(base_path: Path, spec_path: Path) -> Path:
"""Validate the user provided path to prevent directory traversal and ensure it's within the base path
Expand Down Expand Up @@ -275,12 +327,18 @@ def setup(self) -> None:
dlt_interval = dlt.config.get("runtime.log_interval", int)
dlt_cli_level = dlt.config.get("runtime.log_cli_level", str)
dlt_log_path = dlt.config.get("runtime.log_path", str)
# dlt only treats the exact value "JSON" specially. We read it to select json logs and
# fall back to text for anything else (including dlt's default format string).
dlt_log_format = dlt.config.get("runtime.log_format", str)

# Check if the DLT config values are valid and if so override the defaults
self.level = self._validate_level(dlt_level or self.level, default="INFO")
self.cli_level = self._validate_level(
dlt_cli_level or self.cli_level, default="ERROR"
)
self.log_format = self._validate_format(
dlt_log_format or self.log_format, default="text"
)

# Override the base path if log_path is set in DLT config, otherwise use the default platform path
self.base_path = Path(dlt_log_path) if dlt_log_path else self.base_path
Expand All @@ -303,25 +361,37 @@ def setup(self) -> None:
self.root_logger.handlers.clear()
self.handlers[self.runtime_mode](self.root_logger, self.log_file_path)

def _file_formatter(self) -> logging.Formatter:
"""Return the formatter for file/stdout handlers based on the configured log format.

Returns:
logging.Formatter: A JSON formatter when log_format is 'json', otherwise a
human-readable plain-text formatter.
"""
if self.log_format == "json":
return OpenHoundJSONFormatter()
return OpenHoundTextFormatter()

def container_handlers(self, logger: logging.Logger, file_path: Path) -> None:
"""Set the logging handler/format when running in a container"""

json_formatter = OpenHoundJSONFormatter()
formatter = self._file_formatter()

# Log to stdout in JSON for better compatibility with container-based logging systems
# Log to stdout for better compatibility with container-based logging systems.
# Output is human-readable text by default; set runtime.log_format = "JSON" for structured JSON.
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(json_formatter)
stdout_handler.setFormatter(formatter)
logger.addHandler(stdout_handler)

# But also log the same json format to a file for persistence and debugging when needed
# But also log the same format to a file for persistence and debugging when needed
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand All @@ -347,16 +417,16 @@ def cli_handlers(self, logger: logging.Logger, file_path: Path) -> None:
console_handler.setFormatter(rich_formatter)
logger.addHandler(console_handler)

# But also save the logs to a file in JSON format :)
json_formatter = OpenHoundJSONFormatter()
# But also save the logs to a file using the configured format (text by default)
file_formatter = self._file_formatter()
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(file_formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand All @@ -367,15 +437,15 @@ def cli_handlers(self, logger: logging.Logger, file_path: Path) -> None:

def service_handlers(self, logger: logging.Logger, file_path: Path) -> None:
"""Set the logging handler/format when running the OpenHound service"""
json_formatter = OpenHoundJSONFormatter()
file_formatter = self._file_formatter()
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(file_formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand Down
66 changes: 64 additions & 2 deletions tests/test_log_handlers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
import json
import logging

from openhound.core.logging import RotatingFileHandler, logger_override
import pytest

from openhound.core.logging import (
CustomLogger,
OpenHoundJSONFormatter,
OpenHoundTextFormatter,
RotatingFileHandler,
logger_override,
)


def test_root_handler_setup():
Expand Down Expand Up @@ -59,8 +67,10 @@ def test_dlt_extension_handlers():
)


def test_log_routing_content(tmp_path, caplog):
def test_log_routing_content(tmp_path, caplog, monkeypatch):
"""Test that logs are correctly routed and that the files are created for the expected paths"""
# Pin JSON (dlt's only crash-safe override value) so the JSON-parsing assertions are self-contained.
monkeypatch.setenv("RUNTIME__LOG_FORMAT", "JSON")
logger_override.base_path = tmp_path
logger_override.setup()
logger_override.set_handler("test_extension")
Expand Down Expand Up @@ -94,3 +104,55 @@ def test_log_routing_content(tmp_path, caplog):
assert ext_logs_json[0]["message"] == "Extension DLT log", (
"The extension log message should be present in 'ext_test_extension.log'"
)


def test_validate_format_defaults_to_text():
"""Test that the log format validation accepts text/json and falls back to text"""
assert CustomLogger._validate_format("JSON") == "json"
assert CustomLogger._validate_format("text") == "text"
assert CustomLogger._validate_format("invalid") == "text"


def test_file_formatter_selection():
"""Test that the configured log_format selects the matching file formatter"""
json_logger = CustomLogger("openhound.log", log_format="json")
assert isinstance(json_logger._file_formatter(), OpenHoundJSONFormatter), (
"log_format 'json' should produce a JSON formatter"
)

text_logger = CustomLogger("openhound.log", log_format="text")
assert isinstance(text_logger._file_formatter(), OpenHoundTextFormatter), (
"log_format 'text' should produce a plain-text formatter"
)


def test_text_formatter_produces_plain_text():
"""Test that the text formatter produces a readable, non-JSON line with extras"""
formatter = OpenHoundTextFormatter()
record = logging.LogRecord(
name="openhound.core.collect",
level=logging.ERROR,
pathname="collect.py",
lineno=61,
msg="Starting collector %s",
args=("github",),
exc_info=None,
func="run",
)
record.resource = "scim_users"
record.taskName = None

output = formatter.format(record)

assert "Starting collector github" in output, (
"The formatted message should be rendered with its args"
)
assert "[ERROR" in output, "The log level should be present in the output"
assert "openhound.core.collect:run:61" in output, (
"The logger/function/line location should be present in the output"
)
assert "resource=scim_users" in output, "Extra fields should be preserved"
assert "taskName" not in output, "Null extras like taskName should be dropped"

with pytest.raises(json.JSONDecodeError):
json.loads(output)
Loading