Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions deployments/helm/openhound/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ spec:
- name: DESTINATION__BLOODHOUNDENTERPRISE__URL
value: {{ .Values.destination.bloodhoundEnterprise.url | quote }}
{{- end }}
{{- if .Values.destination.bloodhoundEnterprise.interval }}
- name: DESTINATION__BLOODHOUNDENTERPRISE__INTERVAL
value: {{ .Values.destination.bloodhoundEnterprise.interval | quote }}
{{- end }}
{{- range $name, $value := .Values.env }}
- name: {{ $name }}
value: {{ $value | quote }}
Expand Down
1 change: 0 additions & 1 deletion deployments/helm/openhound/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ collector:
destination:
bloodhoundEnterprise:
url: ""
interval: ""

resources: { }

Expand Down
1 change: 0 additions & 1 deletion deployments/helm/values.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,3 @@ collector:
destination:
bloodhoundEnterprise:
url: https://test.bloodhoundenterprise.io
interval: "300"
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
[runtime]
http_show_error_body = true
log_cli_level = "WARNING"
log_format = "JSON"
log_rotate_when = "midnight"
# Default: logs are set to human readable text
# To switch to structured JSON instead, uncomment the line below (must be uppercase "JSON")
# log_format = "JSON"

[extract]
workers = 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
[runtime]
http_show_error_body = true
log_cli_level = "WARNING"
log_format = "JSON"
log_rotate_when = "midnight"
# Default: logs are set to human readable text
# To switch to structured JSON instead, uncomment the line below (must be uppercase "JSON")
# log_format = "JSON"

[extract]
workers = 8
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[destination.bloodhoundenterprise]
interval = "300"
token_id = "client_token_id"
token_key = "client_token_key"
url = "bhe_url"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[destination.bloodhoundenterprise]
interval = "300"
token_id = "client_token_id"
token_key = "client_token_key"
url = "bhe_url"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
[destination.bloodhoundenterprise]
interval = "300"
token_id = "client_token_id"
token_key = "client_token_key"
url = "bhe_url"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
x-scheduler: &scheduler
image: specterops/openhound:${IMAGE_VERSION:latest-enterprise}
image: specterops/openhound:${IMAGE_VERSION:-latest-enterprise}
restart: unless-stopped
init: true
volumes:
Expand Down
8 changes: 6 additions & 2 deletions src/openhound/core/clients/bloodhound.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
from abc import ABC, abstractmethod
from datetime import timedelta
from importlib.metadata import version

import requests
from dlt.common import json
Expand All @@ -22,6 +23,9 @@
logger = logging.getLogger(__name__)


__version__ = version("openhound")


class BloodHoundHTTPError(DltException):
def __init__(self, reason: str, code: int):
self.reason = reason
Expand Down Expand Up @@ -175,7 +179,7 @@ def request(

sig = base64.b64encode(digester.digest()).decode()
headers = {
"User-Agent": "openhound/0.1.0",
"User-Agent": f"openhound/{__version__}",
"Authorization": f"bhesignature {self.token_id}",
"RequestDate": datetime_formatted,
"Signature": sig,
Expand Down Expand Up @@ -203,7 +207,7 @@ def request(
extra_headers: dict[str, str] | None = None,
):
headers = {
"User-Agent": "openhound/0.1.0",
"User-Agent": f"openhound/{__version__}",
"Content-Type": "application/json",
"Authorization": f"Bearer {self.token}",
}
Expand Down
90 changes: 80 additions & 10 deletions src/openhound/core/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

VALID_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

VALID_FORMATS = ["json", "text"]

# This should be a complete list of default fields as part of a LogRecord
# this is used to prevent custom fields overwriting the default LogRecord entries
DEFAULT_LOG_FIELDS = [
Expand Down Expand Up @@ -155,6 +157,46 @@ def format(self, record: logging.LogRecord) -> str:
return json.dumps(log_data)


class OpenHoundTextFormatter(logging.Formatter):
"""Human-readable plain-text formatter for OpenHound file logs"""

def format(self, record: logging.LogRecord) -> str:
"""Format a log record as a single readable line with structured context.

Args:
record (logging.LogRecord): The original log record

Returns:
str: A human-readable plain-text representation of the log record
"""
timestamp = self.formatTime(record, "%Y-%m-%d %H:%M:%S")
location = f"{record.name}:{record.funcName}:{record.lineno}"
log_line = (
f"{timestamp} [{record.levelname}] {location} - {record.getMessage()}"
)

# Append any custom/extra fields (e.g. resource, phase, extension) so the
# structured context is preserved without the noise of full JSON
extras = {
key: value
for key, value in record.__dict__.items()
if (
not key.startswith("_")
and key not in DEFAULT_LOG_FIELDS
and key != "taskName"
and value is not None
)
}
if extras:
extra_str = " ".join(f"{key}={value}" for key, value in extras.items())
log_line = f"{log_line} | {extra_str}"

if record.exc_info:
log_line = f"{log_line}\n{self.formatException(record.exc_info)}"

return log_line


class OpenHoundRichFormatter(logging.Formatter):
"""Custom formatter for Rich when logging in CLI mode"""

Expand Down Expand Up @@ -182,6 +224,7 @@ def __init__(
interval: int = 1,
cli_level: str = "ERROR",
base_path: str | None = None,
log_format: str = "text",
):
self.level = level
self.name = name
Expand All @@ -192,6 +235,7 @@ def __init__(
self.backup_count = backup_count
self.interval = interval
self.cli_level = cli_level
self.log_format = log_format

self.base_path = Path(base_path) if base_path else self.default_platform_path()
self.log_file_path: Path | None = None
Expand All @@ -210,6 +254,14 @@ def _validate_level(level: str, default: str = "INFO") -> str:

return default

@staticmethod
def _validate_format(log_format: str, default: str = "text") -> str:
normalized_format = log_format.lower()
if normalized_format in VALID_FORMATS:
return normalized_format

return default

@staticmethod
def _is_valid_path(base_path: Path, spec_path: Path) -> Path:
"""Validate the user provided path to prevent directory traversal and ensure it's within the base path
Expand Down Expand Up @@ -275,12 +327,18 @@ def setup(self) -> None:
dlt_interval = dlt.config.get("runtime.log_interval", int)
dlt_cli_level = dlt.config.get("runtime.log_cli_level", str)
dlt_log_path = dlt.config.get("runtime.log_path", str)
# dlt only treats the exact value "JSON" specially. We read it to select json logs and
# fall back to text for anything else (including dlt's default format string).
dlt_log_format = dlt.config.get("runtime.log_format", str)

# Check if the DLT config values are valid and if so override the defaults
self.level = self._validate_level(dlt_level or self.level, default="INFO")
self.cli_level = self._validate_level(
dlt_cli_level or self.cli_level, default="ERROR"
)
self.log_format = self._validate_format(
dlt_log_format or self.log_format, default="text"
)

# Override the base path if log_path is set in DLT config, otherwise use the default platform path
self.base_path = Path(dlt_log_path) if dlt_log_path else self.base_path
Expand All @@ -303,25 +361,37 @@ def setup(self) -> None:
self.root_logger.handlers.clear()
self.handlers[self.runtime_mode](self.root_logger, self.log_file_path)

def _file_formatter(self) -> logging.Formatter:
"""Return the formatter for file/stdout handlers based on the configured log format.

Returns:
logging.Formatter: A JSON formatter when log_format is 'json', otherwise a
human-readable plain-text formatter.
"""
if self.log_format == "json":
return OpenHoundJSONFormatter()
return OpenHoundTextFormatter()

def container_handlers(self, logger: logging.Logger, file_path: Path) -> None:
"""Set the logging handler/format when running in a container"""

json_formatter = OpenHoundJSONFormatter()
formatter = self._file_formatter()

# Log to stdout in JSON for better compatibility with container-based logging systems
# Log to stdout for better compatibility with container-based logging systems.
# Output is human-readable text by default; set runtime.log_format = "JSON" for structured JSON.
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(json_formatter)
stdout_handler.setFormatter(formatter)
logger.addHandler(stdout_handler)

# But also log the same json format to a file for persistence and debugging when needed
# But also log the same format to a file for persistence and debugging when needed
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand All @@ -347,16 +417,16 @@ def cli_handlers(self, logger: logging.Logger, file_path: Path) -> None:
console_handler.setFormatter(rich_formatter)
logger.addHandler(console_handler)

# But also save the logs to a file in JSON format :)
json_formatter = OpenHoundJSONFormatter()
# But also save the logs to a file using the configured format (text by default)
file_formatter = self._file_formatter()
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(file_formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand All @@ -367,15 +437,15 @@ def cli_handlers(self, logger: logging.Logger, file_path: Path) -> None:

def service_handlers(self, logger: logging.Logger, file_path: Path) -> None:
"""Set the logging handler/format when running the OpenHound service"""
json_formatter = OpenHoundJSONFormatter()
file_formatter = self._file_formatter()
rotating_file_handler = RotatingFileHandler(
file_path,
when=self.rotate_when,
interval=self.interval,
backupCount=self.backup_count,
max_bytes=self.max_bytes,
)
rotating_file_handler.setFormatter(json_formatter)
rotating_file_handler.setFormatter(file_formatter)
# This regular expression overrides the default extMatch to recognize both
# default time based rotation filenames and size based rotation filenames (which gets a seconds added as well)
rotating_file_handler.extMatch = re.compile(
Expand Down
7 changes: 5 additions & 2 deletions src/openhound/scheduler/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

logger = logging.getLogger(__name__)

POLL_INTERVAL = 30 # seconds; fixed poll/check-in cadence, must remain below BHE's 600s client-checkin timeout


class ExtensionNotFoundError(Exception):
"""Raised when the configured collector extension cannot be found."""
Expand Down Expand Up @@ -68,7 +70,6 @@ def __init__(
token_key: str,
token_id: str,
collector_name: str,
interval: int = 5,
):
# BHE client settings
self.bhe_uri = bhe_uri
Expand All @@ -77,7 +78,7 @@ def __init__(
bhe_uri=bhe_uri, token_key=token_key, token_id=token_id
)
# Interval how often to check for a job
self.interval = interval
self.interval = POLL_INTERVAL

# Stores the ID of currently running BHE job
self.job_running: int | None = None
Expand Down Expand Up @@ -223,6 +224,8 @@ def _poll(self) -> None:
available_job = self.check_jobs()
if available_job:
self._start_job(available_job)
else:
self.client.jobs_current
except Exception:
logger.exception("Error checking for or starting jobs.")

Expand Down
2 changes: 0 additions & 2 deletions src/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def start():
# Load BHE config and secrets
bhe_uri = dlt.config["destination.bloodhoundenterprise.url"]
collector_name = dlt.config["destination.bloodhoundenterprise.collector_name"]
interval = dlt.config.get("destination.bloodhoundenterprise.interval", int)

# Load BHE secrets
token_key = dlt.secrets["destination.bloodhoundenterprise.token_key"]
Expand All @@ -28,7 +27,6 @@ def start():
token_key=token_key,
token_id=token_id,
collector_name=collector_name,
interval=interval,
)
svc.start()

Expand Down
Loading
Loading