From 5c9304e40b5eb7337bb69e9002d1c12209f47032 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 15:44:54 +0000 Subject: [PATCH 1/3] feat: materialize industry sectors as OpenCTI Sector entities When DEP_MATERIALIZE_SECTORS is enabled (default: true), the connector now creates a STIX Identity with identity_class="class" for each sector reported by the DEP API, and links the victim organization to it via a "part-of" relationship. This allows sectors to appear as first-class Sector entities in OpenCTI, enabling aggregation, filtering, and exploration by industry. The sector string is no longer embedded in the victim description when materialization is active. https://claude.ai/code/session_01CekopARuksxCJje93W8stx --- config.yml.sample | 1 + main.py | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/config.yml.sample b/config.yml.sample index 03f37c2..824e7b1 100644 --- a/config.yml.sample +++ b/config.yml.sample @@ -29,3 +29,4 @@ dep: enable_site_indicator: true enable_hash_indicator: true skip_empty_victim: true + materialize_sectors: true diff --git a/main.py b/main.py index 35f305d..b5ab7da 100644 --- a/main.py +++ b/main.py @@ -185,6 +185,12 @@ def __init__(self) -> None: config, default=True, ) + self.materialize_sectors = pycti.get_config_variable( + "DEP_MATERIALIZE_SECTORS", + ["dep", "materialize_sectors"], + config, + default=True, + ) @staticmethod def _load_config() -> dict[str, Any]: @@ -300,7 +306,7 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: ) description_parts = [] - if item.sector: + if item.sector and not self.materialize_sectors: description_parts.append(f"Industry sector: {item.sector}") if item.revenue: description_parts.append(f"Reported revenue: {item.revenue}") @@ -317,6 +323,16 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: external_references=external_references or None, ) + def _create_sector_identity(self, sector: str) -> stix2.Identity: + return stix2.Identity( + id=pycti.Identity.generate_id(sector, identity_class="class"), + name=sector, + identity_class="class", + created_by_ref=self.author_identity, + confidence=self.confidence, + labels=[self.label_value], + ) + def _create_incident(self, item: LeakRecord) -> stix2.Incident: victim_name = item.victim or item.victim_domain if not victim_name: @@ -457,12 +473,21 @@ def _process_item(self, item: LeakRecord) -> None: if hash_indicator: indicators.append(hash_indicator) + sector_identity: stix2.Identity | None = None + if self.materialize_sectors and item.sector and victim: + sector_identity = self._create_sector_identity(item.sector) + objects: list[stix2._STIXBase21] = [self.author_identity] if victim: objects.append(victim) objects.append(incident) if victim: objects.append(self._build_relationship("targets", incident.id, victim.id)) + if sector_identity and victim: + objects.append(sector_identity) + objects.append( + self._build_relationship("part-of", victim.id, sector_identity.id) + ) for indicator in indicators: objects.append(indicator) objects.append( From b8c4e78172a52be8d7cc9fc30e97d782388dc15b Mon Sep 17 00:00:00 2001 From: Edoardo Rosa <6991986+notdodo@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:33:37 +0100 Subject: [PATCH 2/3] enh: better naming, add documentation, normalize names --- .gitignore | 5 +++-- README.md | 2 ++ config.yml.sample | 2 +- main.py | 24 +++++++++++++++++------- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 9a043f6..7326edf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -config.yml +__pycache__ +.cache .env .mypy_cache .ruff_cache .venv -__pycache__ \ No newline at end of file +config.yml \ No newline at end of file diff --git a/README.md b/README.md index 2e24608..cca9346 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ All configuration values can be supplied via the `config.yml` file or through en | `dep.enable_site_indicator` | `DEP_ENABLE_SITE_INDICATOR` | `true` | Create a domain indicator per victim. | | `dep.enable_hash_indicator` | `DEP_ENABLE_HASH_INDICATOR` | `true` | Create a hash indicator when a hash is provided. | | `dep.skip_empty_victim` | `DEP_SKIP_EMPTY_VICTIM` | `true` | Skip items where victim is empty, `n/a`, or `none`. | +| `dep.create_sector_identities` | `DEP_CREATE_SECTOR_IDENTITIES` | `true` | Create sector identities and link victims with a `part-of` relationship. | ## Docker @@ -86,6 +87,7 @@ docker run --rm \ - The project uses [**uv**](https://docs.astral.sh/uv/) as the Python virtual environment and dependency management tool. - The connector stores `last_run` in OpenCTI worker state and fetches with an overlap (`DEP_OVERLAP_HOURS`) to catch delayed DEP changes. Delete the state in OpenCTI to force a full backfill window from `DEP_LOOKBACK_DAYS`. - Incidents are created with deterministic IDs derived from DEP `hashid`, and bundles are sent with `update=True`, so repeated records update existing incidents instead of creating duplicates. +- Sector names are normalized before sector-identity generation to reduce duplicates caused by inconsistent casing or whitespace in DEP data. - The API occasionally URL-encodes announcement descriptions. The connector automatically decodes the description before sending it to OpenCTI. - Intrusion set creation is disabled by default because not every dataset represents a threat actor. If needed, adapt the logic in `DepConnector._process_item`. - To reload the connector code in the platform, run: `docker compose build dep-connector; docker compose up -d dep-connector; docker compose logs -f dep-connector` diff --git a/config.yml.sample b/config.yml.sample index 824e7b1..7f83967 100644 --- a/config.yml.sample +++ b/config.yml.sample @@ -29,4 +29,4 @@ dep: enable_site_indicator: true enable_hash_indicator: true skip_empty_victim: true - materialize_sectors: true + create_sector_identities: true diff --git a/main.py b/main.py index b5ab7da..34a23ec 100644 --- a/main.py +++ b/main.py @@ -86,6 +86,14 @@ def indicator_domain(self) -> str | None: self.site ) + @field_validator("sector") + @classmethod + def normalize_sector(cls, v: str | None) -> str | None: + if v is None: + return None + normalized = " ".join(v.split()).strip() + return normalized or None + class DepConnector: def __init__(self) -> None: @@ -185,9 +193,9 @@ def __init__(self) -> None: config, default=True, ) - self.materialize_sectors = pycti.get_config_variable( - "DEP_MATERIALIZE_SECTORS", - ["dep", "materialize_sectors"], + self.create_sector_identities = pycti.get_config_variable( + "DEP_CREATE_SECTOR_IDENTITIES", + ["dep", "create_sector_identities"], config, default=True, ) @@ -306,7 +314,7 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: ) description_parts = [] - if item.sector and not self.materialize_sectors: + if item.sector and not self.create_sector_identities: description_parts.append(f"Industry sector: {item.sector}") if item.revenue: description_parts.append(f"Reported revenue: {item.revenue}") @@ -324,8 +332,9 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: ) def _create_sector_identity(self, sector: str) -> stix2.Identity: + sector_key = sector.lower() return stix2.Identity( - id=pycti.Identity.generate_id(sector, identity_class="class"), + id=pycti.Identity.generate_id(sector_key, identity_class="class"), name=sector, identity_class="class", created_by_ref=self.author_identity, @@ -474,8 +483,9 @@ def _process_item(self, item: LeakRecord) -> None: indicators.append(hash_indicator) sector_identity: stix2.Identity | None = None - if self.materialize_sectors and item.sector and victim: - sector_identity = self._create_sector_identity(item.sector) + sector = item.sector + if self.create_sector_identities and sector and victim: + sector_identity = self._create_sector_identity(sector) objects: list[stix2._STIXBase21] = [self.author_identity] if victim: From 7dc0fbac0e12f8731f1bcfa8ba8d245678994a5b Mon Sep 17 00:00:00 2001 From: Edoardo Rosa <6991986+notdodo@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:36:31 +0100 Subject: [PATCH 3/3] fix: ci --- .github/workflows/python-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 536d0c3..de2b228 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -32,7 +32,7 @@ jobs: uses: notdodo/github-actions/.github/workflows/docker-build-and-push.yml@docker-build-and-push-v0 with: image: digintlab/opencti-connector - platforms: linux/amd64, linux/arm64 + platforms: linux/amd64 egress-policy-allowlist: > deb.debian.org:443 deb.debian.org:80