diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 536d0c3..de2b228 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -32,7 +32,7 @@ jobs: uses: notdodo/github-actions/.github/workflows/docker-build-and-push.yml@docker-build-and-push-v0 with: image: digintlab/opencti-connector - platforms: linux/amd64, linux/arm64 + platforms: linux/amd64 egress-policy-allowlist: > deb.debian.org:443 deb.debian.org:80 diff --git a/.gitignore b/.gitignore index 9a043f6..7326edf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -config.yml +__pycache__ +.cache .env .mypy_cache .ruff_cache .venv -__pycache__ \ No newline at end of file +config.yml \ No newline at end of file diff --git a/README.md b/README.md index 2e24608..cca9346 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ All configuration values can be supplied via the `config.yml` file or through en | `dep.enable_site_indicator` | `DEP_ENABLE_SITE_INDICATOR` | `true` | Create a domain indicator per victim. | | `dep.enable_hash_indicator` | `DEP_ENABLE_HASH_INDICATOR` | `true` | Create a hash indicator when a hash is provided. | | `dep.skip_empty_victim` | `DEP_SKIP_EMPTY_VICTIM` | `true` | Skip items where victim is empty, `n/a`, or `none`. | +| `dep.create_sector_identities` | `DEP_CREATE_SECTOR_IDENTITIES` | `true` | Create sector identities and link victims with a `part-of` relationship. | ## Docker @@ -86,6 +87,7 @@ docker run --rm \ - The project uses [**uv**](https://docs.astral.sh/uv/) as the Python virtual environment and dependency management tool. - The connector stores `last_run` in OpenCTI worker state and fetches with an overlap (`DEP_OVERLAP_HOURS`) to catch delayed DEP changes. Delete the state in OpenCTI to force a full backfill window from `DEP_LOOKBACK_DAYS`. - Incidents are created with deterministic IDs derived from DEP `hashid`, and bundles are sent with `update=True`, so repeated records update existing incidents instead of creating duplicates. +- Sector names are normalized before sector-identity generation to reduce duplicates caused by inconsistent casing or whitespace in DEP data. - The API occasionally URL-encodes announcement descriptions. The connector automatically decodes the description before sending it to OpenCTI. - Intrusion set creation is disabled by default because not every dataset represents a threat actor. If needed, adapt the logic in `DepConnector._process_item`. - To reload the connector code in the platform, run: `docker compose build dep-connector; docker compose up -d dep-connector; docker compose logs -f dep-connector` diff --git a/config.yml.sample b/config.yml.sample index 03f37c2..7f83967 100644 --- a/config.yml.sample +++ b/config.yml.sample @@ -29,3 +29,4 @@ dep: enable_site_indicator: true enable_hash_indicator: true skip_empty_victim: true + create_sector_identities: true diff --git a/main.py b/main.py index 35f305d..34a23ec 100644 --- a/main.py +++ b/main.py @@ -86,6 +86,14 @@ def indicator_domain(self) -> str | None: self.site ) + @field_validator("sector") + @classmethod + def normalize_sector(cls, v: str | None) -> str | None: + if v is None: + return None + normalized = " ".join(v.split()).strip() + return normalized or None + class DepConnector: def __init__(self) -> None: @@ -185,6 +193,12 @@ def __init__(self) -> None: config, default=True, ) + self.create_sector_identities = pycti.get_config_variable( + "DEP_CREATE_SECTOR_IDENTITIES", + ["dep", "create_sector_identities"], + config, + default=True, + ) @staticmethod def _load_config() -> dict[str, Any]: @@ -300,7 +314,7 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: ) description_parts = [] - if item.sector: + if item.sector and not self.create_sector_identities: description_parts.append(f"Industry sector: {item.sector}") if item.revenue: description_parts.append(f"Reported revenue: {item.revenue}") @@ -317,6 +331,17 @@ def _create_victim_identity(self, item: LeakRecord) -> stix2.Identity | None: external_references=external_references or None, ) + def _create_sector_identity(self, sector: str) -> stix2.Identity: + sector_key = sector.lower() + return stix2.Identity( + id=pycti.Identity.generate_id(sector_key, identity_class="class"), + name=sector, + identity_class="class", + created_by_ref=self.author_identity, + confidence=self.confidence, + labels=[self.label_value], + ) + def _create_incident(self, item: LeakRecord) -> stix2.Incident: victim_name = item.victim or item.victim_domain if not victim_name: @@ -457,12 +482,22 @@ def _process_item(self, item: LeakRecord) -> None: if hash_indicator: indicators.append(hash_indicator) + sector_identity: stix2.Identity | None = None + sector = item.sector + if self.create_sector_identities and sector and victim: + sector_identity = self._create_sector_identity(sector) + objects: list[stix2._STIXBase21] = [self.author_identity] if victim: objects.append(victim) objects.append(incident) if victim: objects.append(self._build_relationship("targets", incident.id, victim.id)) + if sector_identity and victim: + objects.append(sector_identity) + objects.append( + self._build_relationship("part-of", victim.id, sector_identity.id) + ) for indicator in indicators: objects.append(indicator) objects.append(