From 5def6413d0af0e21e92f6d24a081483ef601e190 Mon Sep 17 00:00:00 2001
From: Edoardo Rosa <6991986+notdodo@users.noreply.github.com>
Date: Sat, 28 Mar 2026 16:58:16 +0100
Subject: [PATCH] enh: add multiple datasets, add tests, labels in all entities
 and relationships

---
 .gitignore                         |   1 +
 AGENTS.md                          |  46 +++-
 DOCKERHUB.md                       |  66 ++---
 README.md                          |  45 ++--
 config.yml.sample                  |   3 +-
 dep_connector/__init__.py          |  10 +-
 dep_connector/client_api.py        |  44 ++-
 dep_connector/connector.py         | 414 ++++++++++++++++++-----------
 dep_connector/converter_to_stix.py | 217 +++++++--------
 pyproject.toml                     |   4 +
 tests/__init__.py                  |   1 +
 tests/test_api_spec_datasets.py    |  19 ++
 tests/test_connector_datasets.py   |  43 +++
 tests/test_connector_runtime.py    | 224 ++++++++++++++++
 tests/test_converter_core.py       | 135 ++++++++++
 tests/test_converter_labels.py     |  75 ++++++
 uv.lock                            |  45 ++++
 17 files changed, 1053 insertions(+), 339 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_api_spec_datasets.py
 create mode 100644 tests/test_connector_datasets.py
 create mode 100644 tests/test_connector_runtime.py
 create mode 100644 tests/test_converter_core.py
 create mode 100644 tests/test_converter_labels.py
diff --git a/.gitignore b/.gitignore
index 8252d16..944c410 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ __pycache__
 .uv_cache
 .venv
 config.yml
+dep-api-spec.json
 scripts/
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 887287b..288c8de 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -32,15 +32,31 @@ It should track the code in `main.py`, not stale assumptions from earlier iterat
   - `dset`
   - `full=true`
 - `extended=true` is sent only when `DEP_EXTENDED_RESULTS=true`.
-- `DEP_DSET` defaults to `ext`, so the connector can query alternate DEP datasets when required.
+- `DEP_DATASETS` defaults to `["ext"]`.
+- Supported official dataset values are:
+  - `ext`
+  - `prv`
+  - `nws`
+  - `vnd`
+  - `dds`
+  - `frm`
+- The connector also accepts long dataset aliases and normalizes them to the API codes:
+  - `extortion` -> `ext`
+  - `privacy` -> `prv`
+  - `opennews` or `news` -> `nws`
+  - `vandalism` -> `vnd`
+  - `ddos` -> `dds`
+  - `forum` -> `frm`
+- The DEP API accepts one `dset` value per request, so the connector loops over configured datasets and issues one request per dataset.
 
 ## State management
 
-- The connector stores only one state key in OpenCTI worker state: `last_run`.
+- The connector stores one per-dataset state map in OpenCTI worker state: `last_run_by_dataset`.
 - First run window: `now - DEP_LOOKBACK_DAYS`.
-- Subsequent run window: `last_run - DEP_OVERLAP_HOURS`.
-- Invalid or non-string `last_run` values are ignored with a warning.
-- State is persisted only after the processing loop finishes: `{"last_run": end.isoformat()}`.
+- Subsequent run window per dataset: `last_run_by_dataset[dataset] - DEP_OVERLAP_HOURS`.
+- Invalid per-dataset `last_run` values are ignored with a warning.
+- State is persisted independently per dataset after that dataset finishes processing: `{"last_run_by_dataset": {"ext": "...", "dds": "..."}}`.
+- Adding a new dataset later starts that dataset from the full lookback window because it has no existing entry in `last_run_by_dataset`.
 - The overlap window is intentional and should be preserved to catch late DEP updates.
 
 ## Input parsing and normalization
@@ -81,6 +97,9 @@ It should track the code in `main.py`, not stale assumptions from earlier iterat
   - identity_class: `organization`
   - contact: `https://doubleextortion.com/`
 - Every emitted object and relationship created from DEP content carries the label `DigIntLab`.
+- DEP-derived objects and relationships also carry:
+  - `dep:dataset:<dataset code>` when the source dataset is known
+  - `dep:announcement-type:<lowercased enum value>` when announcement types are present
 - Confidence is consistently taken from `DEP_CONFIDENCE`.
 - Bundles are deduplicated by STIX ID before sending to OpenCTI.
 - Prefer deterministic IDs for DEP-derived entities and relationships to keep re-imports idempotent.
@@ -108,8 +127,9 @@ It should track the code in `main.py`, not stale assumptions from earlier iterat
 - Report custom properties (when present):
   - `dep_actor`
   - `dep_country`
-- Report labels always include `DigIntLab`, plus one label per announcement type:
+- Report labels always include `DigIntLab`, plus any applicable:
   - `dep:announcement-type:<lowercased enum value>`
+  - `dep:dataset:<dataset code>`
 - Report external reference prefers `annLink`; if absent, it falls back to `site`.
 - `annTitle` is attached as the external reference description when present.
 - `object_refs` contains all objects in the bundle (author identity, victim, indicators, intrusion set, country, sector, and all relationships between them).
@@ -130,8 +150,9 @@ It should track the code in `main.py`, not stale assumptions from earlier iterat
   - `first_seen`
   - `dep_actor` when present
   - `dep_country` when present
-- Incident labels always include `DigIntLab`, plus one label per announcement type:
+- Incident labels always include `DigIntLab`, plus any applicable:
   - `dep:announcement-type:<lowercased enum value>`
+  - `dep:dataset:<dataset code>`
 - Incident external reference prefers `annLink`; if absent, it falls back to `site`.
 - `annTitle` is attached as the external reference description when present.
 
@@ -242,7 +263,7 @@ These links are created automatically when both related objects exist. There are
   - `DEP_CREATE_COUNTRY_LOCATIONS`
 - Important non-boolean knobs:
   - `DEP_PRIMARY_OBJECT` (default: `report`; valid values: `report`, `incident`)
-  - `DEP_DSET`
+  - `DEP_DATASETS`
   - `DEP_LOOKBACK_DAYS`
   - `DEP_OVERLAP_HOURS`
   - `DEP_CONFIDENCE`
@@ -287,8 +308,13 @@ These links are created automatically when both related objects exist. There are
 
 Use `task format check type-check` for complete local checks before considering code changes done.
 
-There is a `task test` target, but there is currently no first-party test suite in this repository. Do not assume automated test coverage exists.
-For code changes, do not stop at static checks alone; perform Docker-based runtime validation as well.
+There is a first-party pytest suite in `tests/`, and `task test` runs it. Current automated coverage focuses on:
+
+- dataset parsing and official dataset validation against `dep-api-spec.json`
+- connector runtime helpers, run-window behavior, and item-processing behavior
+- STIX conversion, deterministic IDs, labels, and normalization helpers
+
+For code changes, do not stop at static checks alone; perform Docker-based runtime validation as well.
 
 ## File map
 
diff --git a/DOCKERHUB.md b/DOCKERHUB.md
index c768109..216eee5 100644
--- a/DOCKERHUB.md
+++ b/DOCKERHUB.md
@@ -14,8 +14,8 @@ An [OpenCTI](https://github.com/OpenCTI-Platform/OpenCTI) external-import connec
 - Creates **Organization** identities for victims
 - Optionally creates **Sector** identities, **Intrusion Sets**, and **Country** locations
 - Optionally generates **Indicators** for victim domains and leak hash identifiers
-- Adds announcement-type labels such as `dep:announcement-type:pii`
-- Maintains connector state with an overlap window to catch late DEP updates
+- Adds announcement-type and dataset labels such as `dep:announcement-type:pii` and `dep:dataset:ext`
+- Maintains per-dataset connector state with an overlap window to catch late DEP updates
 
 ---
 
@@ -54,38 +54,38 @@ The connector loads configuration from `OPENCTI_CONFIG_FILE` when set, otherwise
 
 ### Required
 
-| Environment variable | Description |
-| -------------------- | ----------- |
-| `OPENCTI_URL` | URL of your OpenCTI platform |
-| `OPENCTI_TOKEN` | OpenCTI API token |
-| `CONNECTOR_ID` | Unique connector identifier |
-| `CONNECTOR_TYPE` | Connector type, typically `EXTERNAL_IMPORT` |
-| `CONNECTOR_NAME` | Connector display name |
-| `CONNECTOR_SCOPE` | Connector scope, typically `report,incident,identity,indicator` |
-| `DEP_USERNAME` | DEP portal username |
-| `DEP_PASSWORD` | DEP portal password |
-| `DEP_API_KEY` | API key issued by DEP |
-| `DEP_CLIENT_ID` | AWS Cognito App Client ID |
+| Environment variable | Description                                                     |
+| -------------------- | --------------------------------------------------------------- |
+| `OPENCTI_URL`        | URL of your OpenCTI platform                                    |
+| `OPENCTI_TOKEN`      | OpenCTI API token                                               |
+| `CONNECTOR_ID`       | Unique connector identifier                                     |
+| `CONNECTOR_TYPE`     | Connector type, typically `EXTERNAL_IMPORT`                     |
+| `CONNECTOR_NAME`     | Connector display name                                          |
+| `CONNECTOR_SCOPE`    | Connector scope, typically `report,incident,identity,indicator` |
+| `DEP_USERNAME`       | DEP portal username                                             |
+| `DEP_PASSWORD`       | DEP portal password                                             |
+| `DEP_API_KEY`        | API key issued by DEP                                           |
+| `DEP_CLIENT_ID`      | AWS Cognito App Client ID                                       |
 
 ### Optional
 
-| Environment variable | Default | Description |
-| -------------------- | ------- | ----------- |
-| `CONNECTOR_RUN_INTERVAL` | `3600` | Polling interval in seconds |
-| `DEP_CONFIDENCE` | `70` | Confidence score on generated STIX objects |
-| `DEP_LOOKBACK_DAYS` | `7` | Days to look back on first run |
-| `DEP_OVERLAP_HOURS` | `72` | Overlap hours from previous run to catch late updates |
-| `DEP_DSET` | `ext` | DEP dataset to query |
-| `DEP_PRIMARY_OBJECT` | `report` | Primary STIX object to emit: `report` or `incident` |
-| `DEP_EXTENDED_RESULTS` | `true` | Request extended DEP results |
-| `DEP_ENABLE_SITE_INDICATOR` | `true` | Create a domain indicator per victim |
-| `DEP_ENABLE_HASH_INDICATOR` | `true` | Create a hash indicator when a hash is provided |
-| `DEP_SKIP_EMPTY_VICTIM` | `true` | Skip items where victim name is empty, `n/a`, or `none` |
-| `DEP_CREATE_SECTOR_IDENTITIES` | `true` | Create sector identities and link victims with `part-of` |
-| `DEP_CREATE_INTRUSION_SETS` | `true` | Create intrusion sets from DEP actor values |
-| `DEP_CREATE_COUNTRY_LOCATIONS` | `true` | Create country locations and link victims with `located-at` |
-| `DEP_LOGIN_ENDPOINT` | `https://cognito-idp.eu-west-1.amazonaws.com/` | Cognito login endpoint |
-| `DEP_API_ENDPOINT` | `https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist` | DEP REST endpoint |
+| Environment variable           | Default                                                   | Description                                                                                                                                                                                                    |
+| ------------------------------ | --------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `CONNECTOR_RUN_INTERVAL`       | `3600`                                                    | Polling interval in seconds                                                                                                                                                                                    |
+| `DEP_CONFIDENCE`               | `70`                                                      | Confidence score on generated STIX objects                                                                                                                                                                     |
+| `DEP_LOOKBACK_DAYS`            | `7`                                                       | Days to look back on first run                                                                                                                                                                                 |
+| `DEP_OVERLAP_HOURS`            | `72`                                                      | Overlap hours from previous run to catch late updates                                                                                                                                                          |
+| `DEP_DATASETS`                 | `ext`                                                     | DEP datasets to query. Accepts comma-separated short API codes (`ext`, `prv`, `nws`, `vnd`, `dds`, `frm`) or long aliases such as `extortion`, `privacy`, `opennews`/`news`, `vandalism`, `ddos`, and `forum`. |
+| `DEP_PRIMARY_OBJECT`           | `report`                                                  | Primary STIX object to emit: `report` or `incident`                                                                                                                                                            |
+| `DEP_EXTENDED_RESULTS`         | `true`                                                    | Request extended DEP results                                                                                                                                                                                   |
+| `DEP_ENABLE_SITE_INDICATOR`    | `true`                                                    | Create a domain indicator per victim                                                                                                                                                                           |
+| `DEP_ENABLE_HASH_INDICATOR`    | `true`                                                    | Create a hash indicator when a hash is provided                                                                                                                                                                |
+| `DEP_SKIP_EMPTY_VICTIM`        | `true`                                                    | Skip items where victim name is empty, `n/a`, or `none`                                                                                                                                                        |
+| `DEP_CREATE_SECTOR_IDENTITIES` | `true`                                                    | Create sector identities and link victims with `part-of`                                                                                                                                                       |
+| `DEP_CREATE_INTRUSION_SETS`    | `true`                                                    | Create intrusion sets from DEP actor values                                                                                                                                                                    |
+| `DEP_CREATE_COUNTRY_LOCATIONS` | `true`                                                    | Create country locations and link victims with `located-at`                                                                                                                                                    |
+| `DEP_LOGIN_ENDPOINT`           | `https://cognito-idp.eu-west-1.amazonaws.com/`            | Cognito login endpoint                                                                                                                                                                                         |
+| `DEP_API_ENDPOINT`             | `https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist` | DEP REST endpoint                                                                                                                                                                                              |
 
 ---
 
@@ -108,6 +108,10 @@ dep-connector:
     - DEP_CLIENT_ID=${DEP_CLIENT_ID}
 ```
 
+When multiple datasets are configured, the connector loops over them and issues one DEP API request per dataset. Dataset aliases are normalized to the short API codes before the request is sent, for example `ddos` -> `dds` and `vandalism` -> `vnd`.
+
+State is tracked per dataset, so adding a new dataset later starts that dataset from the normal lookback window instead of inheriting the already-advanced state of the previously configured datasets.
+
 ---
 
 ## Links
diff --git a/README.md b/README.md
index 2453840..5acabef 100644
--- a/README.md
+++ b/README.md
@@ -13,8 +13,8 @@ The Double Extortion connector ingests ransomware and data-leak announcements pu
 - Optionally materializes **Country** locations and links victims to them.
 - Automatically links intrusion sets to sectors, intrusion sets to countries, and sectors to countries when those entities are created.
 - Generates optional **Indicators** for advertised victim domains and leak hash identifiers.
-- Adds announcement-type labels to reports or incidents (for example `dep:announcement-type:pii`).
-- Supports querying different Double Extortion Platform datasets via `DEP_DSET`.
+- Adds announcement-type and dataset labels across DEP-derived STIX objects (for example `dep:announcement-type:pii` and `dep:dataset:ext`).
+- Supports querying one or more Double Extortion Platform datasets via `DEP_DATASETS`.
 - Maintains connector state with a configurable overlap window to capture late DEP updates.
 - Uses stable identifiers (based on DEP `hashid`) for both reports and incidents so refreshed DEP records update existing objects.
 - Filters low-quality actor values such as `unknown`, `anonymous`, or `ransomware group` before creating intrusion sets.
@@ -52,23 +52,23 @@ All configuration values can be supplied via the `config.yml` file or through en
 
 ### Optional values
 
-| YAML path                      | Environment variable           | Default                                                   | Description                                                                                                             |
-| ------------------------------ | ------------------------------ | --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
-| `connector.interval`           | `CONNECTOR_RUN_INTERVAL`       | `3600`                                                    | Interval in seconds between executions.                                                                                 |
-| `dep.confidence`               | `DEP_CONFIDENCE`               | `70`                                                      | Confidence score attached to generated STIX objects.                                                                    |
-| `dep.login_endpoint`           | `DEP_LOGIN_ENDPOINT`           | `https://cognito-idp.eu-west-1.amazonaws.com/`            | Cognito login endpoint.                                                                                                 |
-| `dep.api_endpoint`             | `DEP_API_ENDPOINT`             | `https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist` | REST endpoint for announcements.                                                                                        |
-| `dep.lookback_days`            | `DEP_LOOKBACK_DAYS`            | `7`                                                       | Days to look back on the first run.                                                                                     |
-| `dep.overlap_hours`            | `DEP_OVERLAP_HOURS`            | `72`                                                      | Hours to overlap from the previous `last_run` when fetching, to catch late updates.                                     |
-| `dep.extended_results`         | `DEP_EXTENDED_RESULTS`         | `true`                                                    | Request extended leak information by adding `extended=true` to DEP API requests.                                        |
-| `dep.dset`                     | `DEP_DSET`                     | `ext`                                                     | Dataset to query (for example `ext`, `sanctions`).                                                                      |
-| `dep.enable_site_indicator`    | `DEP_ENABLE_SITE_INDICATOR`    | `true`                                                    | Create a domain indicator per victim.                                                                                   |
-| `dep.enable_hash_indicator`    | `DEP_ENABLE_HASH_INDICATOR`    | `true`                                                    | Create a hash indicator when a hash is provided.                                                                        |
-| `dep.skip_empty_victim`        | `DEP_SKIP_EMPTY_VICTIM`        | `true`                                                    | Skip items where victim is empty, `n/a`, or `none`.                                                                     |
-| `dep.create_sector_identities` | `DEP_CREATE_SECTOR_IDENTITIES` | `true`                                                    | Create sector identities and link victims with a `part-of` relationship.                                                |
-| `dep.create_intrusion_sets`    | `DEP_CREATE_INTRUSION_SETS`    | `true`                                                    | Create intrusion sets from DEP actor values and link incidents with `attributed-to` (incident mode only).               |
-| `dep.primary_object`           | `DEP_PRIMARY_OBJECT`           | `report`                                                  | Primary object: `report` wraps all objects in a STIX Report container; `incident` creates a standalone Incident object. |
-| `dep.create_country_locations` | `DEP_CREATE_COUNTRY_LOCATIONS` | `true`                                                    | Create country locations and link victim identities with `located-at`.                                                  |
+| YAML path                      | Environment variable           | Default                                                   | Description                                                                                                                                                                                                                  |
+| ------------------------------ | ------------------------------ | --------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `connector.interval`           | `CONNECTOR_RUN_INTERVAL`       | `3600`                                                    | Interval in seconds between executions.                                                                                                                                                                                      |
+| `dep.confidence`               | `DEP_CONFIDENCE`               | `70`                                                      | Confidence score attached to generated STIX objects.                                                                                                                                                                         |
+| `dep.login_endpoint`           | `DEP_LOGIN_ENDPOINT`           | `https://cognito-idp.eu-west-1.amazonaws.com/`            | Cognito login endpoint.                                                                                                                                                                                                      |
+| `dep.api_endpoint`             | `DEP_API_ENDPOINT`             | `https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist` | REST endpoint for announcements.                                                                                                                                                                                             |
+| `dep.lookback_days`            | `DEP_LOOKBACK_DAYS`            | `7`                                                       | Days to look back on the first run.                                                                                                                                                                                          |
+| `dep.overlap_hours`            | `DEP_OVERLAP_HOURS`            | `72`                                                      | Hours to overlap from the previous per-dataset `last_run` when fetching, to catch late updates.                                                                                                                              |
+| `dep.extended_results`         | `DEP_EXTENDED_RESULTS`         | `true`                                                    | Request extended leak information by adding `extended=true` to DEP API requests.                                                                                                                                             |
+| `dep.datasets`                 | `DEP_DATASETS`                 | `ext`                                                     | Comma-separated or YAML-list dataset selection. Accepts official API values (`ext`, `prv`, `nws`, `vnd`, `dds`, `frm`) and long aliases such as `extortion`, `privacy`, `opennews`/`news`, `vandalism`, `ddos`, and `forum`. |
+| `dep.enable_site_indicator`    | `DEP_ENABLE_SITE_INDICATOR`    | `true`                                                    | Create a domain indicator per victim.                                                                                                                                                                                        |
+| `dep.enable_hash_indicator`    | `DEP_ENABLE_HASH_INDICATOR`    | `true`                                                    | Create a hash indicator when a hash is provided.                                                                                                                                                                             |
+| `dep.skip_empty_victim`        | `DEP_SKIP_EMPTY_VICTIM`        | `true`                                                    | Skip items where victim is empty, `n/a`, or `none`.                                                                                                                                                                          |
+| `dep.create_sector_identities` | `DEP_CREATE_SECTOR_IDENTITIES` | `true`                                                    | Create sector identities and link victims with a `part-of` relationship.                                                                                                                                                     |
+| `dep.create_intrusion_sets`    | `DEP_CREATE_INTRUSION_SETS`    | `true`                                                    | Create intrusion sets from DEP actor values and link incidents with `attributed-to` (incident mode only).                                                                                                                    |
+| `dep.primary_object`           | `DEP_PRIMARY_OBJECT`           | `report`                                                  | Primary object: `report` wraps all objects in a STIX Report container; `incident` creates a standalone Incident object.                                                                                                      |
+| `dep.create_country_locations` | `DEP_CREATE_COUNTRY_LOCATIONS` | `true`                                                    | Create country locations and link victim identities with `located-at`.                                                                                                                                                       |
 
 ### DEP request behavior
 
@@ -79,6 +79,10 @@ Each DEP fetch sends:
 - `dset`
 - `full=true`
 
+When multiple datasets are configured, the connector loops over them and sends one DEP request per dataset.
+
+Dataset aliases are normalized to the short API codes before requests are sent. For example, `ddos` becomes `dds` and `vandalism` becomes `vnd`.
+
 The connector adds `extended=true` only when `DEP_EXTENDED_RESULTS=true`.
 
 ## Why `IntrusionSet` for DEP actor values
@@ -117,7 +121,8 @@ docker run --rm \
 
 - The project uses [**go-task**](https://github.com/go-task/task) with a `Taskfile.yml` to streamline common development commands.
 - The project uses [**uv**](https://docs.astral.sh/uv/) as the Python virtual environment and dependency management tool.
-- The connector stores `last_run` in OpenCTI worker state and fetches with an overlap (`DEP_OVERLAP_HOURS`) to catch delayed DEP changes. Delete the state in OpenCTI to force a full backfill window from `DEP_LOOKBACK_DAYS`.
+- `task test` runs the first-party pytest suite under `tests/`, covering dataset parsing, connector runtime helpers including run-window behavior, and STIX conversion behavior.
+- The connector stores `last_run_by_dataset` in OpenCTI worker state and applies the overlap (`DEP_OVERLAP_HOURS`) independently per dataset. Adding a new dataset later starts that dataset from the full `DEP_LOOKBACK_DAYS` window without affecting the existing ones.
 - Reports and incidents are created with deterministic IDs derived from DEP `hashid`, and bundles are sent with `update=True`, so repeated records update existing objects instead of creating duplicates.
 - In `report` mode each announcement is wrapped in a STIX `Report` object whose `object_refs` contains all correlated entities (victim, indicators, intrusion set, country, sector and their relationships). This produces a pre-correlated Knowledge Graph view directly in OpenCTI, consistent with most other connectors and feeds.
 - In `incident` mode the announcement is modeled as a STIX `Incident` with explicit `targets`, `attributed-to`, and `indicates` relationships.
diff --git a/config.yml.sample b/config.yml.sample
index ad381f0..06ff0ca 100644
--- a/config.yml.sample
+++ b/config.yml.sample
@@ -23,7 +23,8 @@ dep:
   confidence: 70
   login_endpoint: https://cognito-idp.eu-west-1.amazonaws.com/
   api_endpoint: https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist
-  dset: ext
+  datasets:
+    - ext
   lookback_days: 7
   overlap_hours: 72
   extended_results: true  # Adds extended=true to DEP API requests.
diff --git a/dep_connector/__init__.py b/dep_connector/__init__.py
index d5f749c..075b3ea 100644
--- a/dep_connector/__init__.py
+++ b/dep_connector/__init__.py
@@ -1,3 +1,11 @@
+from dep_connector.client_api import DepDataset
 from dep_connector.connector import DepConnector
+from dep_connector.converter_to_stix import LeakRecord, PrimaryObject, StixBuilder
 
-__all__ = ["DepConnector"]
+__all__ = [
+    "DepConnector",
+    "DepDataset",
+    "LeakRecord",
+    "PrimaryObject",
+    "StixBuilder",
+]
diff --git a/dep_connector/client_api.py b/dep_connector/client_api.py
index 8f42f8a..fcc2a77 100644
--- a/dep_connector/client_api.py
+++ b/dep_connector/client_api.py
@@ -1,5 +1,6 @@
 import json
 import logging
+from enum import StrEnum
 from typing import TypeAlias
 
 import requests
@@ -11,6 +12,40 @@
 DepApiItem: TypeAlias = dict[str, JsonValue]
 
 
+class DepDataset(StrEnum):
+    """Dataset codes sent as the ``dset`` query parameter."""
+    EXTORTION = "ext"
+    PRIVACY = "prv"
+    OPENNEWS = "nws"
+    VANDALISM = "vnd"
+    DDOS = "dds"
+    FORUM = "frm"
+
+    @classmethod
+    def _missing_(cls, value: object) -> "DepDataset | None":
+        # Fallback lookup: map a long-form alias to its member, else reject.
+        return DATASET_ALIASES.get(value) if isinstance(value, str) else None
+
+
+DATASET_ALIASES: dict[str, DepDataset] = {
+    "extortion": DepDataset.EXTORTION,
+    "privacy": DepDataset.PRIVACY,
+    "opennews": DepDataset.OPENNEWS,
+    "news": DepDataset.OPENNEWS,
+    "vandalism": DepDataset.VANDALISM,
+    "ddos": DepDataset.DDOS,
+    "forum": DepDataset.FORUM,
+}
+
+
+def dataset_alias_summary() -> str:
+    """Summarize aliases per dataset in enum order; alias-less datasets are skipped."""
+    by_dataset: dict[DepDataset, list[str]] = {}
+    for alias, dataset in DATASET_ALIASES.items():
+        by_dataset.setdefault(dataset, []).append(alias)
+    return ", ".join("/".join(by_dataset[d]) for d in DepDataset if d in by_dataset)
+
+
 class DepClient:
     def __init__(
         self,
@@ -21,7 +56,6 @@ def __init__(
         username: str | None,
         password: str | None,
         client_id: str,
-        dataset: str,
         extended_results: bool,
     ) -> None:
         self.login_endpoint = login_endpoint
@@ -30,7 +64,6 @@ def __init__(
         self.username = username
         self.password = password
         self.client_id = client_id
-        self.dataset = dataset
         self.extended_results = extended_results
 
     def authenticate(self) -> str:
@@ -59,14 +92,17 @@ def authenticate(self) -> str:
 
     def fetch_raw(
         self,
+        dataset: DepDataset,
         start_date: str,
         end_date: str,
+        token: str | None = None,
     ) -> list[DepApiItem]:
-        token = self.authenticate()
+        if token is None:
+            token = self.authenticate()
         params: dict[str, str] = {
             "ts": start_date,
             "te": end_date,
-            "dset": self.dataset,
+            "dset": dataset,
             "full": "true",
         }
         if self.extended_results:
diff --git a/dep_connector/connector.py b/dep_connector/connector.py
index 83483a2..e5d240d 100644
--- a/dep_connector/connector.py
+++ b/dep_connector/connector.py
@@ -1,3 +1,4 @@
+from collections.abc import Iterable, Mapping
 from datetime import UTC, datetime, timedelta
 
 import pycti  # type: ignore[import-untyped]
@@ -5,7 +6,7 @@
 from stix2 import TLP_AMBER  # type: ignore[import-untyped]
 from stix2 import v21 as stix2
 
-from dep_connector.client_api import DepClient
+from dep_connector.client_api import DepClient, DepDataset, dataset_alias_summary
 from dep_connector.config_loader import load_config
 from dep_connector.converter_to_stix import LeakRecord, PrimaryObject, StixBuilder
 
@@ -15,16 +16,49 @@ def __init__(self) -> None:
         config = load_config()
         self.helper = pycti.OpenCTIConnectorHelper(config)
         label_value = "DigIntLab"
-        author_identity = stix2.Identity(
-            id=pycti.Identity.generate_id(label_value, identity_class="organization"),
-            name=label_value,
-            description="We Track and Monitor the Cyber Space",
-            contact_information="https://doubleextortion.com/",
-            identity_class="organization",
-            object_marking_refs=[TLP_AMBER],
-        )
         self._current_work_id: str | None = None
+        self._load_runtime_config(config)
+        self.client = self._build_client(config)
+        self.datasets = self._parse_datasets(self.raw_datasets)
+        self.stix = StixBuilder(
+            author_identity=stix2.Identity(
+                id=pycti.Identity.generate_id(
+                    label_value, identity_class="organization"
+                ),
+                name=label_value,
+                description="We Track and Monitor the Cyber Space",
+                contact_information="https://doubleextortion.com/",
+                identity_class="organization",
+                object_marking_refs=[TLP_AMBER],
+            ),
+            confidence=self.confidence,
+            label_value=label_value,
+        )
+
+    def _config_dataset_value(self, config: dict[str, object]) -> object:
+        """Return the raw dataset setting; the legacy ``DEP_DSET`` key still works."""
+        lookups = (
+            ("DEP_DATASETS", ["dep", "datasets"]),
+            ("DEP_DSETS", ["dep", "dsets"]),
+            ("DEP_DSET", ["dep", "dset"]),  # key used before multi-dataset support
+        )
+        for env_var, yaml_path in lookups:
+            value = pycti.get_config_variable(env_var, yaml_path, config)
+            if value is not None:
+                return value
+        return [DepDataset.EXTORTION]
+
+    @staticmethod
+    def _parse_primary_object(value: str) -> PrimaryObject:
+        try:
+            return PrimaryObject(value.lower())
+        except ValueError as exc:
+            error = (
+                f"DEP primary object must be one of: report, incident (got: {value})"
+            )
+            raise ValueError(error) from exc
 
+    def _load_runtime_config(self, config: dict[str, object]) -> None:
         self.interval = pycti.get_config_variable(
             "CONNECTOR_RUN_INTERVAL",
             ["connector", "interval"],
@@ -46,45 +80,12 @@ def __init__(self) -> None:
             default=72,
             isNumber=True,
         )
-        confidence = pycti.get_config_variable(
-            "DEP_CONFIDENCE", ["dep", "confidence"], config, default=70, isNumber=True
-        )
-        api_key = pycti.get_config_variable("DEP_API_KEY", ["dep", "api_key"], config)
-        username = pycti.get_config_variable(
-            "DEP_USERNAME", ["dep", "username"], config
-        )
-        password = pycti.get_config_variable(
-            "DEP_PASSWORD", ["dep", "password"], config
-        )
-        client_id = pycti.get_config_variable(
-            "DEP_CLIENT_ID", ["dep", "client_id"], config, default=""
-        )
-        if not client_id:
-            error = "DEP client ID must be provided via configuration"
-            raise ValueError(error)
-        login_endpoint = pycti.get_config_variable(
-            "DEP_LOGIN_ENDPOINT",
-            ["dep", "login_endpoint"],
+        self.confidence = pycti.get_config_variable(
+            "DEP_CONFIDENCE",
+            ["dep", "confidence"],
             config,
-            default="https://cognito-idp.eu-west-1.amazonaws.com/",
-        )
-        api_endpoint = pycti.get_config_variable(
-            "DEP_API_ENDPOINT",
-            ["dep", "api_endpoint"],
-            config,
-            default="https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist",
-        )
-        dataset = pycti.get_config_variable(
-            "DEP_DSET",
-            ["dep", "dset"],
-            config,
-            default="ext",
-        )
-        extended_results = pycti.get_config_variable(
-            "DEP_EXTENDED_RESULTS",
-            ["dep", "extended_results"],
-            config,
-            default=True,
+            default=70,
+            isNumber=True,
         )
         self.enable_site_indicator = pycti.get_config_variable(
             "DEP_ENABLE_SITE_INDICATOR",
@@ -122,55 +123,127 @@ def __init__(self) -> None:
             config,
             default=True,
         )
-        primary_object_value = str(
+        self.primary_object = self._parse_primary_object(
+            str(
+                pycti.get_config_variable(
+                    "DEP_PRIMARY_OBJECT",
+                    ["dep", "primary_object"],
+                    config,
+                    default=PrimaryObject.REPORT,
+                )
+            ).strip()
+        )
+        self.raw_datasets = self._config_dataset_value(config)
+
+    def _build_client(self, config: dict[str, object]) -> DepClient:
+        client_id = str(
             pycti.get_config_variable(
-                "DEP_PRIMARY_OBJECT",
-                ["dep", "primary_object"],
-                config,
-                default=PrimaryObject.REPORT.value,
-            )
-        ).strip()
-        try:
-            self.primary_object = PrimaryObject(primary_object_value.lower())
-        except ValueError as exc:
-            error = (
-                "DEP primary object must be one of: report, incident "
-                f"(got: {primary_object_value})"
+                "DEP_CLIENT_ID", ["dep", "client_id"], config, default=""
             )
-            raise ValueError(error) from exc
-
-        self.client = DepClient(
-            login_endpoint=login_endpoint,
-            api_endpoint=api_endpoint,
-            api_key=api_key,
-            username=username,
-            password=password,
-            client_id=client_id,
-            dataset=dataset,
-            extended_results=extended_results,
         )
-        self.stix = StixBuilder(
-            author_identity=author_identity,
-            confidence=confidence,
-            label_value=label_value,
+        if not client_id:
+            error = "DEP client ID must be provided via configuration"
+            raise ValueError(error)
+        return DepClient(
+            login_endpoint=str(
+                pycti.get_config_variable(
+                    "DEP_LOGIN_ENDPOINT",
+                    ["dep", "login_endpoint"],
+                    config,
+                    default="https://cognito-idp.eu-west-1.amazonaws.com/",
+                )
+            ),
+            api_endpoint=str(
+                pycti.get_config_variable(
+                    "DEP_API_ENDPOINT",
+                    ["dep", "api_endpoint"],
+                    config,
+                    default="https://api.eu-ep1.doubleextortion.com/v1/dbtr/privlist",
+                )
+            ),
+            api_key=pycti.get_config_variable(
+                "DEP_API_KEY", ["dep", "api_key"], config
+            ),
+            username=pycti.get_config_variable(
+                "DEP_USERNAME", ["dep", "username"], config
+            ),
+            password=pycti.get_config_variable(
+                "DEP_PASSWORD", ["dep", "password"], config
+            ),
+            client_id=client_id,
+            extended_results=pycti.get_config_variable(
+                "DEP_EXTENDED_RESULTS",
+                ["dep", "extended_results"],
+                config,
+                default=True,
+            ),
         )
 
-    def _fetch_data(self, start: datetime, end: datetime) -> list[LeakRecord]:
+    def _parse_datasets(self, raw_datasets: object) -> tuple[DepDataset, ...]:
+        values: list[str] = []
+        if isinstance(raw_datasets, str):
+            values = [part.strip().lower() for part in raw_datasets.split(",")]
+        elif isinstance(raw_datasets, Iterable):
+            values = [str(value).strip().lower() for value in raw_datasets]
+        else:
+            values = [str(raw_datasets).strip().lower()]
+
+        parsed: list[DepDataset] = []
+        invalid: list[str] = []
+        for value in values:
+            if not value:
+                continue
+            try:
+                dataset = DepDataset(value)
+            except ValueError:
+                invalid.append(value)
+                continue
+            if dataset not in parsed:
+                parsed.append(dataset)
+
+        if invalid:
+            allowed = ", ".join(DepDataset)
+            invalid_values = ", ".join(invalid)
+            error = (
+                "DEP datasets must use supported values or aliases "
+                f"({allowed}; aliases: {dataset_alias_summary()}); got: {invalid_values}"
+            )
+            raise ValueError(error)
+        if not parsed:
+            error = "DEP_DATASETS must contain at least one dataset value"
+            raise ValueError(error)
+        return tuple(parsed)
+
+    def _fetch_data(
+        self,
+        dataset: DepDataset,
+        start: datetime,
+        end: datetime,
+        token: str | None = None,
+    ) -> list[LeakRecord]:
+        parsed_items: list[LeakRecord] = []
+        start_date = start.strftime("%Y-%m-%d")
+        end_date = end.strftime("%Y-%m-%d")
+        if token is None:
+            token = self.client.authenticate()
+        self.helper.log_info(f"Fetching DEP dataset '{dataset}'")
         raw_items = self.client.fetch_raw(
-            start_date=start.strftime("%Y-%m-%d"),
-            end_date=end.strftime("%Y-%m-%d"),
+            dataset=dataset,
+            start_date=start_date,
+            end_date=end_date,
+            token=token,
         )
-        parsed_items: list[LeakRecord] = []
         for raw_item in raw_items:
             try:
-                parsed_items.append(LeakRecord(**raw_item))
+                parsed_items.append(LeakRecord(**raw_item, dep_dataset=dataset))
             except ValidationError as error:
                 try:
                     victim_value = raw_item["victim"]
                 except KeyError:
                     victim_value = None
                 self.helper.log_warning(
-                    f"Skipping invalid DEP item for victim {victim_value}: {error}"
+                    "Skipping invalid DEP item for "
+                    f"dataset {dataset} and victim {victim_value}: {error}"
                 )
         return parsed_items
 
@@ -183,53 +256,55 @@ def _should_skip_item(self, victim: str | None) -> bool:
     def _build_indicators(self, item: LeakRecord) -> list[stix2.Indicator]:
         indicators: list[stix2.Indicator] = []
         if self.enable_site_indicator:
-            site_indicator = self.stix.create_site_indicator(item)
-            if site_indicator:
-                indicators.append(site_indicator)
+            indicator = self.stix.create_site_indicator(item)
+            if indicator:
+                indicators.append(indicator)
         if self.enable_hash_indicator:
-            hash_indicator = self.stix.create_hash_indicator(item)
-            if hash_indicator:
-                indicators.append(hash_indicator)
+            indicator = self.stix.create_hash_indicator(item)
+            if indicator:
+                indicators.append(indicator)
         return indicators
 
     def _build_indicator_victim_relationships(
         self,
+        item: LeakRecord,
         indicators: list[stix2.Indicator],
         victim: stix2.Identity | None,
     ) -> list[stix2.Relationship]:
         if victim is None:
             return []
         return [
-            self.stix.build_relationship("related-to", indicator.id, victim.id)
+            self.stix.build_relationship(item, "related-to", indicator.id, victim.id)
             for indicator in indicators
         ]
 
     def _build_cross_entity_relationships(
         self,
+        item: LeakRecord,
         intrusion_set: stix2.IntrusionSet | None,
         sector_identity: stix2.Identity | None,
         country_location: stix2.Location | None,
     ) -> list[stix2._STIXBase21]:
-        objects: list[stix2._STIXBase21] = []
+        relationships: list[stix2._STIXBase21] = []
         if intrusion_set and sector_identity:
-            objects.append(
+            relationships.append(
                 self.stix.build_relationship(
-                    "targets", intrusion_set.id, sector_identity.id
+                    item, "targets", intrusion_set.id, sector_identity.id
                 )
             )
         if intrusion_set and country_location:
-            objects.append(
+            relationships.append(
                 self.stix.build_relationship(
-                    "targets", intrusion_set.id, country_location.id
+                    item, "targets", intrusion_set.id, country_location.id
                 )
             )
         if sector_identity and country_location:
-            objects.append(
+            relationships.append(
                 self.stix.build_relationship(
-                    "related-to", sector_identity.id, country_location.id
+                    item, "related-to", sector_identity.id, country_location.id
                 )
             )
-        return objects
+        return relationships
 
     def _build_optional_entities(
         self,
@@ -240,11 +315,12 @@ def _build_optional_entities(
         objects: list[stix2._STIXBase21] = []
         sector_identity: stix2.Identity | None = None
         if self.create_sector_identities and item.sector and victim:
-            sector_identity = self.stix.create_sector_identity(item.sector)
-        if sector_identity and victim:
+            sector_identity = self.stix.create_sector_identity(item.sector, item)
             objects.append(sector_identity)
             objects.append(
-                self.stix.build_relationship("part-of", victim.id, sector_identity.id)
+                self.stix.build_relationship(
+                    item, "part-of", victim.id, sector_identity.id
+                )
             )
 
         intrusion_set: stix2.IntrusionSet | None = None
@@ -253,29 +329,28 @@ def _build_optional_entities(
             and item.actor
             and not self.stix.is_low_quality_actor(item.actor)
         ):
-            intrusion_set = self.stix.create_intrusion_set(item.actor)
-        if intrusion_set:
+            intrusion_set = self.stix.create_intrusion_set(item.actor, item)
             objects.append(intrusion_set)
             if incident_id is not None:
                 objects.append(
                     self.stix.build_relationship(
-                        "attributed-to", incident_id, intrusion_set.id
+                        item, "attributed-to", incident_id, intrusion_set.id
                     )
                 )
 
         country_location: stix2.Location | None = None
         if self.create_country_locations and item.country and victim:
-            country_location = self.stix.create_country_location(item.country)
-        if country_location and victim:
+            country_location = self.stix.create_country_location(item.country, item)
             objects.append(country_location)
             objects.append(
                 self.stix.build_relationship(
-                    "located-at", victim.id, country_location.id
+                    item, "located-at", victim.id, country_location.id
                 )
             )
+
         objects.extend(
             self._build_cross_entity_relationships(
-                intrusion_set, sector_identity, country_location
+                item, intrusion_set, sector_identity, country_location
             )
         )
         return objects
@@ -292,7 +367,9 @@ def _build_content(
             content.append(victim)
         content.extend(self._build_optional_entities(item, victim, incident_id))
         content.extend(indicators)
-        content.extend(self._build_indicator_victim_relationships(indicators, victim))
+        content.extend(
+            self._build_indicator_victim_relationships(item, indicators, victim)
+        )
         return content
 
     def _send_objects(self, objects: list[stix2._STIXBase21]) -> None:
@@ -334,10 +411,10 @@ def _process_item_as_incident(
         objects.append(incident)
         if victim:
             objects.append(
-                self.stix.build_relationship("targets", incident.id, victim.id)
+                self.stix.build_relationship(item, "targets", incident.id, victim.id)
             )
         objects.extend(
-            self.stix.build_relationship("indicates", indicator.id, incident.id)
+            self.stix.build_relationship(item, "indicates", indicator.id, incident.id)
             for indicator in indicators
         )
         self._send_objects(objects)
@@ -355,30 +432,12 @@ def _process_item_as_report(
 
     def _run_cycle(self) -> None:
         now = datetime.now(UTC)
-        start = now - timedelta(days=self.lookback_days)
-        state = self.helper.get_state() or {}
-        try:
-            last_run = state["last_run"]
-        except KeyError:
-            last_run = None
-        if isinstance(last_run, str):
-            try:
-                start = datetime.fromisoformat(last_run) - timedelta(
-                    hours=self.overlap_hours
-                )
-            except ValueError:
-                self.helper.log_warning(
-                    f"Ignoring invalid last_run state value: {last_run}"
-                )
-        elif last_run is not None:
-            self.helper.log_warning(
-                "Ignoring non-string last_run state value returned by OpenCTI helper"
-            )
         end = now
+        state = self.helper.get_state() or {}
 
         self.helper.log_info(
-            "Fetching DEP data from "
-            f"{start.isoformat()} to {end.isoformat()} "
+            "Fetching DEP data for datasets "
+            f"{', '.join(self.datasets)} "
             f"(overlap: {self.overlap_hours}h)"
         )
 
@@ -387,25 +446,20 @@ def _run_cycle(self) -> None:
             f"DEP connector - {now.strftime('%Y-%m-%d %H:%M:%S')} UTC",
         )
         try:
-            try:
-                items = self._fetch_data(start, end)
-            except Exception as error:
-                self.helper.log_error(f"Failed to fetch DEP data: {error}")
-                return
-
-            self.helper.log_info(f"Received {len(items)} entries from DEP API")
-
-            for item in items:
-                try:
-                    self._process_item(item)
-                except Exception as error:
-                    self.helper.log_error(
-                        f"Failed to process DEP item for victim {item.victim}: {error}"
-                    )
-
-            self.helper.log_info("Persisting connector state")
-            self.helper.set_state({"last_run": end.isoformat()})
-            self.helper.log_info("DEP run completed")
+            token = self.client.authenticate()
+            for dataset in self.datasets:
+                start = self._resolve_dataset_window_start(dataset, now, state)
+                self.helper.log_info(
+                    "Fetching DEP data from "
+                    f"{start.isoformat()} to {end.isoformat()} "
+                    f"for dataset {dataset}"
+                )
+                items = self._fetch_cycle_items(dataset, start, end, token)
+                if items is None:
+                    continue
+                self._process_cycle_items(items)
+                self._persist_dataset_state(dataset, end)
+                state = self.helper.get_state() or state
         finally:
             self.helper.api.work.to_processed(
                 self._current_work_id,
@@ -419,3 +473,65 @@ def run(self) -> None:
             message_callback=self._run_cycle,
             duration_period=f"PT{self.interval}S",
         )
+
+    def _resolve_dataset_window_start(
+        self, dataset: DepDataset, now: datetime, state: Mapping[str, object]
+    ) -> datetime:
+        """Return the dataset's last run minus the overlap, else the lookback start."""
+        fallback = now - timedelta(days=self.lookback_days)
+        stored = state.get("last_run_by_dataset")
+        last_run = stored.get(str(dataset)) if isinstance(stored, dict) else None
+        if not isinstance(last_run, str):
+            return fallback
+        try:
+            last_run_at = datetime.fromisoformat(last_run)
+            overlap = timedelta(hours=self.overlap_hours)
+        except ValueError:
+            self.helper.log_warning(
+                "Ignoring invalid dataset last_run state value "
+                f"for {dataset}: {last_run}"
+            )
+            return fallback
+        # Start earlier than the last run so the overlap window is fetched again.
+        return last_run_at - overlap
+
+    def _fetch_cycle_items(
+        self,
+        dataset: DepDataset,
+        start: datetime,
+        end: datetime,
+        token: str,
+    ) -> list[LeakRecord] | None:
+        try:
+            items = self._fetch_data(dataset, start, end, token)
+        except Exception as error:
+            self.helper.log_error(
+                f"Failed to fetch DEP data for dataset {dataset}: {error}"
+            )
+            return None
+        self.helper.log_info(
+            f"Received {len(items)} entries from DEP API for dataset {dataset}"
+        )
+        return items
+
+    def _process_cycle_items(self, items: list[LeakRecord]) -> None:
+        for item in items:
+            try:
+                self._process_item(item)
+            except Exception as error:
+                self.helper.log_error(
+                    f"Failed to process DEP item for victim {item.victim}: {error}"
+                )
+
+    def _persist_dataset_state(self, dataset: DepDataset, end: datetime) -> None:
+        """Store ``end`` as the last run of ``dataset`` in the connector state.
+
+        Entries already recorded for other datasets are carried over unchanged.
+        """
+        self.helper.log_info(f"Persisting connector state for dataset {dataset}")
+        state = self.helper.get_state() or {}
+        previous = state.get("last_run_by_dataset")
+        last_run_by_dataset = dict(previous) if isinstance(previous, dict) else {}
+        last_run_by_dataset[str(dataset)] = end.isoformat()
+        self.helper.set_state({"last_run_by_dataset": last_run_by_dataset})
+        self.helper.log_info(f"DEP run completed for dataset {dataset}")
diff --git a/dep_connector/converter_to_stix.py b/dep_connector/converter_to_stix.py
index c6a8cdf..4e23df6 100644
--- a/dep_connector/converter_to_stix.py
+++ b/dep_connector/converter_to_stix.py
@@ -34,6 +34,7 @@ class LeakRecord(BaseModel):
 
     date: dt_date
     hashid: str
+    dep_dataset: str | None = None
 
     victim: str | None = None
     sector: str | None = None
@@ -139,16 +140,31 @@ def _external_reference(
     return reference
 
 
-def _primary_custom_properties(
-    actor: str | None,
-    country: str | None,
-) -> dict[str, str]:
-    properties: dict[str, str] = {}
-    if actor is not None:
-        properties["dep_actor"] = actor
-    if country is not None:
-        properties["dep_country"] = country
-    return properties
+def _primary_stix_id(object_type: str, item: LeakRecord) -> str:
+    return (
+        f"{object_type}--"
+        f"{uuid5(NAMESPACE_URL, f'dep-announcement:{item.normalized_hashid}')}"
+    )
+
+
+def _victim_external_references(item: LeakRecord) -> list[dict[str, str]]:
+    external_references: list[dict[str, str]] = []
+    if item.ann_link:
+        external_references.append(
+            _external_reference(
+                "dep",
+                url=item.ann_link,
+                description=item.ann_title,
+            )
+        )
+    if item.site and item.site != item.ann_link:
+        external_references.append(
+            _external_reference(
+                "victim-site",
+                url=_ensure_scheme(item.site),
+            )
+        )
+    return external_references
 
 
 class StixBuilder:
@@ -163,6 +179,14 @@ def __init__(
         self.confidence = confidence
         self.label_value = label_value
 
+    def _common_object_kwargs(self, item: LeakRecord) -> dict[str, object]:
+        return {
+            "confidence": self.confidence,
+            "labels": self.build_labels(item),
+            "created_by_ref": self.author_identity,
+            "object_marking_refs": [TLP_AMBER],
+        }
+
     def create_victim_identity(
         self,
         item: LeakRecord,
@@ -173,23 +197,6 @@ def create_victim_identity(
         if not victim_name:
             return None
 
-        external_references: list[dict[str, str]] = []
-        if item.ann_link:
-            external_references.append(
-                _external_reference(
-                    "dep",
-                    url=item.ann_link,
-                    description=item.ann_title,
-                )
-            )
-        if item.site and item.site != item.ann_link:
-            external_references.append(
-                _external_reference(
-                    "victim-site",
-                    url=_ensure_scheme(item.site),
-                )
-            )
-
         description_parts = []
         if item.sector and include_sector_in_description:
             description_parts.append(f"Industry sector: {item.sector}")
@@ -202,26 +209,20 @@ def create_victim_identity(
             name=victim_name,
             description=description,
             identity_class="organization",
-            confidence=self.confidence,
-            labels=[self.label_value],
-            created_by_ref=self.author_identity,
-            external_references=external_references or None,
-            object_marking_refs=[TLP_AMBER],
+            external_references=_victim_external_references(item) or None,
+            **self._common_object_kwargs(item),
         )
 
-    def create_sector_identity(self, sector: str) -> stix2.Identity:
+    def create_sector_identity(self, sector: str, item: LeakRecord) -> stix2.Identity:
         sector_key = sector.lower()
         return stix2.Identity(
             id=pycti.Identity.generate_id(sector_key, identity_class="class"),
             name=sector,
             identity_class="class",
-            created_by_ref=self.author_identity,
-            confidence=self.confidence,
-            labels=[self.label_value],
-            object_marking_refs=[TLP_AMBER],
+            **self._common_object_kwargs(item),
         )
 
-    def create_intrusion_set(self, actor: str) -> stix2.IntrusionSet:
+    def create_intrusion_set(self, actor: str, item: LeakRecord) -> stix2.IntrusionSet:
         actor_key = actor.lower()
         intrusion_set_id = (
             f"intrusion-set--{uuid5(NAMESPACE_URL, f'dep-actor:{actor_key}')}"
@@ -229,33 +230,23 @@ def create_intrusion_set(self, actor: str) -> stix2.IntrusionSet:
         return stix2.IntrusionSet(
             id=intrusion_set_id,
             name=actor,
-            confidence=self.confidence,
-            labels=[self.label_value],
-            created_by_ref=self.author_identity,
-            object_marking_refs=[TLP_AMBER],
+            **self._common_object_kwargs(item),
         )
 
-    def create_country_location(self, country: str) -> stix2.Location:
+    def create_country_location(self, country: str, item: LeakRecord) -> stix2.Location:
         country_key = country.lower()
         location_id = f"location--{uuid5(NAMESPACE_URL, f'dep-country:{country_key}')}"
         return stix2.Location(
             id=location_id,
             name=country,
             country=country,
-            confidence=self.confidence,
-            labels=[self.label_value],
-            created_by_ref=self.author_identity,
-            object_marking_refs=[TLP_AMBER],
             custom_properties={"x_opencti_location_type": "Country"},
             allow_custom=True,
+            **self._common_object_kwargs(item),
         )
 
     def create_incident(self, item: LeakRecord) -> stix2.Incident:
-        incident_name = self.build_primary_name(item)
-        description = self.build_primary_description(item)
         first_seen = datetime.combine(item.date, datetime.min.time(), tzinfo=UTC)
-        external_reference = self.build_primary_external_reference(item)
-        incident_id = f"incident--{uuid5(NAMESPACE_URL, f'dep-announcement:{item.normalized_hashid}')}"
         custom_properties = {
             "incident_type": "cybercrime",
             "first_seen": first_seen,
@@ -263,58 +254,38 @@ def create_incident(self, item: LeakRecord) -> stix2.Incident:
         }
 
         return stix2.Incident(
-            id=incident_id,
-            name=incident_name,
-            description=description,
+            id=_primary_stix_id("incident", item),
+            name=self.build_primary_name(item),
+            description=self.build_primary_description(item),
             created=first_seen,
-            confidence=self.confidence,
-            labels=self.build_labels(item),
-            created_by_ref=self.author_identity,
-            external_references=[external_reference],
-            object_marking_refs=[TLP_AMBER],
+            external_references=[self.build_primary_external_reference(item)],
             custom_properties=custom_properties,
+            **self._common_object_kwargs(item),
         )
 
     def create_report(self, item: LeakRecord, object_refs: list[str]) -> stix2.Report:
-        report_name = self.build_primary_name(item)
-        description = self.build_primary_description(item)
         published = datetime.combine(item.date, datetime.min.time(), tzinfo=UTC)
-        external_reference = self.build_primary_external_reference(item)
         custom_properties = self.build_primary_custom_properties(item)
-        report_id = f"report--{uuid5(NAMESPACE_URL, f'dep-announcement:{item.normalized_hashid}')}"
+        report_kwargs = {
+            "id": _primary_stix_id("report", item),
+            "name": self.build_primary_name(item),
+            "description": self.build_primary_description(item),
+            "published": published,
+            "report_types": ["threat-report"],
+            "external_references": [self.build_primary_external_reference(item)],
+            "object_refs": object_refs,
+            **self._common_object_kwargs(item),
+        }
         if custom_properties:
-            return stix2.Report(
-                id=report_id,
-                name=report_name,
-                description=description,
-                published=published,
-                report_types=["threat-report"],
-                confidence=self.confidence,
-                labels=self.build_labels(item),
-                created_by_ref=self.author_identity,
-                external_references=[external_reference],
-                object_refs=object_refs,
-                object_marking_refs=[TLP_AMBER],
-                custom_properties=custom_properties,
-            )
-        return stix2.Report(
-            id=report_id,
-            name=report_name,
-            description=description,
-            published=published,
-            report_types=["threat-report"],
-            confidence=self.confidence,
-            labels=self.build_labels(item),
-            created_by_ref=self.author_identity,
-            external_references=[external_reference],
-            object_refs=object_refs,
-            object_marking_refs=[TLP_AMBER],
-        )
+            report_kwargs["custom_properties"] = custom_properties
+        return stix2.Report(**report_kwargs)
 
     def build_labels(self, item: LeakRecord) -> list[str]:
         labels = {self.label_value}
+        if item.dep_dataset:
+            labels.add(f"dep:dataset:{item.dep_dataset.lower()}")
         labels.update(
-            f"dep:announcement-type:{announcement_type.value.lower()}"
+            f"dep:announcement-type:{announcement_type.lower()}"
             for announcement_type in item.announcement_types
         )
         return sorted(labels)
@@ -325,17 +296,11 @@ def create_site_indicator(self, item: LeakRecord) -> stix2.Indicator | None:
             return None
 
         pattern = f"[domain-name:value = '{domain}']"
-        return stix2.Indicator(
-            id=pycti.Indicator.generate_id(pattern),
+        return self._create_indicator(
+            item,
+            pattern=pattern,
             name=f"Domain associated with {item.victim or 'unknown victim'}",
             description="Victim domain",
-            pattern_type="stix",
-            pattern=pattern,
-            valid_from=datetime.now(UTC),
-            confidence=self.confidence,
-            labels=[self.label_value],
-            created_by_ref=self.author_identity,
-            object_marking_refs=[TLP_AMBER],
         )
 
     def create_hash_indicator(self, item: LeakRecord) -> stix2.Indicator | None:
@@ -347,26 +312,34 @@ def create_hash_indicator(self, item: LeakRecord) -> stix2.Indicator | None:
             return None
 
         pattern = f"[file:hashes.'{hash_type}' = '{hash_value}']"
-        return stix2.Indicator(
-            id=pycti.Indicator.generate_id(pattern),
+        return self._create_indicator(
+            item,
+            pattern=pattern,
             name=f"Announcement hash for {item.victim or 'unknown victim'}",
             description="Hash identifier for tracking",
+        )
+
+    def _create_indicator(
+        self,
+        item: LeakRecord,
+        *,
+        pattern: str,
+        name: str,
+        description: str,
+    ) -> stix2.Indicator:
+        return stix2.Indicator(
+            id=pycti.Indicator.generate_id(pattern),
+            name=name,
+            description=description,
             pattern_type="stix",
             pattern=pattern,
             valid_from=datetime.now(UTC),
-            confidence=self.confidence,
-            labels=[self.label_value],
-            created_by_ref=self.author_identity,
-            object_marking_refs=[TLP_AMBER],
+            **self._common_object_kwargs(item),
         )
 
     @staticmethod
     def detect_hash_type(hash_value: str) -> str | None:
-        length_to_type = {32: "MD5", 40: "SHA-1", 64: "SHA-256"}
-        length = len(hash_value)
-        if length in length_to_type:
-            return length_to_type[length]
-        return None
+        return {32: "MD5", 40: "SHA-1", 64: "SHA-256"}.get(len(hash_value))
 
     @staticmethod
     def is_low_quality_actor(actor: str) -> bool:
@@ -375,6 +348,7 @@ def is_low_quality_actor(actor: str) -> bool:
 
     def build_relationship(
         self,
+        item: LeakRecord,
         relationship_type: str,
         source_ref: str,
         target_ref: str,
@@ -386,10 +360,7 @@ def build_relationship(
             relationship_type=relationship_type,
             source_ref=source_ref,
             target_ref=target_ref,
-            created_by_ref=self.author_identity,
-            confidence=self.confidence,
-            labels=[self.label_value],
-            object_marking_refs=[TLP_AMBER],
+            **self._common_object_kwargs(item),
         )
 
     @staticmethod
@@ -399,21 +370,21 @@ def build_primary_name(item: LeakRecord) -> str:
 
     @staticmethod
     def build_primary_description(item: LeakRecord) -> str | None:
-        if item.ann_description:
-            return unquote(item.ann_description)
-        return None
+        return unquote(item.ann_description) if item.ann_description else None
 
     @staticmethod
     def build_primary_external_reference(item: LeakRecord) -> dict[str, str]:
-        url = item.ann_link
-        if url is None and item.site:
-            url = _ensure_scheme(item.site)
         return _external_reference(
             "dep",
-            url=url,
+            url=item.ann_link or (_ensure_scheme(item.site) if item.site else None),
             description=item.ann_title,
         )
 
     @staticmethod
     def build_primary_custom_properties(item: LeakRecord) -> dict[str, str]:
-        return _primary_custom_properties(item.actor, item.country)
+        properties: dict[str, str] = {}
+        if item.actor is not None:
+            properties["dep_actor"] = item.actor
+        if item.country is not None:
+            properties["dep_country"] = item.country
+        return properties
diff --git a/pyproject.toml b/pyproject.toml
index d1470fc..b8db29b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ Documentation = "https://github.com/DigintLab/opencti-connector/blob/main/README
 [dependency-groups]
 dev = [
     "mypy[faster-cache]>=1.19.0",
+    "pytest>=8.4.2",
     "ruff>=0.14.9",
     "types-pyyaml>=6.0.12.20250915",
     "types-requests>=2.32.4.20250913",
@@ -58,6 +59,9 @@ docstring-code-format = true
 [tool.ruff.lint.isort]
 case-sensitive = true
 
+[tool.ruff.lint.per-file-ignores]
+"tests/*.py" = ["S101", "S106", "SLF001"]
+
 [tool.ruff.lint.mccabe]
 max-complexity = 10
 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/test_api_spec_datasets.py b/tests/test_api_spec_datasets.py
new file mode 100644
index 0000000..0131cf3
--- /dev/null
+++ b/tests/test_api_spec_datasets.py
@@ -0,0 +1,30 @@
+"""Check that ``DepDataset`` stays in sync with the published DEP API spec."""
+
+import json
+import re
+from pathlib import Path
+
+import pytest
+
+from dep_connector import DepDataset
+
+# The spec file is git-ignored, so it is absent on clean checkouts and in CI.
+SPEC_PATH = Path(__file__).resolve().parent.parent / "dep-api-spec.json"
+
+
+def test_dep_dataset_enum_matches_api_spec() -> None:
+    """Dataset codes quoted in the spec's ``dset`` parameter match the enum."""
+    if not SPEC_PATH.is_file():
+        pytest.skip(f"{SPEC_PATH.name} is not available locally")
+
+    spec = json.loads(SPEC_PATH.read_text(encoding="utf-8"))
+    parameters = spec["paths"]["/dbtr/privlist"]["get"]["parameters"]
+    description = next(
+        parameter["description"]
+        for parameter in parameters
+        if parameter["name"] == "dset"
+    )
+    # The description lists each three-letter code inside typographic quotes.
+    dataset_values = set(re.findall(r"“([a-z]{3})”", description))
+
+    assert dataset_values == set(DepDataset)
diff --git a/tests/test_connector_datasets.py b/tests/test_connector_datasets.py
new file mode 100644
index 0000000..b05977c
--- /dev/null
+++ b/tests/test_connector_datasets.py
@@ -0,0 +1,49 @@
+"""Tests for ``DepConnector._parse_datasets`` input handling."""
+
+import pytest
+
+from dep_connector import DepConnector, DepDataset
+
+EXT_AND_DDOS = (DepDataset.EXTORTION, DepDataset.DDOS)
+
+
+def _bare_connector() -> DepConnector:
+    """Return a connector allocated without running ``__init__``."""
+    return DepConnector.__new__(DepConnector)
+
+
+@pytest.mark.parametrize(
+    ("raw_datasets", "expected"),
+    [
+        ("ext,dds", EXT_AND_DDOS),
+        ("extortion,ddos", EXT_AND_DDOS),
+        ("ext,ddos,vandalism", (*EXT_AND_DDOS, DepDataset.VANDALISM)),
+        (["ext", "dds"], EXT_AND_DDOS),
+        (["extortion", "ddos"], EXT_AND_DDOS),
+        ([" ext ", "ddos", "extortion"], EXT_AND_DDOS),
+    ],
+)
+def test_parse_datasets_accepts_multiple_official_values(
+    raw_datasets: object,
+    expected: tuple[DepDataset, ...],
+) -> None:
+    """Codes and aliases, given as a string or a list, parse to ``expected``."""
+    parsed = _bare_connector()._parse_datasets(raw_datasets)
+
+    assert parsed == expected
+
+
+def test_parse_datasets_rejects_unknown_values() -> None:
+    """An entry that is not a known dataset raises ``ValueError``."""
+    connector = _bare_connector()
+
+    with pytest.raises(ValueError, match="supported values or aliases"):
+        connector._parse_datasets("ext,sanctions")
+
+
+def test_parse_datasets_requires_at_least_one_value() -> None:
+    """An empty dataset string raises ``ValueError``."""
+    connector = _bare_connector()
+
+    with pytest.raises(ValueError, match="at least one dataset"):
+        connector._parse_datasets("")
diff --git a/tests/test_connector_runtime.py b/tests/test_connector_runtime.py
new file mode 100644
index 0000000..d4a79e6
--- /dev/null
+++ b/tests/test_connector_runtime.py
@@ -0,0 +1,241 @@
+"""Runtime tests for ``DepConnector`` helper methods.
+
+Connectors are allocated with ``__new__`` so ``__init__`` never runs; each test
+sets only the attributes it needs and replaces collaborators with mocks.
+"""
+
+from datetime import UTC, datetime
+from unittest.mock import Mock, patch
+
+import pycti  # type: ignore[import-untyped]
+from stix2 import TLP_AMBER  # type: ignore[import-untyped]
+from stix2 import v21 as stix2
+
+from dep_connector import DepConnector, DepDataset, PrimaryObject
+
+
+def test_should_skip_item_honors_flag() -> None:
+    """Empty or placeholder victims are skipped only when the flag is enabled."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.skip_empty_victim = True
+
+    assert connector._should_skip_item("")
+    assert connector._should_skip_item("n/a")
+    assert connector._should_skip_item(" none ")
+    assert not connector._should_skip_item("Acme Corp")
+
+    connector.skip_empty_victim = False
+    assert not connector._should_skip_item("")
+
+
+def test_fetch_data_fetches_one_dataset_with_injected_dep_dataset() -> None:
+    """One dataset is queried and each returned record is tagged with its code."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.helper = Mock()
+    connector.client = Mock()
+    connector.client.fetch_raw.return_value = [
+        {"date": "2026-03-27", "hashid": "a" * 64, "victim": "Ext Victim"}
+    ]
+
+    items = connector._fetch_data(
+        DepDataset.EXTORTION,
+        datetime(2026, 3, 26, tzinfo=UTC),
+        datetime(2026, 3, 27, tzinfo=UTC),
+        "token-123",
+    )
+
+    assert [item.dep_dataset for item in items] == ["ext"]
+    assert [item.victim for item in items] == ["Ext Victim"]
+    connector.client.fetch_raw.assert_called_once_with(
+        dataset=DepDataset.EXTORTION,
+        start_date="2026-03-26",
+        end_date="2026-03-27",
+        token="token-123",
+    )
+    connector.helper.log_info.assert_called_once_with("Fetching DEP dataset 'ext'")
+
+
+def test_fetch_data_skips_invalid_records_without_aborting() -> None:
+    """A record missing ``hashid`` is dropped with one warning; the valid one stays."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.helper = Mock()
+    connector.client = Mock()
+    connector.client.fetch_raw.return_value = [
+        {"date": "2026-03-27", "hashid": "a" * 64, "victim": "Valid Victim"},
+        {"date": "2026-03-27", "victim": "Missing Hash"},
+    ]
+
+    items = connector._fetch_data(
+        DepDataset.EXTORTION,
+        datetime(2026, 3, 26, tzinfo=UTC),
+        datetime(2026, 3, 27, tzinfo=UTC),
+        "token-123",
+    )
+
+    assert len(items) == 1
+    assert items[0].victim == "Valid Victim"
+    connector.helper.log_warning.assert_called_once()
+
+
+def test_build_indicators_collects_enabled_non_empty_indicators() -> None:
+    """With both indicator flags on, site then hash indicators are returned."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.enable_site_indicator = True
+    connector.enable_hash_indicator = True
+    connector.stix = Mock()
+
+    site_indicator = Mock()
+    hash_indicator = Mock()
+    connector.stix.create_site_indicator.return_value = site_indicator
+    connector.stix.create_hash_indicator.return_value = hash_indicator
+    item = Mock()
+
+    assert connector._build_indicators(item) == [site_indicator, hash_indicator]
+
+
+def test_process_item_skips_placeholder_victim_before_building_content() -> None:
+    """A placeholder victim is logged and no victim identity is created."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.skip_empty_victim = True
+    connector.helper = Mock()
+    connector.stix = Mock()
+    connector.enable_site_indicator = False
+    connector.enable_hash_indicator = False
+    connector.primary_object = PrimaryObject.REPORT
+
+    item = Mock(victim=" n/a ")
+
+    connector._process_item(item)
+
+    connector.helper.log_info.assert_called_once_with(
+        "Skipping DEP item with empty or placeholder victim value"
+    )
+    connector.stix.create_victim_identity.assert_not_called()
+
+
+def test_process_item_routes_to_report_mode() -> None:
+    """In report mode the item goes to ``_process_item_as_report`` only."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.skip_empty_victim = False
+    connector.create_sector_identities = False
+    connector.helper = Mock()
+    connector.stix = Mock()
+    connector.enable_site_indicator = False
+    connector.enable_hash_indicator = False
+    connector.primary_object = PrimaryObject.REPORT
+
+    victim = Mock()
+    connector.stix.create_victim_identity.return_value = victim
+    item = Mock(victim="Acme Corp")
+
+    with (
+        patch.object(
+            connector, "_build_indicators", return_value=[]
+        ) as build_indicators,
+        patch.object(connector, "_process_item_as_report") as process_as_report,
+        patch.object(connector, "_process_item_as_incident") as process_as_incident,
+    ):
+        connector._process_item(item)
+
+    connector.stix.create_victim_identity.assert_called_once_with(
+        item,
+        include_sector_in_description=True,
+    )
+    build_indicators.assert_called_once_with(item)
+    process_as_report.assert_called_once_with(item, victim, [])
+    process_as_incident.assert_not_called()
+
+
+def test_send_objects_deduplicates_by_stix_id_before_sending() -> None:
+    """Each STIX id occurs once in the single bundle that is sent."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.helper = Mock()
+    connector._current_work_id = "work-id"
+
+    first = stix2.Identity(
+        id=pycti.Identity.generate_id("Example Victim", identity_class="organization"),
+        name="Example Victim",
+        identity_class="organization",
+        object_marking_refs=[TLP_AMBER],
+    )
+    duplicate = stix2.Identity(
+        id=first.id,
+        name="Example Victim Duplicate",
+        identity_class="organization",
+        object_marking_refs=[TLP_AMBER],
+    )
+    second = stix2.Identity(
+        id=pycti.Identity.generate_id("Second Victim", identity_class="organization"),
+        name="Second Victim",
+        identity_class="organization",
+        object_marking_refs=[TLP_AMBER],
+    )
+
+    connector._send_objects([first, duplicate, second])
+
+    connector.helper.send_stix2_bundle.assert_called_once()
+    sent_bundle = connector.helper.send_stix2_bundle.call_args.args[0]
+    assert sent_bundle.count(first.id) == 1
+    assert sent_bundle.count(second.id) == 1
+
+
+def test_resolve_run_window_start_uses_dataset_specific_state() -> None:
+    """The dataset's stored timestamp minus the overlap is the window start."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.lookback_days = 7
+    connector.overlap_hours = 24
+    connector.helper = Mock()
+    now = datetime(2026, 3, 28, tzinfo=UTC)
+    state = {"last_run_by_dataset": {"dds": "2026-03-27T09:00:00+00:00"}}
+
+    start = connector._resolve_dataset_window_start(DepDataset.DDOS, now, state)
+
+    assert start == datetime(2026, 3, 26, 9, 0, tzinfo=UTC)
+
+
+def test_resolve_run_window_start_new_dataset_uses_lookback_when_map_exists() -> None:
+    """A dataset absent from the stored map falls back to the lookback window."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.lookback_days = 7
+    connector.overlap_hours = 24
+    connector.helper = Mock()
+    now = datetime(2026, 3, 28, tzinfo=UTC)
+    state = {"last_run_by_dataset": {"ext": "2026-03-27T09:00:00+00:00"}}
+
+    start = connector._resolve_dataset_window_start(DepDataset.DDOS, now, state)
+
+    assert start == datetime(2026, 3, 21, tzinfo=UTC)
+
+
+def test_resolve_run_window_start_without_dataset_state_uses_lookback() -> None:
+    """With empty state the window start is ``now`` minus the lookback days."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.lookback_days = 7
+    connector.overlap_hours = 24
+    connector.helper = Mock()
+    now = datetime(2026, 3, 28, tzinfo=UTC)
+
+    start = connector._resolve_dataset_window_start(DepDataset.EXTORTION, now, {})
+
+    assert start == datetime(2026, 3, 21, tzinfo=UTC)
+
+
+def test_persist_dataset_state_merges_existing_dataset_entries() -> None:
+    """Persisting one dataset's timestamp keeps the other datasets' entries."""
+    connector = DepConnector.__new__(DepConnector)
+    connector.helper = Mock()
+    connector.helper.get_state.return_value = {
+        "last_run_by_dataset": {"ext": "2026-03-27T09:00:00+00:00"}
+    }
+    end = datetime(2026, 3, 28, 12, 0, tzinfo=UTC)
+
+    connector._persist_dataset_state(DepDataset.DDOS, end)
+
+    connector.helper.set_state.assert_called_once_with(
+        {
+            "last_run_by_dataset": {
+                "ext": "2026-03-27T09:00:00+00:00",
+                "dds": "2026-03-28T12:00:00+00:00",
+            }
+        }
+    )
diff --git a/tests/test_converter_core.py b/tests/test_converter_core.py
new file mode 100644
index 0000000..c8e89a1
--- /dev/null
+++ b/tests/test_converter_core.py
@@ -0,0 +1,144 @@
+"""Tests for ``LeakRecord`` normalization and ``StixBuilder`` primary objects."""
+
+import pycti  # type: ignore[import-untyped]
+from stix2 import TLP_AMBER  # type: ignore[import-untyped]
+from stix2 import v21 as stix2
+
+from dep_connector import LeakRecord, StixBuilder
+
+
+def build_builder() -> StixBuilder:
+    """Return a ``StixBuilder`` with a fixed author identity, confidence and label."""
+    author_identity = stix2.Identity(
+        id=pycti.Identity.generate_id("DigIntLab", identity_class="organization"),
+        name="DigIntLab",
+        identity_class="organization",
+        object_marking_refs=[TLP_AMBER],
+    )
+    return StixBuilder(
+        author_identity=author_identity,
+        confidence=70,
+        label_value="DigIntLab",
+    )
+
+
+def test_leak_record_normalizes_source_fields() -> None:
+    """Text fields are trimmed, the link scheme repaired and placeholders nulled."""
+    item = LeakRecord(
+        date="2026-03-27",
+        hashid="A" * 64,
+        victim="Example Victim",
+        annLink="https//example.com/leak",
+        site=" Example.com ",
+        victimDomain=" Victim.Example.com ",
+        sector="  Finance   ",
+        actor=" Example Gang ",
+        country=" n/a ",
+    )
+
+    assert item.ann_link == "https://example.com/leak"
+    assert item.site == "Example.com"
+    assert item.victim_domain == "Victim.Example.com"
+    assert item.sector == "Finance"
+    assert item.actor == "Example Gang"
+    assert item.country is None
+    assert item.normalized_hashid == "a" * 64
+    assert item.indicator_domain == "victim.example.com"
+
+
+def test_indicator_domain_falls_back_to_site() -> None:
+    """Without a victim domain, ``indicator_domain`` is the lower-cased site host."""
+    item = LeakRecord(
+        date="2026-03-27",
+        hashid="b" * 64,
+        victim="Example Victim",
+        site="https://Portal.Example.com/path",
+    )
+
+    assert item.indicator_domain == "portal.example.com"
+
+
+def test_report_id_is_deterministic_from_hashid() -> None:
+    """Reports sharing a hashid share an id; names and dataset labels differ."""
+    builder = build_builder()
+    object_refs = [builder.author_identity.id]
+    first = LeakRecord(
+        date="2026-03-27",
+        hashid="c" * 64,
+        victim="Original Victim",
+        annLink="https://example.com/original",
+        annTitle="Original Title",
+        annDescription="Original description",
+        dep_dataset="ext",
+    )
+    second = LeakRecord(
+        date="2026-03-27",
+        hashid="c" * 64,
+        victim="Updated Victim",
+        annLink="https://example.com/updated",
+        annTitle="Updated Title",
+        annDescription="Updated description",
+        dep_dataset="dds",
+    )
+
+    first_report = builder.create_report(first, object_refs)
+    second_report = builder.create_report(second, object_refs)
+
+    assert first_report.id == second_report.id
+    assert first_report.name != second_report.name
+    assert "dep:dataset:ext" in first_report.labels
+    assert "dep:dataset:dds" in second_report.labels
+
+
+def test_build_primary_description_url_decodes_text() -> None:
+    """Percent-encoded description text is decoded."""
+    item = LeakRecord(
+        date="2026-03-27",
+        hashid="e" * 64,
+        victim="Encoded Victim",
+        annDescription="Leaked%20records%20available",
+    )
+
+    assert StixBuilder.build_primary_description(item) == "Leaked records available"
+
+
+def test_build_primary_external_reference_falls_back_to_site_with_scheme() -> None:
+    """With no announcement link, the URL is the site with ``https://`` added."""
+    item = LeakRecord(
+        date="2026-03-27",
+        hashid="f" * 64,
+        victim="Fallback Victim",
+        site="portal.example.com",
+        annTitle="Fallback title",
+    )
+
+    reference = StixBuilder.build_primary_external_reference(item)
+
+    assert reference == {
+        "source_name": "dep",
+        "url": "https://portal.example.com",
+        "description": "Fallback title",
+    }
+
+
+def test_incident_id_is_deterministic_from_hashid() -> None:
+    """Incidents sharing a hashid share an id even when other fields differ."""
+    builder = build_builder()
+    first = LeakRecord(
+        date="2026-03-27",
+        hashid="d" * 64,
+        victim="Original Victim",
+        annLink="https://example.com/original",
+    )
+    second = LeakRecord(
+        date="2026-03-27",
+        hashid="d" * 64,
+        victim="Updated Victim",
+        annLink="https://example.com/updated",
+    )
+
+    first_incident = builder.create_incident(first)
+    second_incident = builder.create_incident(second)
+
+    assert first_incident.id == second_incident.id
+    assert first_incident.name != second_incident.name
diff --git a/tests/test_converter_labels.py b/tests/test_converter_labels.py
new file mode 100644
index 0000000..e9fd1bd
--- /dev/null
+++ b/tests/test_converter_labels.py
@@ -0,0 +1,81 @@
+"""Label propagation tests for objects produced by ``StixBuilder``."""
+
+import pycti  # type: ignore[import-untyped]
+from stix2 import TLP_AMBER  # type: ignore[import-untyped]
+from stix2 import v21 as stix2
+
+from dep_connector import LeakRecord, StixBuilder
+
+AUTHOR_NAME = "DigIntLab"
+EXPECTED_LABELS = [
+    "DigIntLab",
+    "dep:announcement-type:pii",
+    "dep:dataset:dds",
+]
+
+
+def build_builder() -> StixBuilder:
+    """Return a builder whose author identity and label are both "DigIntLab"."""
+    return StixBuilder(
+        author_identity=stix2.Identity(
+            id=pycti.Identity.generate_id(AUTHOR_NAME, identity_class="organization"),
+            name=AUTHOR_NAME,
+            identity_class="organization",
+            object_marking_refs=[TLP_AMBER],
+        ),
+        confidence=70,
+        label_value=AUTHOR_NAME,
+    )
+
+
+def build_item() -> LeakRecord:
+    """Return a populated record from the ``dds`` dataset with one data type."""
+    return LeakRecord(
+        date="2026-03-27",
+        hashid="a" * 64,
+        victim="Example Victim",
+        sector="Finance",
+        actor="Example Gang",
+        country="Italy",
+        site="example.com",
+        annDataTypes=["PII"],
+        dep_dataset="dds",
+    )
+
+
+def test_build_labels_include_dataset_and_announcement_type() -> None:
+    """``build_labels`` returns base, announcement-type and dataset labels."""
+    labels = build_builder().build_labels(build_item())
+
+    assert labels == EXPECTED_LABELS
+
+
+def test_dep_objects_and_relationships_propagate_labels() -> None:
+    """Every object and relationship built for an item carries the same labels."""
+    builder = build_builder()
+    item = build_item()
+
+    victim = builder.create_victim_identity(item, include_sector_in_description=False)
+    assert victim is not None
+    sector = builder.create_sector_identity("Finance", item)
+    intrusion_set = builder.create_intrusion_set("Example Gang", item)
+    country = builder.create_country_location("Italy", item)
+    site_indicator = builder.create_site_indicator(item)
+    hash_indicator = builder.create_hash_indicator(item)
+    relationship = builder.build_relationship(
+        item, "targets", intrusion_set.id, victim.id
+    )
+    assert site_indicator is not None
+    assert hash_indicator is not None
+
+    built_objects = (
+        victim,
+        sector,
+        intrusion_set,
+        country,
+        site_indicator,
+        hash_indicator,
+        relationship,
+    )
+    for built in built_objects:
+        assert built.labels == EXPECTED_LABELS
diff --git a/uv.lock b/uv.lock
index 5c4bd56..6abd0de 100644
--- a/uv.lock
+++ b/uv.lock
@@ -270,6 +270,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl", hash = "sha256:e5dd1551894c77868a30651cef00984d50e1002d06942a7101d34870c5f02afd", size = 27656, upload-time = "2025-04-27T15:29:00.214Z" },
 ]
 
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+
 [[package]]
 name = "librt"
 version = "0.7.3"
@@ -400,6 +409,7 @@ dependencies = [
 [package.dev-dependencies]
 dev = [
     { name = "mypy", extra = ["faster-cache"] },
+    { name = "pytest" },
     { name = "ruff" },
     { name = "types-pyyaml" },
     { name = "types-requests" },
@@ -416,6 +426,7 @@ requires-dist = [
 [package.metadata.requires-dev]
 dev = [
     { name = "mypy", extras = ["faster-cache"], specifier = ">=1.19.0" },
+    { name = "pytest", specifier = ">=8.4.2" },
     { name = "ruff", specifier = ">=0.14.9" },
     { name = "types-pyyaml", specifier = ">=6.0.12.20250915" },
     { name = "types-requests", specifier = ">=2.32.4.20250913" },
@@ -556,6 +567,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f9/f3/f412836ec714d36f0f4ab581b84c491e3f42c6b5b97a6c6ed1817f3c16d0/pika-1.3.2-py3-none-any.whl", hash = "sha256:0779a7c1fafd805672796085560d290213a465e4f6f76a6fb19e378d8041a14f", size = 155415, upload-time = "2023-05-05T14:25:41.484Z" },
 ]
 
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+
 [[package]]
 name = "prometheus-client"
 version = "0.22.1"
@@ -706,6 +726,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" },
 ]
 
+[[package]]
+name = "pygments"
+version = "2.19.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+]
+
+[[package]]
+name = "pytest"
+version = "9.0.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"