From 0579b6652116cba68c012eeab1df69f2110f045e Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:43:46 +0200
Subject: [PATCH 01/17] feat(env): implement event-driven architecture with
 normalized delta_t returns

---
 netforge_rl/environment/parallel_env.py | 109 ++++++++++++++----------
 1 file changed, 62 insertions(+), 47 deletions(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index bcf2004..4a7d030 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -14,6 +14,10 @@
 from netforge_rl.nlp.log_encoder import LogEncoder, EMBEDDING_DIM
 
 
+# Normalization constant for Neural ODE integration
+MAX_ACTION_DURATION = 50.0
+
+
 class NetForgeRLEnv(BaseNetForgeRLEnv):
     """MARL Environment for CybORG.
 
@@ -40,10 +44,10 @@ def __init__(self, scenario_config: dict):
         )
         self.green_agent = GreenAgent()
         self.possible_agents = [
-            'red_commander',
             'red_operator',
-            'blue_commander',
-            'blue_operator',
+            'blue_dmz',
+            'blue_internal',
+            'blue_restricted',
         ]
         self.agents = self.possible_agents[:]
 
@@ -91,6 +95,9 @@ def __init__(self, scenario_config: dict):
                     'siem_embedding': gym.spaces.Box(
                         low=-1.0, high=1.0, shape=(EMBEDDING_DIM,), dtype=np.float32
                     ),
+                    'delta_t': gym.spaces.Box(
+                        low=0.0, high=1.0, shape=(1,), dtype=np.float32
+                    ),
                 }
             )
             for agent in self.possible_agents
@@ -134,6 +141,7 @@ def reset(
                 'obs': obs.to_numpy(max_size=256),
                 'action_mask': self.action_mask(agent_id),
                 'siem_embedding': np.zeros(EMBEDDING_DIM, dtype=np.float32),
+                'delta_t': np.zeros(1, dtype=np.float32),
             }
         self.current_tick = 0
         self.event_queue = []
@@ -162,9 +170,7 @@ def action_mask(self, agent: str) -> np.ndarray:
         mask[:valid_action_types] = 1
 
         # 2. Target IP Dimension (12-61)
-        target_ips = sorted(list(self.global_state.all_hosts.keys()))
-        num_targets = min(len(target_ips), 50)
-        mask[12 : 12 + num_targets] = 1
+        mask[12 : 12 + 50] = 1
 
         return mask
 
@@ -184,7 +190,7 @@ def step(
         2. INTERRUPTION LOGIC: Immediate cancel operations for specific defensive tasks.
         3. ADVANCE TIME: `current_tick` progresses by 1.
         4. RESOLVE MATURE EVENTS: Apply ActionEffects that reach `completion_tick`.
-        5. OBSERVATION: Agents receive POMDP updates every tick.
+        5. OBSERVATION: Agents receive POMDP updates with normalized Delta T info.
         """
         intended_effects = {}
 
@@ -236,6 +242,7 @@ def step(
                         'action': action,
                         'effect': effect,
                         'target_ip': getattr(action, 'target_ip', None),
+                        'start_tick': self.current_tick,
                     }
                 )
 
@@ -259,8 +266,19 @@ def step(
                             self.current_tick
                         )
 
-        # 3. ADVANCE TIME
-        self.current_tick += 1
+        # 3. ADVANCE TIME (EVENT-DRIVEN JUMP)
+        prev_tick = self.current_tick
+        if self.event_queue:
+            # Jump to the next event completion time
+            next_event_tick = min(event['completion_tick'] for event in self.event_queue)
+            self.current_tick = max(self.current_tick + 1, next_event_tick)
+        else:
+            # No events queued; advance by 1
+            self.current_tick += 1
+
+        delta_t = float(self.current_tick - prev_tick)
+        delta_t_norm = delta_t / MAX_ACTION_DURATION
+
         self.global_state.current_tick = self.current_tick
         self.global_state.subnet_bandwidth.clear()
 
@@ -269,8 +287,11 @@ def step(
             self.current_tick, self.global_state
         )
         for anomaly in noise_data.get('alerts', []):
-            anomaly['arrival_tick'] = self.current_tick + self.log_latency
-            self.global_state.siem_log_buffer.append(anomaly)
+            # Arrival tick logic stays for delayed observation if needed, 
+            # but for now we push raw strings + subnets to buffer
+            self.siem_logger._push_to_buffer(
+                anomaly['data'], anomaly['subnet'], self.global_state
+            )
 
         # 4. RESOLVE MATURE EVENTS
         intended_effects = {}
@@ -316,24 +337,14 @@ def step(
                 host = self.global_state.all_hosts.get(target_ip)
                 is_honeytoken_trap = host and host.contains_honeytokens
 
-                signature = (
-                    'HONEYTOKEN_TRIGGERED'
-                    if is_honeytoken_trap
-                    else 'RED_ACTION_DETECTED'
-                )
-                severity = 10 if is_honeytoken_trap else 5
                 log_delay = 0 if is_honeytoken_trap else self.log_latency
-
-                self.global_state.siem_log_buffer.append(
-                    {
-                        'type': 'anomaly',
-                        'source': res_agent,
-                        'target': target_ip,
-                        'signature': signature,
-                        'severity': severity,
-                        'false_positive': False,
-                        'arrival_tick': self.current_tick + log_delay,
-                    }
+                
+                # Use templates for TP to ensure high-fidelity raw logs
+                from netforge_rl.siem.event_templates import sysmon_1
+                log_string = sysmon_1(res_agent, process='exploit_payload')
+                
+                self.siem_logger._push_to_buffer(
+                    log_string, host.subnet_cidr if host else 'unknown', self.global_state
                 )
 
         # Generate background SIEM noise every tick
@@ -350,31 +361,28 @@ def step(
         is_truncated = self.current_tick >= self.max_ticks
         truncate = {agent: is_truncated for agent in self.agents}
 
-        # Encode recent SIEM logs once per step (shared cost for all Blue agents)
-        recent_logs = self.siem_logger.get_recent_logs(self.global_state, n=8)
-        siem_vec = self.log_encoder.encode_buffer(recent_logs, agg='mean')
+        # Encode subnet-specific SIEM logs for decentralized Blue agents
+        agent_siem_vecs = {}
+        for agent in self.agents:
+            if 'blue' in agent.lower():
+                # Extract subnet tag (e.g., 'blue_dmz' -> 'dmz')
+                subnet_tag = agent.split('_')[1] if '_' in agent else 'dmz'
+                subset_logs = self.siem_logger.get_filtered_logs(
+                    self.global_state, subnet_tag=subnet_tag, n=8
+                )
+                agent_siem_vecs[agent] = self.log_encoder.encode_buffer(
+                    subset_logs, agg='mean'
+                )
 
         for agent in self.agents:
             obs = BaseObservation(agent)
             obs.update_from_state(self.global_state, resolved_effects)
 
             obs_array = obs.to_numpy(max_size=256)
-            if 'operator' in agent:
-                commander_id = agent.replace('operator', 'commander')
-                if commander_id in agent_actions:
-                    cmd_action = agent_actions[commander_id]
-                    cmd_val = (
-                        (float(cmd_action[0]) / 12.0)
-                        if getattr(cmd_action, '__iter__', False)
-                        and not isinstance(cmd_action, BaseAction)
-                        else 1.0
-                    )
-                    obs_array[0] = cmd_val
-
-            # Blue agents receive the live SIEM embedding; Red gets zeros.
-            # This gives Blue an information advantage that models real SOC telemetry.
-            if 'blue' in agent:
-                agent_siem_vec = siem_vec
+            
+            # Blue agents receive subnet-specific SIEM embeddings; Red gets zeros.
+            if 'blue' in agent.lower():
+                agent_siem_vec = agent_siem_vecs.get(agent, np.zeros(EMBEDDING_DIM, dtype=np.float32))
             else:
                 agent_siem_vec = np.zeros(EMBEDDING_DIM, dtype=np.float32)
 
@@ -382,6 +390,7 @@ def step(
                 'obs': obs_array,
                 'action_mask': self.action_mask(agent),
                 'siem_embedding': agent_siem_vec,
+                'delta_t': np.array([delta_t_norm], dtype=np.float32),
             }
             agent_effect = resolved_effects.get(agent)
             rewards[agent] = self._calculate_reward(
@@ -397,6 +406,12 @@ def step(
         # ── Build info dicts with security metrics for callbacks ──
         infos = self._extract_agent_infos(observations, resolved_effects)
 
+        # Add temporal metadata for Neural ODE cells
+        for agent in self.agents:
+            if agent in infos:
+                infos[agent]['delta_t'] = delta_t
+                infos[agent]['delta_t_norm'] = delta_t_norm
+
         return observations, rewards, terminate, truncate, infos
 
     def render(self):

From 2918860bb469e9a41d768236a6573661c4f65d5e Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:43:58 +0200
Subject: [PATCH 02/17] feat(env): decouple blue agent into decentralized
 subnet-specific actors (CTDE)

---
 netforge_rl/siem/siem_logger.py | 45 +++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/netforge_rl/siem/siem_logger.py b/netforge_rl/siem/siem_logger.py
index 20d4628..7308db3 100644
--- a/netforge_rl/siem/siem_logger.py
+++ b/netforge_rl/siem/siem_logger.py
@@ -65,7 +65,10 @@ def log_action(
 
         log_line = self._generate_event(action_name, src_ip, tgt_ip)
         if log_line:
-            self._push_to_buffer(log_line, global_state)
+            # Determine subnet for filtering
+            host = global_state.all_hosts.get(tgt_ip)
+            subnet = host.subnet_cidr if host else 'unknown'
+            self._push_to_buffer(log_line, subnet, global_state)
         return log_line
 
     def log_background_noise(self, global_state: 'GlobalNetworkState') -> None:
@@ -98,7 +101,7 @@ def log_background_noise(self, global_state: 'GlobalNetworkState') -> None:
         norm_weights = [w / total for w in weights]
         chosen = self._rng.choices(callables, weights=norm_weights, k=1)[0]
         log_line = chosen(src.ip, dst.ip)
-        self._push_to_buffer(f'[BACKGROUND] {log_line}', global_state)
+        self._push_to_buffer(f'[BACKGROUND] {log_line}', src.subnet_cidr, global_state)
 
     def get_recent_logs(
         self,
@@ -106,7 +109,36 @@ def get_recent_logs(
         n: int = 8,
     ) -> list[str]:
         """Return the N most recent SIEM log lines from the buffer."""
-        return list(global_state.siem_log_buffer[-n:])
+        return [entry[0] for entry in global_state.siem_log_buffer[-n:]]
+
+    def get_filtered_logs(
+        self,
+        global_state: 'GlobalNetworkState',
+        subnet_tag: str | None = None,
+        n: int = 8,
+    ) -> list[str]:
+        """Return the N most recent logs filtered by subnet mapping.
+
+        Subnet tags are mapped to CIDRs:
+            dmz -> 192.168.1.0/24
+            internal -> 10.0.0.0/24
+            restricted -> 10.0.1.0/24
+        """
+        mapping = {
+            'dmz': '192.168.1.0/24',
+            'internal': '10.0.0.0/24',
+            'restricted': '10.0.1.0/24',
+        }
+        target_cidr = mapping.get(subnet_tag)
+        if not target_cidr:
+            return self.get_recent_logs(global_state, n)
+
+        filtered = [
+            entry[0]
+            for entry in global_state.siem_log_buffer
+            if entry[1] == target_cidr
+        ]
+        return filtered[-n:]
 
     def _generate_event(self, action_name: str, src_ip: str, tgt_ip: str) -> str | None:
         templates = ACTION_EVENT_MAP.get(action_name, ACTION_EVENT_MAP['_default'])
@@ -134,9 +166,12 @@ def _infer_src_ip(self, agent_id: str, global_state: 'GlobalNetworkState') -> st
         return '10.0.0.1'
 
     def _push_to_buffer(
-        self, log_line: str, global_state: 'GlobalNetworkState'
+        self,
+        log_line: str,
+        subnet_cidr: str,
+        global_state: 'GlobalNetworkState',
     ) -> None:
-        global_state.siem_log_buffer.append(log_line)
+        global_state.siem_log_buffer.append((log_line, subnet_cidr))
         # Rolling window — evict oldest entries beyond max
         if len(global_state.siem_log_buffer) > SIEM_BUFFER_MAX:
             global_state.siem_log_buffer.pop(0)

From dfaebe53623bd52554aacfd5a777b6bc973b2582 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:44:02 +0200
Subject: [PATCH 03/17] feat(env): upgrade GreenAgent to produce high-fidelity
 raw Windows Event XML logs

---
 netforge_rl/agents/green_agent.py | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/netforge_rl/agents/green_agent.py b/netforge_rl/agents/green_agent.py
index ccae9fb..5cbc8ba 100644
--- a/netforge_rl/agents/green_agent.py
+++ b/netforge_rl/agents/green_agent.py
@@ -11,6 +11,8 @@ class GreenAgent:
 
     def __init__(self, agent_id: str = 'green_agent_0'):
         self.agent_id = agent_id
+        from netforge_rl.siem.event_templates import evid_4624, sysmon_3, evid_4688
+        self._benign_templates = [evid_4624, sysmon_3, evid_4688]
 
     def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]:
         """Generates random telemetry alerts based on the current tick's position
@@ -41,33 +43,26 @@ def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]
             source = random.choice(hosts)
             target = random.choice(hosts)
             if source.ip != target.ip:
+                template = random.choice(self._benign_templates)
+                log_string = template(source.ip, target.ip)
                 noise_logs.append(
                     {
-                        'type': 'benign_traffic',
-                        'source': source.ip,
-                        'target': target.ip,
-                        'protocol': random.choice(['TCP', 'UDP', 'HTTP', 'DNS']),
-                        'severity': 0,
+                        'type': 'benign_xml',
+                        'data': log_string,
+                        'subnet': source.subnet_cidr,
                     }
                 )
 
         if random.random() < probability_of_false_positive:
             # Generate a false positive anomaly that could trip Blue's SIEM
             target = random.choice(hosts)
+            from netforge_rl.siem.event_templates import evid_4625
+            log_string = evid_4625('unknown_external', target.ip, username='Administrator')
             noise_logs.append(
                 {
-                    'type': 'anomaly',
-                    'source': 'unknown_external',
-                    'target': target.ip,
-                    'signature': random.choice(
-                        [
-                            'Failed_Login_Spike',
-                            'Malformed_Packet',
-                            'Suspicious_User_Agent',
-                        ]
-                    ),
-                    'severity': random.randint(1, 4),
-                    'false_positive': True,
+                    'type': 'anomaly_xml',
+                    'data': log_string,
+                    'subnet': target.subnet_cidr,
                 }
             )
 

From d0d444a09b7742c56205361fe07bf3f464ffe849 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:44:10 +0200
Subject: [PATCH 04/17] test(env): add unit tests for asynchronicity, subnet
 filtering, and XML fidelity

---
 tests/environment/test_ct_gmarl_env.py | 104 +++++++++++++++++++++++++
 1 file changed, 104 insertions(+)
 create mode 100644 tests/environment/test_ct_gmarl_env.py

diff --git a/tests/environment/test_ct_gmarl_env.py b/tests/environment/test_ct_gmarl_env.py
new file mode 100644
index 0000000..2f0ede0
--- /dev/null
+++ b/tests/environment/test_ct_gmarl_env.py
@@ -0,0 +1,104 @@
+import pytest
+import numpy as np
+from netforge_rl.environment.parallel_env import NetForgeRLEnv, MAX_ACTION_DURATION
+
+def test_asynchronous_step_delta_t():
+    """Verify that step() correctly jumps over idle time and returns normalized delta_t."""
+    env = NetForgeRLEnv(scenario_config={'scenario_type': 'apt_espionage'})
+    obs, infos = env.reset()
+    
+    # 1. Start an asynchronous action (e.g. Exploit) that takes 5 ticks
+    # We need an action that has duration > 1. 
+    # Let's mock a Blue action like 'IsolateHost' if it has a duration.
+    # Or just use the red operator's exploit if duration is defined.
+    
+    # Initial tick = 0.
+    # We'll queue a mock action with duration 10
+    from netforge_rl.core.action import BaseAction, ActionEffect
+    class MockLongAction(BaseAction):
+        def __init__(self, agent_id):
+            super().__init__(agent_id)
+            self.duration = 10
+            self.cost = 0
+        def validate(self, state): return True
+        def execute(self, state): return ActionEffect(True, {}, {})
+    
+    # Manually append to event_queue since we need to test the jump
+    env.event_queue.append({
+        'completion_tick': 10,
+        'agent': 'blue_dmz',
+        'action': MockLongAction('blue_dmz'),
+        'effect': ActionEffect(True, {}, {}),
+        'target_ip': '10.0.0.1'
+    })
+    
+    # Step the environment with no new actions
+    obs, rewards, terminate, truncate, infos = env.step({})
+    
+    # Expected: Jump to tick 10 (since it's the next event)
+    assert env.current_tick == 10
+    
+    # delta_t = 10 - 0 = 10
+    # delta_t_norm = 10 / 50 = 0.2
+    for agent in env.possible_agents:
+        assert 'delta_t' in infos[agent]
+        assert infos[agent]['delta_t'] == 10.0
+        assert 'delta_t_norm' in infos[agent]
+        assert np.isclose(infos[agent]['delta_t_norm'], 0.2)
+        
+        # Check observation key
+        assert 'delta_t' in obs[agent]
+        assert np.isclose(obs[agent]['delta_t'][0], 0.2)
+
+def test_subnet_filtering():
+    """Verify that agents only see logs from their own subnets."""
+    env = NetForgeRLEnv(scenario_config={'scenario_type': 'apt_espionage'})
+    env.reset()
+    
+    # Push a log explicitly to the DMZ subnet
+    env.siem_logger._push_to_buffer("DMZ_ALERT_XML", "192.168.1.0/24", env.global_state)
+    # Push a log explicitly to the Internal subnet
+    env.siem_logger._push_to_buffer("INTERNAL_ALERT_XML", "10.0.0.0/24", env.global_state)
+    
+    # Step the environment
+    obs, rewards, terminate, truncate, infos = env.step({})
+    
+    # Blue DMZ agent should encode DMZ_ALERT_XML but NOT Internal
+    # Since they are averaged/maxed, the embedding will be non-zero for both.
+    # However, we can check the length of filtered logs inside SIEMLogger.
+    
+    dmz_logs = env.siem_logger.get_filtered_logs(env.global_state, subnet_tag='dmz')
+    assert "DMZ_ALERT_XML" in dmz_logs
+    assert "INTERNAL_ALERT_XML" not in dmz_logs
+    
+    internal_logs = env.siem_logger.get_filtered_logs(env.global_state, subnet_tag='internal')
+    assert "INTERNAL_ALERT_XML" in internal_logs
+    assert "DMZ_ALERT_XML" not in internal_logs
+
+def test_green_agent_xml_fidelity():
+    """Verify that GreenAgent produces XML-style strings."""
+    from netforge_rl.agents.green_agent import GreenAgent
+    from netforge_rl.core.state import GlobalNetworkState, Host, Subnet
+    
+    state = GlobalNetworkState()
+    subnet = Subnet("192.168.1.0/24", "DMZ")
+    state.add_subnet(subnet)
+    host = Host("192.168.1.1", "Target", "192.168.1.0/24")
+    state.register_host(host)
+    
+    ga = GreenAgent()
+    # Force high probability of noise for testing
+    import random
+    random.seed(42)
+    
+    # Generate 100 samples to find at least one XML
+    found_xml = False
+    for _ in range(100):
+        noise = ga.generate_noise(1, state)
+        for alert in noise['alerts']:
+            if '<Event' in alert['data'] and 'xmlns' in alert['data']:
+                found_xml = True
+                break
+        if found_xml: break
+    
+    assert found_xml, "GreenAgent should have produced at least one XML encoded log."

From 48d34e28f3e894299971dbda64805c0a927d1a74 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:45:05 +0200
Subject: [PATCH 05/17] fix(env): Remove unused log_delay variable to satisfy
 Ruff

---
 netforge_rl/environment/parallel_env.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 4a7d030..43c40ce 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -270,7 +270,9 @@ def step(
         prev_tick = self.current_tick
         if self.event_queue:
             # Jump to the next event completion time
-            next_event_tick = min(event['completion_tick'] for event in self.event_queue)
+            next_event_tick = min(
+                event['completion_tick'] for event in self.event_queue
+            )
             self.current_tick = max(self.current_tick + 1, next_event_tick)
         else:
             # No events queued; advance by 1
@@ -287,7 +289,7 @@ def step(
             self.current_tick, self.global_state
         )
         for anomaly in noise_data.get('alerts', []):
-            # Arrival tick logic stays for delayed observation if needed, 
+            # Arrival tick logic stays for delayed observation if needed,
             # but for now we push raw strings + subnets to buffer
             self.siem_logger._push_to_buffer(
                 anomaly['data'], anomaly['subnet'], self.global_state
@@ -337,14 +339,15 @@ def step(
                 host = self.global_state.all_hosts.get(target_ip)
                 is_honeytoken_trap = host and host.contains_honeytokens
 
-                log_delay = 0 if is_honeytoken_trap else self.log_latency
-                
                 # Use templates for TP to ensure high-fidelity raw logs
                 from netforge_rl.siem.event_templates import sysmon_1
+
                 log_string = sysmon_1(res_agent, process='exploit_payload')
-                
+
                 self.siem_logger._push_to_buffer(
-                    log_string, host.subnet_cidr if host else 'unknown', self.global_state
+                    log_string,
+                    host.subnet_cidr if host else 'unknown',
+                    self.global_state,
                 )
 
         # Generate background SIEM noise every tick
@@ -379,10 +382,12 @@ def step(
             obs.update_from_state(self.global_state, resolved_effects)
 
             obs_array = obs.to_numpy(max_size=256)
-            
+
             # Blue agents receive subnet-specific SIEM embeddings; Red gets zeros.
             if 'blue' in agent.lower():
-                agent_siem_vec = agent_siem_vecs.get(agent, np.zeros(EMBEDDING_DIM, dtype=np.float32))
+                agent_siem_vec = agent_siem_vecs.get(
+                    agent, np.zeros(EMBEDDING_DIM, dtype=np.float32)
+                )
             else:
                 agent_siem_vec = np.zeros(EMBEDDING_DIM, dtype=np.float32)
 

From a95d5617de98d05131480fba4a07eee1bce8f74d Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Thu, 2 Apr 2026 09:45:46 +0200
Subject: [PATCH 06/17] fix(env): Remove unused log_delay variable to satisfy
 Ruff

---
 netforge_rl/agents/green_agent.py          |  6 +-
 netforge_rl/models/recurrent_mask_model.py | 95 ----------------------
 tests/environment/test_ct_gmarl_env.py     | 93 ++++++++++++---------
 3 files changed, 59 insertions(+), 135 deletions(-)
 delete mode 100644 netforge_rl/models/recurrent_mask_model.py

diff --git a/netforge_rl/agents/green_agent.py b/netforge_rl/agents/green_agent.py
index 5cbc8ba..9b0d4c9 100644
--- a/netforge_rl/agents/green_agent.py
+++ b/netforge_rl/agents/green_agent.py
@@ -12,6 +12,7 @@ class GreenAgent:
     def __init__(self, agent_id: str = 'green_agent_0'):
         self.agent_id = agent_id
         from netforge_rl.siem.event_templates import evid_4624, sysmon_3, evid_4688
+
         self._benign_templates = [evid_4624, sysmon_3, evid_4688]
 
     def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]:
@@ -57,7 +58,10 @@ def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]
             # Generate a false positive anomaly that could trip Blue's SIEM
             target = random.choice(hosts)
             from netforge_rl.siem.event_templates import evid_4625
-            log_string = evid_4625('unknown_external', target.ip, username='Administrator')
+
+            log_string = evid_4625(
+                'unknown_external', target.ip, username='Administrator'
+            )
             noise_logs.append(
                 {
                     'type': 'anomaly_xml',
diff --git a/netforge_rl/models/recurrent_mask_model.py b/netforge_rl/models/recurrent_mask_model.py
deleted file mode 100644
index 5f1b46d..0000000
--- a/netforge_rl/models/recurrent_mask_model.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import torch
-from torch import nn
-
-from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN
-from ray.rllib.utils.annotations import override
-from ray.rllib.utils.typing import ModelConfigDict, TensorType
-from typing import List, Tuple
-
-
-class MaskedLSTMModel(TorchRNN, nn.Module):
-    """
-    A custom PyTorch model integrating native RLlib LSTM cells with strict Action Masking.
-
-    We subclass TorchRNN to allow Ray to handle complex `seq_lens` padding and tensor
-    BPTT dimension tracking natively. We extract the mask out of the flattened array manually.
-    """
-
-    def __init__(
-        self,
-        obs_space,
-        action_space,
-        num_outputs: int,
-        model_config: ModelConfigDict,
-        name: str,
-    ):
-        nn.Module.__init__(self)
-        super().__init__(obs_space, action_space, num_outputs, model_config, name)
-
-        self.cell_size = model_config.get('custom_model_config', {}).get(
-            'lstm_cell_size', 128
-        )
-
-        # 1. Feature Extractor (Dense Layers)
-        # Input size is 256 sliced from the flattened 318 Dict space
-        self.fc1 = nn.Linear(256, 128)
-        self.fc2 = nn.Linear(128, 128)
-
-        # 2. LSTM Memory Unit
-        self.lstm = nn.LSTM(
-            input_size=128,
-            hidden_size=self.cell_size,
-            batch_first=True,
-        )
-
-        # 3. Action Type & Logit Masking Arrays
-        self.action_branch = nn.Linear(self.cell_size, num_outputs)
-        self.value_branch = nn.Linear(self.cell_size, 1)
-
-        self._cur_value = None
-
-    @override(TorchRNN)
-    def forward_rnn(
-        self, inputs: TensorType, state: List[TensorType], seq_lens: TensorType
-    ) -> Tuple[TensorType, List[TensorType]]:
-        # Ray flatly concatenates spaces in alphanumeric order.
-        # action_mask Box(62)
-        # obs Box(256)
-        # Therefore: action_mask is [:62], obs is [62:]
-        action_mask = inputs[:, :, :62]
-        obs = inputs[:, :, 62:]
-
-        # 1. Core Embeddings over Observation Sequence
-        x = nn.functional.relu(self.fc1(obs))
-        x = nn.functional.relu(self.fc2(x))
-
-        # 2. Evaluate Temporal Memory
-        h_in, c_in = state[0].unsqueeze(0), state[1].unsqueeze(0)
-        x, (h_out, c_out) = self.lstm(x, (h_in, c_in))
-
-        # 3. Finalize Output Logit Distribution Branches
-        logits = self.action_branch(x)
-        self._cur_value = torch.reshape(self.value_branch(x), [-1])
-
-        # 4. Apply Action Mask dynamically over the sequence batch
-        masked_logits = torch.where(
-            action_mask == 0.0,
-            torch.tensor(-1e10, device=logits.device, dtype=logits.dtype),
-            logits,
-        )
-
-        return masked_logits, [h_out.squeeze(0), c_out.squeeze(0)]
-
-    @override(TorchRNN)
-    def value_function(self) -> TensorType:
-        assert self._cur_value is not None, (
-            'Evaluate forward_rnn() before value_function() call.'
-        )
-        return self._cur_value
-
-    @override(TorchRNN)
-    def get_initial_state(self) -> List[TensorType]:
-        return [
-            torch.zeros(self.cell_size, dtype=torch.float32),
-            torch.zeros(self.cell_size, dtype=torch.float32),
-        ]
diff --git a/tests/environment/test_ct_gmarl_env.py b/tests/environment/test_ct_gmarl_env.py
index 2f0ede0..4f17ccf 100644
--- a/tests/environment/test_ct_gmarl_env.py
+++ b/tests/environment/test_ct_gmarl_env.py
@@ -1,43 +1,50 @@
-import pytest
 import numpy as np
-from netforge_rl.environment.parallel_env import NetForgeRLEnv, MAX_ACTION_DURATION
+from netforge_rl.environment.parallel_env import NetForgeRLEnv
+
 
 def test_asynchronous_step_delta_t():
     """Verify that step() correctly jumps over idle time and returns normalized delta_t."""
     env = NetForgeRLEnv(scenario_config={'scenario_type': 'apt_espionage'})
     obs, infos = env.reset()
-    
+
     # 1. Start an asynchronous action (e.g. Exploit) that takes 5 ticks
-    # We need an action that has duration > 1. 
+    # We need an action that has duration > 1.
     # Let's mock a Blue action like 'IsolateHost' if it has a duration.
     # Or just use the red operator's exploit if duration is defined.
-    
+
     # Initial tick = 0.
     # We'll queue a mock action with duration 10
     from netforge_rl.core.action import BaseAction, ActionEffect
+
     class MockLongAction(BaseAction):
         def __init__(self, agent_id):
             super().__init__(agent_id)
             self.duration = 10
             self.cost = 0
-        def validate(self, state): return True
-        def execute(self, state): return ActionEffect(True, {}, {})
-    
+
+        def validate(self, state):
+            return True
+
+        def execute(self, state):
+            return ActionEffect(True, {}, {})
+
     # Manually append to event_queue since we need to test the jump
-    env.event_queue.append({
-        'completion_tick': 10,
-        'agent': 'blue_dmz',
-        'action': MockLongAction('blue_dmz'),
-        'effect': ActionEffect(True, {}, {}),
-        'target_ip': '10.0.0.1'
-    })
-    
+    env.event_queue.append(
+        {
+            'completion_tick': 10,
+            'agent': 'blue_dmz',
+            'action': MockLongAction('blue_dmz'),
+            'effect': ActionEffect(True, {}, {}),
+            'target_ip': '10.0.0.1',
+        }
+    )
+
     # Step the environment with no new actions
     obs, rewards, terminate, truncate, infos = env.step({})
-    
+
     # Expected: Jump to tick 10 (since it's the next event)
     assert env.current_tick == 10
-    
+
     # delta_t = 10 - 0 = 10
     # delta_t_norm = 10 / 50 = 0.2
     for agent in env.possible_agents:
@@ -45,52 +52,59 @@ def execute(self, state): return ActionEffect(True, {}, {})
         assert infos[agent]['delta_t'] == 10.0
         assert 'delta_t_norm' in infos[agent]
         assert np.isclose(infos[agent]['delta_t_norm'], 0.2)
-        
+
         # Check observation key
         assert 'delta_t' in obs[agent]
         assert np.isclose(obs[agent]['delta_t'][0], 0.2)
 
+
 def test_subnet_filtering():
     """Verify that agents only see logs from their own subnets."""
     env = NetForgeRLEnv(scenario_config={'scenario_type': 'apt_espionage'})
     env.reset()
-    
+
     # Push a log explicitly to the DMZ subnet
-    env.siem_logger._push_to_buffer("DMZ_ALERT_XML", "192.168.1.0/24", env.global_state)
+    env.siem_logger._push_to_buffer('DMZ_ALERT_XML', '192.168.1.0/24', env.global_state)
     # Push a log explicitly to the Internal subnet
-    env.siem_logger._push_to_buffer("INTERNAL_ALERT_XML", "10.0.0.0/24", env.global_state)
-    
+    env.siem_logger._push_to_buffer(
+        'INTERNAL_ALERT_XML', '10.0.0.0/24', env.global_state
+    )
+
     # Step the environment
     obs, rewards, terminate, truncate, infos = env.step({})
-    
+
     # Blue DMZ agent should encode DMZ_ALERT_XML but NOT Internal
     # Since they are averaged/maxed, the embedding will be non-zero for both.
     # However, we can check the length of filtered logs inside SIEMLogger.
-    
+
     dmz_logs = env.siem_logger.get_filtered_logs(env.global_state, subnet_tag='dmz')
-    assert "DMZ_ALERT_XML" in dmz_logs
-    assert "INTERNAL_ALERT_XML" not in dmz_logs
-    
-    internal_logs = env.siem_logger.get_filtered_logs(env.global_state, subnet_tag='internal')
-    assert "INTERNAL_ALERT_XML" in internal_logs
-    assert "DMZ_ALERT_XML" not in internal_logs
+    assert 'DMZ_ALERT_XML' in dmz_logs
+    assert 'INTERNAL_ALERT_XML' not in dmz_logs
+
+    internal_logs = env.siem_logger.get_filtered_logs(
+        env.global_state, subnet_tag='internal'
+    )
+    assert 'INTERNAL_ALERT_XML' in internal_logs
+    assert 'DMZ_ALERT_XML' not in internal_logs
+
 
 def test_green_agent_xml_fidelity():
     """Verify that GreenAgent produces XML-style strings."""
     from netforge_rl.agents.green_agent import GreenAgent
     from netforge_rl.core.state import GlobalNetworkState, Host, Subnet
-    
+
     state = GlobalNetworkState()
-    subnet = Subnet("192.168.1.0/24", "DMZ")
+    subnet = Subnet('192.168.1.0/24', 'DMZ')
     state.add_subnet(subnet)
-    host = Host("192.168.1.1", "Target", "192.168.1.0/24")
+    host = Host('192.168.1.1', 'Target', '192.168.1.0/24')
     state.register_host(host)
-    
+
     ga = GreenAgent()
     # Force high probability of noise for testing
     import random
+
     random.seed(42)
-    
+
     # Generate 100 samples to find at least one XML
     found_xml = False
     for _ in range(100):
@@ -99,6 +113,7 @@ def test_green_agent_xml_fidelity():
             if '<Event' in alert['data'] and 'xmlns' in alert['data']:
                 found_xml = True
                 break
-        if found_xml: break
-    
-    assert found_xml, "GreenAgent should have produced at least one XML encoded log."
+        if found_xml:
+            break
+
+    assert found_xml, 'GreenAgent should have produced at least one XML encoded log.'

From 25f57480d3c60952e796bdb6ff60b569f598d8d4 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 3 Apr 2026 18:14:33 +0200
Subject: [PATCH 07/17] chore(env): finalize high-fidelity state-vector and
 termination logic

---
 netforge_rl/core/action.py              | 17 +----
 netforge_rl/core/registry.py            | 32 +++++-----
 netforge_rl/environment/parallel_env.py | 85 +++++++++++++++++++++----
 netforge_rl/nlp/log_encoder.py          |  2 +-
 netforge_rl/scenarios/apt_espionage.py  | 61 +++++++++---------
 5 files changed, 122 insertions(+), 75 deletions(-)

diff --git a/netforge_rl/core/action.py b/netforge_rl/core/action.py
index eb60ac7..411b793 100644
--- a/netforge_rl/core/action.py
+++ b/netforge_rl/core/action.py
@@ -8,7 +8,6 @@
 
 class ActionEffect:
     """Encapsulates the resulting state changes from an action for conflict
-
     resolution.
     """
 
@@ -18,17 +17,18 @@ def __init__(
         state_deltas: Union[Dict[str, Any], List['IStateDeltaCommand']],
         observation_data: Dict[str, Any],
         eta: int = 0,
+        action: Optional['BaseAction'] = None,
     ):
         self.success = success
         self.state_deltas = state_deltas
         self.observation_data = observation_data
         self.eta = eta
+        self.action = action
+        self.cost = getattr(action, 'cost', 0) if action else 0
 
 
 class BaseAction(ABC):
     """Modular Base Action for the MARL CybORG Environment.
-
-    All highly specific network attacks (Layer 2 - Layer 7) inherit from this class.
     """
 
     def __init__(
@@ -52,14 +52,10 @@ def __init__(
         self.required_prior_state = required_prior_state
 
     def validate(self, global_state: 'GlobalNetworkState') -> bool:
-        """Checks if the action is physically possible in the current network
-        state (e.g., is there a route, are preconditions met).
-        """
         if self.target_ip and self.target_ip not in global_state.all_hosts:
             return False
 
         if self.required_prior_state:
-            # Check Action History state logic
             agent_history = global_state.action_history.get(self.agent_id, set())
             expected_record = f'{self.required_prior_state}:{self.target_ip}'
             if expected_record not in agent_history:
@@ -67,9 +63,7 @@ def validate(self, global_state: 'GlobalNetworkState') -> bool:
 
         if self.target_ip:
             host = global_state.all_hosts[self.target_ip]
-            # Simple declarative Zone constraints example
             if 'red' in self.agent_id.lower() and host.subnet_cidr == '10.0.1.0/24':
-                # Secure Data targets cannot be touched without pivoting via DMZ or Internal User privileges first
                 has_dmz = any(
                     h.privilege in ['User', 'Root']
                     for h in global_state.all_hosts.values()
@@ -87,9 +81,4 @@ def validate(self, global_state: 'GlobalNetworkState') -> bool:
 
     @abstractmethod
     def execute(self, global_state: 'GlobalNetworkState') -> ActionEffect:
-        """Computes the theoretical effect of the action.
-
-        Note: State is NOT mutated directly here. Mutations are returned via ActionEffect
-        to allow the Environment to resolve simultaneous multi-agent collisions.
-        """
         pass
diff --git a/netforge_rl/core/registry.py b/netforge_rl/core/registry.py
index ded6f4a..a88ed8f 100644
--- a/netforge_rl/core/registry.py
+++ b/netforge_rl/core/registry.py
@@ -5,12 +5,10 @@
 class ActionRegistry:
     """A Factory Registry for dynamically tracking and instantiating
     BaseAction subclasses without monolithic if/else blocks.
-
-    Adheres strictly to the Open-Closed Principle.
     """
 
     def __init__(self):
-        # Maps (team, action_group_id) -> ActionClass
+        # Primary team mappings
         self._actions: Dict[str, Dict[int, Type]] = {
             'red': {},
             'red_commander': {},
@@ -32,20 +30,23 @@ def decorator(cls):
     def get_action_class(self, agent_id: str, group_id: int) -> Optional[Type]:
         """Retrieves the class constructor for a specific integer offset."""
         if 'red' in agent_id.lower():
-            team = 'red_commander' if 'commander' in agent_id.lower() else 'red'
+            primary_team = 'red_commander' if 'commander' in agent_id.lower() else 'red'
         else:
-            team = 'blue_commander' if 'commander' in agent_id.lower() else 'blue'
+            primary_team = 'blue_commander' if 'commander' in agent_id.lower() else 'blue'
 
-        return self._actions.get(team, {}).get(group_id)
+        # Attempt to find the action in the primary team registry
+        action_cls = self._actions.get(primary_team, {}).get(group_id)
+        
+        # Fallback: Check if the action was registered specifically to the role (e.g., 'red_operator')
+        if not action_cls:
+            action_cls = self._actions.get(agent_id.lower(), {}).get(group_id)
+            
+        return action_cls
 
     def instantiate_action(
         self, agent_id: str, action_data: object, target_ips: list
     ) -> Optional[object]:
-        """Factory method to resolve the generic action payload to an instance.
-
-        Supports legacy integer decoding or advanced Hierarchical MultiDiscrete
-        arrays: [action_type_id, target_ip_index].
-        """
+        """Factory method to resolve the generic action payload to an instance."""
         if not target_ips:
             target_ips = ['127.0.0.1']
 
@@ -64,9 +65,9 @@ def instantiate_action(
             action_group = action_int // len(target_ips)
 
             if 'red' in agent_id.lower():
-                mod = 4 if 'commander' in agent_id.lower() else 11
+                mod = 12 # Standardized bounds
             else:
-                mod = 5 if 'commander' in agent_id.lower() else 7
+                mod = 12
 
             action_type_id = action_group % mod
 
@@ -74,8 +75,7 @@ def instantiate_action(
         if not ActionCls:
             return None
 
-        # Pass required kwargs dynamically based on the action archetype
-        # Determine accepted arguments dynamically
+        # Pass required kwargs dynamically
         sig = inspect.signature(ActionCls.__init__)
         params = sig.parameters
 
@@ -83,11 +83,9 @@ def instantiate_action(
         if 'target_ip' in params:
             kwargs['target_ip'] = target_ip
         elif 'target_subnet' in params:
-            # Approximate subnet from target_ip for actions requiring Subnets
             parts = target_ip.split('.')
             kwargs['target_subnet'] = f'{parts[0]}.{parts[1]}.{parts[2]}.0/24'
         elif 'target_agent_id' in params:
-            # Map target_agent_id randomly or conventionally for Coordination actions
             kwargs['target_agent_id'] = (
                 'red_operator' if agent_id == 'red_commander' else 'red_commander'
             )
diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 43c40ce..713901f 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -5,6 +5,7 @@
 from netforge_rl.core.action import BaseAction, ActionEffect
 from netforge_rl.core.observation import BaseObservation
 from netforge_rl.core.registry import action_registry
+import netforge_rl.actions  # 🧬 Fusion: Trigger action registration decorators
 from netforge_rl.core.physics import ConflictResolutionEngine
 from netforge_rl.environment.base_env import BaseNetForgeRLEnv
 from netforge_rl.topologies.network_generator import NetworkGenerator
@@ -90,7 +91,7 @@ def __init__(self, scenario_config: dict):
                         low=-1.0, high=1.0, shape=(256,), dtype=np.float32
                     ),
                     'action_mask': gym.spaces.Box(
-                        low=0, high=1, shape=(62,), dtype=np.int8
+                        low=0, high=1, shape=(32 + 50,), dtype=np.int8
                     ),
                     'siem_embedding': gym.spaces.Box(
                         low=-1.0, high=1.0, shape=(EMBEDDING_DIM,), dtype=np.float32
@@ -104,11 +105,11 @@ def __init__(self, scenario_config: dict):
         }
         self.action_spaces = {
             agent: gym.spaces.MultiDiscrete(
-                [12, 50]
-            )  # [Action Type (max 12), Target IP Index (max 50 padded)]
+                [32, 50]
+            )  # [Action Type (max 32), Target IP Index (max 50 padded)]
             for agent in self.possible_agents
         }
-        self.max_ticks = 1000
+        self.max_ticks = scenario_config.get('max_ticks', 1000)
         self.current_tick = 0
         self.event_queue = []
 
@@ -131,8 +132,17 @@ def reset(
         }
         self.global_state.agent_compute = {agent: 1000 for agent in self.agents}
         self.global_state.business_downtime_score = 0.0
-        # Clear SIEM log buffer on new episode
+        # SIEM log buffer and research metrics
         self.global_state.siem_log_buffer = []
+        self.episode_metrics = {
+            'infection_times': {}, # IP -> tick
+            'detection_times': {}, # IP -> tick (first SIEM alert)
+            'isolation_times': {}, # IP -> tick
+            'exfiltrated_data': 0.0,
+            'sla_uptime_sum': 0.0,
+            'steps_count': 0
+        }
+        
         observations = {}
         for agent_id in self.agents:
             obs = BaseObservation(agent_id)
@@ -159,18 +169,16 @@ def action_mask(self, agent: str) -> np.ndarray:
         pruning out computationally redundant modulo duplicates.
         """
         # RLlib explicitly requires MultiDiscrete action masks to be concatenated flat boolean layers.
-        # Action space: [12 types, 50 IPs]. Therefore Mask shape = (62,)
-        mask = np.zeros(62, dtype=np.int8)
+        # Action space: [max 32 types, 50 IPs]. Therefore Mask shape = (82,)
+        mask = np.zeros(82, dtype=np.int8)
 
-        # 1. Action Type Dimension (0-11)
         if 'red' in agent.lower():
-            valid_action_types = 4 if 'commander' in agent.lower() else 9
+            valid_action_types = 17
         else:
-            valid_action_types = 5 if 'commander' in agent.lower() else 7
+            valid_action_types = 15
         mask[:valid_action_types] = 1
 
-        # 2. Target IP Dimension (12-61)
-        mask[12 : 12 + 50] = 1
+        mask[32 : 32 + 50] = 1
 
         return mask
 
@@ -233,6 +241,7 @@ def step(
 
                 # Generate intended effect (though state might shift by completion time)
                 effect = action.execute(self.global_state)
+                effect.action = action  # 🧬 Link for reward attribution
 
                 self.global_state.agent_locked_until[agent] = completion_tick
                 self.event_queue.append(
@@ -492,6 +501,16 @@ def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> di
             info['successful_exploits'] = float(successful_exploits)
             info['hosts_isolated'] = float(hosts_isolated)
             info['services_restored'] = float(services_restored)
+            
+            if agent_effect:
+                target_ip = getattr(agent_effect.action, 'target_ip', None)
+                if target_ip and target_ip in self.global_state.all_hosts:
+                    ordered_hosts = sorted(list(self.global_state.all_hosts.keys()))
+                    info['target_ip_index'] = ordered_hosts.index(target_ip)
+                else:
+                    info['target_ip_index'] = None
+            else:
+                info['target_ip_index'] = None
 
             # Extra context for analysis
             info['agent_energy'] = float(self.global_state.agent_energy.get(agent, 0))
@@ -510,6 +529,48 @@ def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> di
                 )
             )
 
+            sla_final = (self.episode_metrics['sla_uptime_sum'] / self.episode_metrics['steps_count'] 
+                         if self.episode_metrics['steps_count'] > 0 else 1.0)
+            info['SLA_Uptime_Percentage'] = float(sla_final)
+            
+            # Calculate MTTC (Mean Time To Containment)
+            mttc_vals = []
+            for ip, t_iso in self.episode_metrics['isolation_times'].items():
+                if ip in self.episode_metrics['infection_times']:
+                    mttc_vals.append(t_iso - self.episode_metrics['infection_times'][ip])
+            info['MTTC'] = float(sum(mttc_vals) / len(mttc_vals)) if mttc_vals else 0.0
+            
+            # Cumulative Impact
+            info['Total_Exfiltrated_Data'] = float(self.episode_metrics['exfiltrated_data'])
+
             infos[agent] = info
 
         return infos
+
+    def global_state_vector(self) -> np.ndarray:
+        """Returns a flattened 512-dim vector representing the true global network physics.
+        Used primarily for Centralized Critics (MAPPO, QMIX) during training.
+        """
+        vec = []
+        ordered_hosts = sorted(list(self.global_state.all_hosts.keys()))
+        
+        for i in range(50):
+            if i < len(ordered_hosts):
+                host = self.global_state.all_hosts[ordered_hosts[i]]
+                priv = {"None": 0.0, "User": 0.5, "Root": 1.0}.get(host.privilege, 0.0)
+                status = 1.0 if host.status == "online" else 0.0
+                decoy = 1.0 if host.decoy != "inactive" else 0.0
+                vec.extend([priv, status, decoy])
+            else:
+                vec.extend([0.0, 0.0, 0.0])
+        
+        vec.append(self.global_state.business_downtime_score / 100.0)
+        vec.append(float(self.current_tick) / float(self.max_ticks))
+        
+        for agent in self.possible_agents:
+            vec.append(float(self.global_state.agent_energy.get(agent, 0)) / 100.0)
+
+        result = np.zeros(512, dtype=np.float32)
+        v_arr = np.array(vec, dtype=np.float32)
+        result[:len(v_arr)] = v_arr
+        return result
diff --git a/netforge_rl/nlp/log_encoder.py b/netforge_rl/nlp/log_encoder.py
index e0832f8..b5a32c6 100644
--- a/netforge_rl/nlp/log_encoder.py
+++ b/netforge_rl/nlp/log_encoder.py
@@ -122,7 +122,7 @@ def _build_tfidf(self):
         )
         pipeline.fit(corpus)
         logger.info(
-            'LogEncoder[tfidf]: fitted on %d corpus documents → %d-dim LSA.',
+            'LogEncoder[tfidf]: fitted on %d corpus documents -> %d-dim LSA.',
             len(corpus),
             EMBEDDING_DIM,
         )
diff --git a/netforge_rl/scenarios/apt_espionage.py b/netforge_rl/scenarios/apt_espionage.py
index 48ba6a0..a2222da 100644
--- a/netforge_rl/scenarios/apt_espionage.py
+++ b/netforge_rl/scenarios/apt_espionage.py
@@ -9,9 +9,6 @@
 class AptEspionageScenario(BaseScenario):
     """
     Scenario: Red Team acts as an Advanced Persistent Threat.
-
-    - Red is rewarded for uninterrupted stealthy persistence.
-    - Red is penalized heavily if discovered or isolated by Blue.
     """
 
     def __init__(self, agents):
@@ -26,31 +23,41 @@ def calculate_reward(
         reward = 0.0
 
         if effect:
+            # Action Cost Penalty
             if getattr(effect, 'cost', 0) > 0:
-                reward -= (
-                    effect.cost * 0.2
-                )  # Larger penalty for wasting energy in APT scope
+                reward -= (effect.cost * 0.1)
+
+            if 'red' in agent_id.lower() and effect.success:
+                action_obj = getattr(effect, 'action', None)
+                action_name = type(action_obj).__name__ if action_obj else "Unknown"
+                
+                # RECON Reward
+                if 'Discover' in action_name or 'Scan' in action_name:
+                    reward += 2.0
+                
+                # BREACH Reward
+                if 'Exploit' in action_name or 'Pivot' in action_name:
+                    reward += 10.0
+                
+                # EXFIL Reward
+                if 'Exfiltrate' in action_name or 'Dump' in action_name or 'Impact' in action_name:
+                    reward += 20.0
+
+                if 'shared' in str(effect.observation_data):
+                    reward += 5.0
 
-            if 'red' in agent_id.lower() and effect.observation_data:
-                if 'shared' in effect.observation_data:
-                    reward += 5.0  # Massive emphasis on coordinated reconnaissance
-                # Massive penalty for triggering Honeypots during stealth operation
                 if 'Failed against Decoy' in str(effect.observation_data.values()):
                     reward -= 15.0
-                elif 'kernel panic' in str(effect.observation_data.values()):
-                    reward -= 25.0
-                # Direct penalty for being blocked natively by Firewalls
-                elif 'validation failed' in str(effect.observation_data.values()):
-                    reward -= 1.0
 
         if 'red' in agent_id.lower():
+            # Persistence Reward
+            total_persistence = 0.0
             for h in global_state.all_hosts.values():
-                # Reward persistence
                 if h.privilege in ['User', 'Root'] and h.status == 'online':
-                    reward += 1.0
-                # Extreme penalty if caught/isolated
+                    total_persistence += 5.0
                 elif h.privilege in ['User', 'Root'] and h.status == 'isolated':
-                    reward -= 20.0
+                    total_persistence -= 20.0
+            reward += total_persistence
 
         elif 'blue' in agent_id.lower():
             for h in global_state.all_hosts.values():
@@ -58,25 +65,17 @@ def calculate_reward(
                 is_isolated = h.status == 'isolated'
 
                 if not is_infected and not is_isolated:
-                    reward += 1.0  # Uptime
+                    reward += 1.0 
                 elif is_infected and not is_isolated:
-                    reward -= 5.0  # Exfiltration / espionage penalty
+                    reward -= 5.0 
                 elif is_infected and is_isolated:
-                    reward += 20.0  # Successfully rooted out the APT
+                    reward += 20.0 
 
         return reward
 
     def check_termination(self, global_state: 'GlobalNetworkState') -> Dict[str, bool]:
-        # If Blue isolates all infected hosts, the APT is defeated
         terminate = {agent: False for agent in self.agents}
-
-        infected_hosts = [
-            h
-            for h in global_state.all_hosts.values()
-            if h.privilege in ['User', 'Root']
-        ]
+        infected_hosts = [h for h in global_state.all_hosts.values() if h.privilege in ['User', 'Root']]
         if infected_hosts and all(h.status == 'isolated' for h in infected_hosts):
-            # Terminate episode if Red has no active footholds left
             return {agent: True for agent in self.agents}
-
         return terminate

From d1b4f0c3d663fd9a9af786d4b2e0140c0f5e6da4 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Sat, 4 Apr 2026 13:55:22 +0200
Subject: [PATCH 08/17] Update: Set strictly to 100 nodes for Neural Network
 shape constant

---
 netforge_rl/environment/parallel_env.py     | 12 ++++++------
 netforge_rl/topologies/network_generator.py |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 713901f..46f8737 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -91,7 +91,7 @@ def __init__(self, scenario_config: dict):
                         low=-1.0, high=1.0, shape=(256,), dtype=np.float32
                     ),
                     'action_mask': gym.spaces.Box(
-                        low=0, high=1, shape=(32 + 50,), dtype=np.int8
+                        low=0, high=1, shape=(32 + 100,), dtype=np.int8
                     ),
                     'siem_embedding': gym.spaces.Box(
                         low=-1.0, high=1.0, shape=(EMBEDDING_DIM,), dtype=np.float32
@@ -105,8 +105,8 @@ def __init__(self, scenario_config: dict):
         }
         self.action_spaces = {
             agent: gym.spaces.MultiDiscrete(
-                [32, 50]
-            )  # [Action Type (max 32), Target IP Index (max 50 padded)]
+                [32, 100]
+            )  # [Action Type (max 32), Target IP Index (max 100 padded)]
             for agent in self.possible_agents
         }
         self.max_ticks = scenario_config.get('max_ticks', 1000)
@@ -169,8 +169,8 @@ def action_mask(self, agent: str) -> np.ndarray:
         pruning out computationally redundant modulo duplicates.
         """
         # RLlib explicitly requires MultiDiscrete action masks to be concatenated flat boolean layers.
-        # Action space: [max 32 types, 50 IPs]. Therefore Mask shape = (82,)
-        mask = np.zeros(82, dtype=np.int8)
+        # Action space: [max 32 types, 100 IPs]. Therefore Mask shape = (132,)
+        mask = np.zeros(132, dtype=np.int8)
 
         if 'red' in agent.lower():
             valid_action_types = 17
@@ -178,7 +178,7 @@ def action_mask(self, agent: str) -> np.ndarray:
             valid_action_types = 15
         mask[:valid_action_types] = 1
 
-        mask[32 : 32 + 50] = 1
+        mask[32 : 32 + 100] = 1
 
         return mask
 
diff --git a/netforge_rl/topologies/network_generator.py b/netforge_rl/topologies/network_generator.py
index f15170a..d690214 100644
--- a/netforge_rl/topologies/network_generator.py
+++ b/netforge_rl/topologies/network_generator.py
@@ -33,7 +33,7 @@ def generate(self, seed: Optional[int] = None) -> GlobalNetworkState:
     def _generate_procedural(self) -> GlobalNetworkState:
         """Creates a randomized network using NetworkX hierarchical patterns.
 
-        Enforces a constant size of 50 hosts for Neural Network dimension consistency.
+        Enforces a constant size of 100 hosts for Neural Network dimension consistency.
         Active topology spans 15-30 nodes; the rest are instantiated as inactive padding.
         """
         import networkx as nx
@@ -152,8 +152,8 @@ def _generate_procedural(self) -> GlobalNetworkState:
                 dc.is_domain_controller = True
                 dc.cached_credentials.append('Enterprise_Admin_Token')
 
-        # Fill strictly to 50 nodes for Neural Network shape constant
-        padding_needed = 50 - len(state.all_hosts)
+        # Fill strictly to 100 nodes for Neural Network shape constant
+        padding_needed = 100 - len(state.all_hosts)
         for p in range(padding_needed):
             pad_ip = f'169.254.0.{p + 1}'
             pad_host = Host(

From b025504a7a20c8d5b927b1705857bb7be28e528f Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Sun, 5 Apr 2026 17:22:34 +0200
Subject: [PATCH 09/17] Some fixes

---
 netforge_rl/core/observation.py         |  5 ++++-
 netforge_rl/core/state.py               | 21 +++++++++++++++++++++
 netforge_rl/environment/parallel_env.py |  5 +++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/netforge_rl/core/observation.py b/netforge_rl/core/observation.py
index d38b182..380af22 100644
--- a/netforge_rl/core/observation.py
+++ b/netforge_rl/core/observation.py
@@ -113,7 +113,10 @@ def to_numpy(self, max_size: int = 256) -> np.ndarray:
                 vector[idx] = val
                 idx += 1
 
-        for ip, data in self.visible_hosts.items():
+        # Sort the visible hosts by IP to ensure deterministic tensor mapping for RL models
+        sorted_ips = sorted(list(self.visible_hosts.keys()))
+        for ip in sorted_ips:
+            data = self.visible_hosts[ip]
             if idx + 2 >= max_size:
                 break
 
diff --git a/netforge_rl/core/state.py b/netforge_rl/core/state.py
index 0939f0e..4f787e0 100644
--- a/netforge_rl/core/state.py
+++ b/netforge_rl/core/state.py
@@ -1,3 +1,4 @@
+import numpy as np
 from typing import Dict, Set, Any
 
 
@@ -208,6 +209,26 @@ def can_route_to(
 
         return False
 
+    def get_adjacency_matrix(self) -> np.ndarray:
+        """Returns a 100x100 adjacency matrix representing routing capabilities between all hosts."""
+        import numpy as np
+        adj = np.zeros((100, 100), dtype=np.float32)
+        
+        # We need a stable ordering of IPs, so we sort them
+        sorted_ips = sorted(list(self.all_hosts.keys()))
+        
+        for i, src_ip in enumerate(sorted_ips):
+            for j, dst_ip in enumerate(sorted_ips):
+                if i == j:
+                    adj[i, j] = 1.0
+                elif self.can_route_to(dst_ip):
+                    # Simplification: if it can route to dst, we mark an edge.
+                    # A more accurate version would check if src_ip can route to dst_ip,
+                    # but can_route_to doesn't take src_ip. It assumes global routing rules based on subnets.
+                    adj[i, j] = 1.0
+                    
+        return adj
+
     def reallocate_dhcp(self):
         """Simulates dynamic mid-episode restructuring of the network.
 
diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 46f8737..28d243c 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -96,6 +96,9 @@ def __init__(self, scenario_config: dict):
                     'siem_embedding': gym.spaces.Box(
                         low=-1.0, high=1.0, shape=(EMBEDDING_DIM,), dtype=np.float32
                     ),
+                    'adj_matrix': gym.spaces.Box(
+                        low=0.0, high=1.0, shape=(10000,), dtype=np.float32
+                    ),
                     'delta_t': gym.spaces.Box(
                         low=0.0, high=1.0, shape=(1,), dtype=np.float32
                     ),
@@ -151,6 +154,7 @@ def reset(
                 'obs': obs.to_numpy(max_size=256),
                 'action_mask': self.action_mask(agent_id),
                 'siem_embedding': np.zeros(EMBEDDING_DIM, dtype=np.float32),
+                'adj_matrix': self.global_state.get_adjacency_matrix().flatten(),
                 'delta_t': np.zeros(1, dtype=np.float32),
             }
         self.current_tick = 0
@@ -404,6 +408,7 @@ def step(
                 'obs': obs_array,
                 'action_mask': self.action_mask(agent),
                 'siem_embedding': agent_siem_vec,
+                'adj_matrix': self.global_state.get_adjacency_matrix().flatten(),
                 'delta_t': np.array([delta_t_norm], dtype=np.float32),
             }
             agent_effect = resolved_effects.get(agent)

From 51470538debf29f4d8d6f97a1eaaa89426e4a9c7 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Tue, 7 Apr 2026 08:13:28 +0200
Subject: [PATCH 10/17] Refactor: bigger range

---
 netforge_rl/environment/parallel_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 28d243c..68e2176 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -559,7 +559,7 @@ def global_state_vector(self) -> np.ndarray:
         vec = []
         ordered_hosts = sorted(list(self.global_state.all_hosts.keys()))
         
-        for i in range(50):
+        for i in range(100):
             if i < len(ordered_hosts):
                 host = self.global_state.all_hosts[ordered_hosts[i]]
                 priv = {"None": 0.0, "User": 0.5, "Root": 1.0}.get(host.privilege, 0.0)

From 747155c81b103b211f7fd8020ac4e484ccdf5e6e Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 10:32:34 +0200
Subject: [PATCH 11/17] Ruff fix and MTC calculating

---
 netforge_rl/environment/parallel_env.py | 49 ++++++++++++++-----------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 68e2176..a04e859 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -5,7 +5,7 @@
 from netforge_rl.core.action import BaseAction, ActionEffect
 from netforge_rl.core.observation import BaseObservation
 from netforge_rl.core.registry import action_registry
-import netforge_rl.actions  # 🧬 Fusion: Trigger action registration decorators
+import netforge_rl.actions  # noqa: F401 
 from netforge_rl.core.physics import ConflictResolutionEngine
 from netforge_rl.environment.base_env import BaseNetForgeRLEnv
 from netforge_rl.topologies.network_generator import NetworkGenerator
@@ -138,14 +138,14 @@ def reset(
         # SIEM log buffer and research metrics
         self.global_state.siem_log_buffer = []
         self.episode_metrics = {
-            'infection_times': {}, # IP -> tick
-            'detection_times': {}, # IP -> tick (first SIEM alert)
-            'isolation_times': {}, # IP -> tick
+            'infection_times': {},  # IP -> tick
+            'detection_times': {},  # IP -> tick (first SIEM alert)
+            'isolation_times': {},  # IP -> tick
             'exfiltrated_data': 0.0,
             'sla_uptime_sum': 0.0,
-            'steps_count': 0
+            'steps_count': 0,
         }
-        
+
         observations = {}
         for agent_id in self.agents:
             obs = BaseObservation(agent_id)
@@ -350,7 +350,6 @@ def step(
 
                 # Active Deception intercept
                 host = self.global_state.all_hosts.get(target_ip)
-                is_honeytoken_trap = host and host.contains_honeytokens
 
                 # Use templates for TP to ensure high-fidelity raw logs
                 from netforge_rl.siem.event_templates import sysmon_1
@@ -506,7 +505,7 @@ def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> di
             info['successful_exploits'] = float(successful_exploits)
             info['hosts_isolated'] = float(hosts_isolated)
             info['services_restored'] = float(services_restored)
-            
+
             if agent_effect:
                 target_ip = getattr(agent_effect.action, 'target_ip', None)
                 if target_ip and target_ip in self.global_state.all_hosts:
@@ -534,19 +533,27 @@ def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> di
                 )
             )
 
-            sla_final = (self.episode_metrics['sla_uptime_sum'] / self.episode_metrics['steps_count'] 
-                         if self.episode_metrics['steps_count'] > 0 else 1.0)
+            sla_final = (
+                self.episode_metrics['sla_uptime_sum']
+                / self.episode_metrics['steps_count']
+                if self.episode_metrics['steps_count'] > 0
+                else 1.0
+            )
             info['SLA_Uptime_Percentage'] = float(sla_final)
-            
+
             # Calculate MTTC (Mean Time To Containment)
             mttc_vals = []
             for ip, t_iso in self.episode_metrics['isolation_times'].items():
                 if ip in self.episode_metrics['infection_times']:
-                    mttc_vals.append(t_iso - self.episode_metrics['infection_times'][ip])
+                    mttc_vals.append(
+                        t_iso - self.episode_metrics['infection_times'][ip]
+                    )
             info['MTTC'] = float(sum(mttc_vals) / len(mttc_vals)) if mttc_vals else 0.0
-            
+
             # Cumulative Impact
-            info['Total_Exfiltrated_Data'] = float(self.episode_metrics['exfiltrated_data'])
+            info['Total_Exfiltrated_Data'] = float(
+                self.episode_metrics['exfiltrated_data']
+            )
 
             infos[agent] = info
 
@@ -558,24 +565,24 @@ def global_state_vector(self) -> np.ndarray:
         """
         vec = []
         ordered_hosts = sorted(list(self.global_state.all_hosts.keys()))
-        
+
         for i in range(100):
             if i < len(ordered_hosts):
                 host = self.global_state.all_hosts[ordered_hosts[i]]
-                priv = {"None": 0.0, "User": 0.5, "Root": 1.0}.get(host.privilege, 0.0)
-                status = 1.0 if host.status == "online" else 0.0
-                decoy = 1.0 if host.decoy != "inactive" else 0.0
+                priv = {'None': 0.0, 'User': 0.5, 'Root': 1.0}.get(host.privilege, 0.0)
+                status = 1.0 if host.status == 'online' else 0.0
+                decoy = 1.0 if host.decoy != 'inactive' else 0.0
                 vec.extend([priv, status, decoy])
             else:
                 vec.extend([0.0, 0.0, 0.0])
-        
+
         vec.append(self.global_state.business_downtime_score / 100.0)
         vec.append(float(self.current_tick) / float(self.max_ticks))
-        
+
         for agent in self.possible_agents:
             vec.append(float(self.global_state.agent_energy.get(agent, 0)) / 100.0)
 
         result = np.zeros(512, dtype=np.float32)
         v_arr = np.array(vec, dtype=np.float32)
-        result[:len(v_arr)] = v_arr
+        result[: len(v_arr)] = v_arr
         return result

From cd6f0e22797c167c4f04ec038039089e62f8551a Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 10:33:08 +0200
Subject: [PATCH 12/17] Ruff format

---
 netforge_rl/core/action.py   |  3 +--
 netforge_rl/core/registry.py | 10 ++++++----
 netforge_rl/core/state.py    |  7 ++++---
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/netforge_rl/core/action.py b/netforge_rl/core/action.py
index 411b793..4d09926 100644
--- a/netforge_rl/core/action.py
+++ b/netforge_rl/core/action.py
@@ -28,8 +28,7 @@ def __init__(
 
 
 class BaseAction(ABC):
-    """Modular Base Action for the MARL CybORG Environment.
-    """
+    """Modular Base Action for the MARL CybORG Environment."""
 
     def __init__(
         self,
diff --git a/netforge_rl/core/registry.py b/netforge_rl/core/registry.py
index a88ed8f..bf9ea85 100644
--- a/netforge_rl/core/registry.py
+++ b/netforge_rl/core/registry.py
@@ -32,15 +32,17 @@ def get_action_class(self, agent_id: str, group_id: int) -> Optional[Type]:
         if 'red' in agent_id.lower():
             primary_team = 'red_commander' if 'commander' in agent_id.lower() else 'red'
         else:
-            primary_team = 'blue_commander' if 'commander' in agent_id.lower() else 'blue'
+            primary_team = (
+                'blue_commander' if 'commander' in agent_id.lower() else 'blue'
+            )
 
         # Attempt to find the action in the primary team registry
         action_cls = self._actions.get(primary_team, {}).get(group_id)
-        
+
         # Fallback: Check if the action was registered specifically to the role (e.g., 'red_operator')
         if not action_cls:
             action_cls = self._actions.get(agent_id.lower(), {}).get(group_id)
-            
+
         return action_cls
 
     def instantiate_action(
@@ -65,7 +67,7 @@ def instantiate_action(
             action_group = action_int // len(target_ips)
 
             if 'red' in agent_id.lower():
-                mod = 12 # Standardized bounds
+                mod = 12  # Standardized bounds
             else:
                 mod = 12
 
diff --git a/netforge_rl/core/state.py b/netforge_rl/core/state.py
index 4f787e0..e2b7bf4 100644
--- a/netforge_rl/core/state.py
+++ b/netforge_rl/core/state.py
@@ -212,11 +212,12 @@ def can_route_to(
     def get_adjacency_matrix(self) -> np.ndarray:
         """Returns a 100x100 adjacency matrix representing routing capabilities between all hosts."""
         import numpy as np
+
         adj = np.zeros((100, 100), dtype=np.float32)
-        
+
         # We need a stable ordering of IPs, so we sort them
         sorted_ips = sorted(list(self.all_hosts.keys()))
-        
+
         for i, src_ip in enumerate(sorted_ips):
             for j, dst_ip in enumerate(sorted_ips):
                 if i == j:
@@ -226,7 +227,7 @@ def get_adjacency_matrix(self) -> np.ndarray:
                     # A more accurate version would check if src_ip can route to dst_ip,
                     # but can_route_to doesn't take src_ip. It assumes global routing rules based on subnets.
                     adj[i, j] = 1.0
-                    
+
         return adj
 
     def reallocate_dhcp(self):

From a346aeabddf09733f1bce3b3c5def00fdd4e83fc Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 10:33:52 +0200
Subject: [PATCH 13/17] Ruff format for apt

---
 netforge_rl/scenarios/apt_espionage.py | 28 +++++++++++++++++---------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/netforge_rl/scenarios/apt_espionage.py b/netforge_rl/scenarios/apt_espionage.py
index a2222da..04fe25a 100644
--- a/netforge_rl/scenarios/apt_espionage.py
+++ b/netforge_rl/scenarios/apt_espionage.py
@@ -25,22 +25,26 @@ def calculate_reward(
         if effect:
             # Action Cost Penalty
             if getattr(effect, 'cost', 0) > 0:
-                reward -= (effect.cost * 0.1)
+                reward -= effect.cost * 0.1
 
             if 'red' in agent_id.lower() and effect.success:
                 action_obj = getattr(effect, 'action', None)
-                action_name = type(action_obj).__name__ if action_obj else "Unknown"
-                
+                action_name = type(action_obj).__name__ if action_obj else 'Unknown'
+
                 # RECON Reward
                 if 'Discover' in action_name or 'Scan' in action_name:
                     reward += 2.0
-                
+
                 # BREACH Reward
                 if 'Exploit' in action_name or 'Pivot' in action_name:
                     reward += 10.0
-                
+
                 # EXFIL Reward
-                if 'Exfiltrate' in action_name or 'Dump' in action_name or 'Impact' in action_name:
+                if (
+                    'Exfiltrate' in action_name
+                    or 'Dump' in action_name
+                    or 'Impact' in action_name
+                ):
                     reward += 20.0
 
                 if 'shared' in str(effect.observation_data):
@@ -65,17 +69,21 @@ def calculate_reward(
                 is_isolated = h.status == 'isolated'
 
                 if not is_infected and not is_isolated:
-                    reward += 1.0 
+                    reward += 1.0
                 elif is_infected and not is_isolated:
-                    reward -= 5.0 
+                    reward -= 5.0
                 elif is_infected and is_isolated:
-                    reward += 20.0 
+                    reward += 20.0
 
         return reward
 
     def check_termination(self, global_state: 'GlobalNetworkState') -> Dict[str, bool]:
         terminate = {agent: False for agent in self.agents}
-        infected_hosts = [h for h in global_state.all_hosts.values() if h.privilege in ['User', 'Root']]
+        infected_hosts = [
+            h
+            for h in global_state.all_hosts.values()
+            if h.privilege in ['User', 'Root']
+        ]
         if infected_hosts and all(h.status == 'isolated' for h in infected_hosts):
             return {agent: True for agent in self.agents}
         return terminate

From 73af8cd4c74804fe11aa697ac72c77b5b203b01f Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 10:46:45 +0200
Subject: [PATCH 14/17] fix(core): add null-checks to ConflictResolutionEngine

---
 netforge_rl/core/physics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/netforge_rl/core/physics.py b/netforge_rl/core/physics.py
index 9ba2bb8..018468c 100644
--- a/netforge_rl/core/physics.py
+++ b/netforge_rl/core/physics.py
@@ -21,7 +21,7 @@ def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]:
         blue_defended_nodes = {}
         for blue_id in blue_agents:
             eff = effects[blue_id]
-            if eff.success:
+            if eff is not None and eff.success:
                 if isinstance(eff.state_deltas, dict):
                     for delta_key in eff.state_deltas.keys():
                         if 'hosts/' in delta_key:
@@ -35,7 +35,7 @@ def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]:
         # 2. Evaluate Red attacks against the compiled simultaneous defenses
         for red_id in red_agents:
             red_eff = effects[red_id]
-            if not red_eff.success:
+            if red_eff is None or not red_eff.success:
                 continue
 
             collision_detected = False

From 006f1afc5ae7b9f58d160c8e93630f3e3fc6f17f Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 12:53:48 +0200
Subject: [PATCH 15/17] feat(sim): implement honeytoken detection and
 structured SIEM alerts

---
 netforge_rl/agents/green_agent.py       |  2 ++
 netforge_rl/environment/parallel_env.py | 17 ++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/netforge_rl/agents/green_agent.py b/netforge_rl/agents/green_agent.py
index 9b0d4c9..a573609 100644
--- a/netforge_rl/agents/green_agent.py
+++ b/netforge_rl/agents/green_agent.py
@@ -51,6 +51,7 @@ def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]
                         'type': 'benign_xml',
                         'data': log_string,
                         'subnet': source.subnet_cidr,
+                        'severity': 0,
                     }
                 )
 
@@ -67,6 +68,7 @@ def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]
                     'type': 'anomaly_xml',
                     'data': log_string,
                     'subnet': target.subnet_cidr,
+                    'severity': 3,
                 }
             )
 
diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index a04e859..4cfeb54 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -5,7 +5,7 @@
 from netforge_rl.core.action import BaseAction, ActionEffect
 from netforge_rl.core.observation import BaseObservation
 from netforge_rl.core.registry import action_registry
-import netforge_rl.actions  # noqa: F401 
+import netforge_rl.actions  # noqa: F401
 from netforge_rl.core.physics import ConflictResolutionEngine
 from netforge_rl.environment.base_env import BaseNetForgeRLEnv
 from netforge_rl.topologies.network_generator import NetworkGenerator
@@ -362,6 +362,21 @@ def step(
                     self.global_state,
                 )
 
+                # Honeytoken Triggered Logic
+                if host and getattr(host, 'contains_honeytokens', False):
+
+                    honey_alert = {
+                        'signature': 'HONEYTOKEN_TRIGGERED',
+                        'severity': 10,
+                        'target': target_ip,
+                        'agent': res_agent,
+                    }
+                    self.siem_logger._push_to_buffer(
+                        honey_alert,  # We pass the dict directly; encoder will handle it via str()
+                        host.subnet_cidr,
+                        self.global_state,
+                    )
+
         # Generate background SIEM noise every tick
         self.siem_logger.log_background_noise(self.global_state)
 

From a9b75bf5fc1c6c1af189a17f52f6960ad134ebaa Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 12:56:13 +0200
Subject: [PATCH 16/17] test(env): synchronize test suite

---
 tests/environment/test_env_dynamics.py | 27 ++++++++++++++++++--------
 tests/environment/test_reset.py        |  8 ++++----
 tests/environment/test_step.py         | 21 ++++++++++++++------
 tests/siem/test_siem_logger.py         | 11 +++++++----
 4 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/tests/environment/test_env_dynamics.py b/tests/environment/test_env_dynamics.py
index e208cc2..4d494d3 100644
--- a/tests/environment/test_env_dynamics.py
+++ b/tests/environment/test_env_dynamics.py
@@ -29,27 +29,37 @@ def execute(self, state):
 def test_soc_budget_limit(env):
     """Verify that SOC (Blue) is limited to 2 active actions."""
     env.reset(seed=42)
+    agent = 'blue_dmz'
+    env.global_state.agent_energy[agent] = 50
+
+    # Manually fill the queue with 2 agents
     env.event_queue.append(
         {
             'completion_tick': 10,
-            'agent': 'blue_operator',
+            'agent': 'blue_internal',
             'action': MagicMockAction(),
-            'effect': None,
+            'effect': ActionEffect(success=True, state_deltas={}, observation_data={}),
             'target_ip': None,
         }
     )
     env.event_queue.append(
         {
             'completion_tick': 10,
-            'agent': 'blue_commander',
+            'agent': 'blue_restricted',
             'action': MagicMockAction(),
-            'effect': None,
+            'effect': ActionEffect(success=True, state_deltas={}, observation_data={}),
             'target_ip': None,
         }
     )
 
-    env.step({'blue_operator': 0})
-    assert len(env.event_queue) == 2
+    # Attempt to add a 3rd action via step
+    initial_energy = env.global_state.agent_energy[agent]
+    env.step({agent: 0})
+
+    # Reward/Energy check: 3rd action should be ignored, so energy should not decrease
+    assert env.global_state.agent_energy[agent] == initial_energy
+    # Queue should be empty now because the 2 original resolved at tick 10
+    assert len(env.event_queue) == 0
 
 
 @pytest.mark.fast
@@ -125,9 +135,10 @@ def test_honeytoken_trap_alert(env):
         # 4. Final verification
         all_logs = env.global_state.siem_log_buffer
         honey_alerts = [
-            log
+            log[0]
             for log in all_logs
-            if isinstance(log, dict) and log.get('signature') == 'HONEYTOKEN_TRIGGERED'
+            if isinstance(log[0], dict)
+            and log[0].get('signature') == 'HONEYTOKEN_TRIGGERED'
         ]
 
         assert len(honey_alerts) > 0, (
diff --git a/tests/environment/test_reset.py b/tests/environment/test_reset.py
index 299b020..32644c1 100644
--- a/tests/environment/test_reset.py
+++ b/tests/environment/test_reset.py
@@ -22,7 +22,7 @@ def test_env_reset_shapes(env_sim_local):
 
         # Check shapes
         assert data['obs'].shape == (256,)
-        assert data['action_mask'].shape == (62,)
+        assert data['action_mask'].shape == (132,)
         assert data['siem_embedding'].shape == (128,)
 
         # Check types
@@ -44,6 +44,6 @@ def test_env_action_space_consistency(env_sim_local):
     """Verify action space shapes."""
     for agent in env_sim_local.agents:
         space = env_sim_local.action_space(agent)
-        # MultiDiscrete([12, 50])
-        assert space.nvec[0] == 12
-        assert space.nvec[1] == 50
+        # MultiDiscrete([32, 100])
+        assert space.nvec[0] == 32
+        assert space.nvec[1] == 100
diff --git a/tests/environment/test_step.py b/tests/environment/test_step.py
index b6ca2ba..b301272 100644
--- a/tests/environment/test_step.py
+++ b/tests/environment/test_step.py
@@ -45,7 +45,9 @@ def test_blue_siem_embedding_update(env_sim_local):
 
     # Inject a realistic log to ensure non-zero embedding
     fake_log = "<Event xmlns='...'><System><EventID>4624</EventID></System></Event>"
-    env_sim_local.siem_logger._push_to_buffer(fake_log, env_sim_local.global_state)
+    env_sim_local.siem_logger._push_to_buffer(
+        fake_log, '192.168.1.0/24', env_sim_local.global_state
+    )
 
     # Step to refresh observations
     actions = {a: env_sim_local.action_space(a).sample() for a in env_sim_local.agents}
@@ -53,12 +55,19 @@ def test_blue_siem_embedding_update(env_sim_local):
 
     # Check Blue agents
     blue_checked = False
-    for agent in ['blue_commander', 'blue_operator']:
-        if agent in obs:
+    for agent, agent_obs in obs.items():
+        if 'blue' in agent:
             blue_checked = True
-            emb = obs[agent]['siem_embedding']
-            # If LogEncoder is working, a non-empty string should result in non-zero vector
-            assert not np.allclose(emb, 0.0), f'Embedding for {agent} is zero'
+            emb = agent_obs['siem_embedding']
+            if 'dmz' in agent:
+                assert not np.allclose(emb, 0.0), (
+                    f'Embedding for {agent} is zero (expected signal)'
+                )
+            else:
+                # Other subnets should be zero since we only pushed to DMZ
+                assert np.allclose(emb, 0.0), (
+                    f'Embedding for {agent} is non-zero (expected noise-only)'
+                )
 
     assert blue_checked, 'No blue agents found in observations'
 
diff --git a/tests/siem/test_siem_logger.py b/tests/siem/test_siem_logger.py
index 638cddc..66a63b5 100644
--- a/tests/siem/test_siem_logger.py
+++ b/tests/siem/test_siem_logger.py
@@ -34,7 +34,7 @@ def test_siem_log_action(siem_logger, global_state):
     # P_LOG_ON_SUCCESS is 0.9. With seed 0, it should trigger.
     assert len(global_state.siem_log_buffer) > initial_buffer_size
     latest_log = global_state.siem_log_buffer[-1]
-    assert target_ip in latest_log
+    assert target_ip in latest_log[0]
 
 
 @pytest.mark.fast
@@ -42,10 +42,13 @@ def test_siem_buffer_rolling(siem_logger, global_state):
     """Verify the SIEM buffer rolls over at SIEM_BUFFER_MAX."""
     # Fill buffer
     for i in range(SIEM_BUFFER_MAX + 10):
-        siem_logger._push_to_buffer(f'Log_{i}', global_state)
+        siem_logger._push_to_buffer(f'Log_{i}', '10.0.0.0/24', global_state)
 
     assert len(global_state.siem_log_buffer) == SIEM_BUFFER_MAX
-    assert global_state.siem_log_buffer[-1] == f'Log_{SIEM_BUFFER_MAX + 9}'
+    assert global_state.siem_log_buffer[-1] == (
+        f'Log_{SIEM_BUFFER_MAX + 9}',
+        '10.0.0.0/24',
+    )
 
 
 @pytest.mark.fast
@@ -66,7 +69,7 @@ def test_log_background_noise(siem_logger, global_state):
 
     assert len(global_state.siem_log_buffer) > initial_size
     latest_log = global_state.siem_log_buffer[-1]
-    assert '[BACKGROUND]' in latest_log
+    assert '[BACKGROUND]' in latest_log[0]
 
 
 @pytest.mark.fast

From 65b3d4b5a2ab104a98b75a3e630c8ce307f24161 Mon Sep 17 00:00:00 2001
From: Igor Jankowski <igorjankowwski@gmail.com>
Date: Fri, 10 Apr 2026 12:56:50 +0200
Subject: [PATCH 17/17] Ruff format for par_env

---
 netforge_rl/environment/parallel_env.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py
index 4cfeb54..ff5bbe5 100644
--- a/netforge_rl/environment/parallel_env.py
+++ b/netforge_rl/environment/parallel_env.py
@@ -364,7 +364,6 @@ def step(
 
                 # Honeytoken Triggered Logic
                 if host and getattr(host, 'contains_honeytokens', False):
-
                     honey_alert = {
                         'signature': 'HONEYTOKEN_TRIGGERED',
                         'severity': 10,