From ff8c777a105d3812eedd2467a94f73e9d5d22792 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Sun, 29 Mar 2026 13:01:07 +0200 Subject: [PATCH 01/10] Add BLineAgent with B-Line killchain --- netforge_rl/agents/b_line.py | 74 ++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 netforge_rl/agents/b_line.py diff --git a/netforge_rl/agents/b_line.py b/netforge_rl/agents/b_line.py new file mode 100644 index 0000000..b28c15e --- /dev/null +++ b/netforge_rl/agents/b_line.py @@ -0,0 +1,74 @@ +from typing import Any +import random +import numpy as np + + +class BLineAgent: + """ + Scripted Red Agent that executes the exact B-Line killchain: + + DiscoverRemoteSystems -> DiscoverNetworkServices -> ExploitRemoteService -> PrivilegeEscalate -> Impact + """ + + def __init__(self, agent_id: str): + self.agent_id = agent_id + self.known_subnets = ['10.0.0.0/24', '192.168.1.0/24', '10.0.1.0/24'] + self.known_hosts = [] + self.exploited_hosts = [] + self.root_hosts = [] + self.impacted_hosts = [] + self.step_count = 0 + + def get_action(self, observation: np.ndarray, global_state) -> Any: + from netforge_rl.actions import ( + DiscoverRemoteSystems, + DiscoverNetworkServices, + ExploitRemoteService, + PrivilegeEscalate, + Impact, + ) + + self.step_count += 1 + + # 1. Discover Subnets if we don't know network yet + if not self.known_hosts or self.step_count < 3: + target_subnet = self.known_subnets[ + self.step_count % len(self.known_subnets) + ] + # Oracle gets to peek at true state to simulate finding hosts + for host in global_state.all_hosts.values(): + if ( + host.subnet_cidr == target_subnet + and host.ip not in self.known_hosts + ): + self.known_hosts.append(host.ip) + return DiscoverRemoteSystems(self.agent_id, target_subnet) + + # 2. Exploit known hosts if reachable + unexploited = [ + h + for h in self.known_hosts + if h not in self.exploited_hosts and global_state.can_route_to(h) + ] + if unexploited: + target = random.choice(unexploited) + self.exploited_hosts.append(target) + return ExploitRemoteService(self.agent_id, target) + + # 3. Escalate privileges + unescalated = [h for h in self.exploited_hosts if h not in self.root_hosts] + if unescalated: + target = random.choice(unescalated) + self.root_hosts.append(target) + return PrivilegeEscalate(self.agent_id, target) + + # 4. Impact fully compromised nodes + unimpacted = [h for h in self.root_hosts if h not in self.impacted_hosts] + if unimpacted: + target = random.choice(unimpacted) + self.impacted_hosts.append(target) + return Impact(self.agent_id, target) + + # Fallback to random scanning + target = random.choice(self.known_hosts) if self.known_hosts else '127.0.0.1' + return DiscoverNetworkServices(self.agent_id, target) From ee7c2b25effbac423c5dd9a43ca7483dcf9d04ff Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Sun, 29 Mar 2026 13:04:32 +0200 Subject: [PATCH 02/10] Add Green agent for masking Red Agent activity --- netforge_rl/agents/__init__.py | 3 ++ netforge_rl/agents/green_agent.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 netforge_rl/agents/__init__.py create mode 100644 netforge_rl/agents/green_agent.py diff --git a/netforge_rl/agents/__init__.py b/netforge_rl/agents/__init__.py new file mode 100644 index 0000000..939a67a --- /dev/null +++ b/netforge_rl/agents/__init__.py @@ -0,0 +1,3 @@ +from .b_line import BLineAgent + +__all__ = ['BLineAgent'] diff --git a/netforge_rl/agents/green_agent.py b/netforge_rl/agents/green_agent.py new file mode 100644 index 0000000..0d20f29 --- /dev/null +++ b/netforge_rl/agents/green_agent.py @@ -0,0 +1,64 @@ +import random +from typing import Any, Dict + + +class GreenAgent: + """Simulates benign corporate users to generate behavioral background noise + and false-positive alerts, masking Red Agent activity. + + It operates on a Day/Night cycle across the simulated business hours. + """ + + def __init__(self, agent_id: str = "green_agent_0"): + self.agent_id = agent_id + + def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]: + """Generates random telemetry alerts based on the current tick's position + within the day/night cycle. + + Args: + current_tick (int): The current chronological tick of the environment. + global_state (GlobalNetworkState): The architecture state to extract subnets/hosts. + + Returns: + Dict[str, Any]: A dictionary representing telemetry generated by this tick. + """ + # Day: Ticks 0 to 100, Night: Ticks 101 to 149 + cycle_position = current_tick % 150 + is_day = cycle_position <= 100 + + noise_logs = [] + hosts = list(global_state.all_hosts.values()) + if not hosts: + return {'alerts': []} + + # Higher activity and more false positives during the day + probability_of_noise = 0.8 if is_day else 0.1 + probability_of_false_positive = 0.05 if is_day else 0.01 + + if random.random() < probability_of_noise: + # Generate benign background traffic + source = random.choice(hosts) + target = random.choice(hosts) + if source.ip != target.ip: + noise_logs.append({ + 'type': 'benign_traffic', + 'source': source.ip, + 'target': target.ip, + 'protocol': random.choice(['TCP', 'UDP', 'HTTP', 'DNS']), + 'severity': 0 + }) + + if random.random() < probability_of_false_positive: + # Generate a false positive anomaly that could trip Blue's SIEM + target = random.choice(hosts) + noise_logs.append({ + 'type': 'anomaly', + 'source': 'unknown_external', + 'target': target.ip, + 'signature': random.choice(['Failed_Login_Spike', 'Malformed_Packet', 'Suspicious_User_Agent']), + 'severity': random.randint(1, 4), + 'false_positive': True + }) + + return {'alerts': noise_logs} From 1a54e627cf8ecc8b385c2a2d23c7609e80d374a2 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:00:19 +0200 Subject: [PATCH 03/10] feat: core architecture evolution: Implemented MultiDiscrete action masking, OT/SCADA PLC nodes, Business Downtime economics, and dynamic procedural network padding. --- netforge_rl/actions/__init__.py | 63 ++++ netforge_rl/actions/blue/__init__.py | 27 ++ netforge_rl/actions/red/__init__.py | 30 ++ netforge_rl/core/action.py | 87 +++++ netforge_rl/core/observation.py | 124 +++++++ netforge_rl/environment/parallel_env.py | 392 ++++++++++++++++++++ netforge_rl/scenarios/ransomware.py | 202 ++++++++++ netforge_rl/topologies/network_generator.py | 156 ++++++++ 8 files changed, 1081 insertions(+) create mode 100644 netforge_rl/actions/__init__.py create mode 100644 netforge_rl/actions/blue/__init__.py create mode 100644 netforge_rl/actions/red/__init__.py create mode 100644 netforge_rl/core/action.py create mode 100644 netforge_rl/core/observation.py create mode 100644 netforge_rl/environment/parallel_env.py create mode 100644 netforge_rl/scenarios/ransomware.py create mode 100644 netforge_rl/topologies/network_generator.py diff --git a/netforge_rl/actions/__init__.py b/netforge_rl/actions/__init__.py new file mode 100644 index 0000000..48f5125 --- /dev/null +++ b/netforge_rl/actions/__init__.py @@ -0,0 +1,63 @@ +from .blue import ( + IsolateHost, + RestoreHost, + Monitor, + Analyze, + DeployDecoy, + Remove, + RestoreFromBackup, + DecoyApache, + DecoySSHD, + DecoyTomcat, + Misinform, + ConfigureACL, +) +from .red import ( + NetworkScan, + DiscoverRemoteSystems, + DiscoverNetworkServices, + ExploitRemoteService, + PrivilegeEscalate, + Impact, + ExploitBlueKeep, + ExploitEternalBlue, + ExploitHTTP_RFI, + JuicyPotato, + V4L2KernelExploit, + KillProcess, + ShareIntelligence, + OverloadPLC, +) + +__all__ = [ + 'IsolateHost', + 'RestoreHost', + 'Monitor', + 'Analyze', + 'DeployDecoy', + 'Remove', + 'RestoreFromBackup', + 'DecoyApache', + 'DecoySSHD', + 'DecoyTomcat', + 'Misinform', + 'ConfigureACL', + 'NetworkScan', + 'DiscoverRemoteSystems', + 'DiscoverNetworkServices', + 'ExploitRemoteService', + 'PrivilegeEscalate', + 'Impact', + 'ExploitBlueKeep', + 'ExploitEternalBlue', + 'ExploitHTTP_RFI', + 'JuicyPotato', + 'V4L2KernelExploit', + 'KillProcess', + 'ShareIntelligence', + 'OverloadPLC', +] + +from .blue import SecurityAwarenessTraining + +from .blue import DeployHoneytoken diff --git a/netforge_rl/actions/blue/__init__.py b/netforge_rl/actions/blue/__init__.py new file mode 100644 index 0000000..1e9deeb --- /dev/null +++ b/netforge_rl/actions/blue/__init__.py @@ -0,0 +1,27 @@ +from .mitigation import ( + IsolateHost, + RestoreHost, + Remove, + RestoreFromBackup, + ConfigureACL, + SecurityAwarenessTraining, +) +from .analysis import Monitor, Analyze +from .deception import DeployDecoy, DecoyApache, DecoySSHD, DecoyTomcat, Misinform, DeployHoneytoken + +__all__ = [ + 'IsolateHost', + 'RestoreHost', + 'Remove', + 'RestoreFromBackup', + 'Monitor', + 'Analyze', + 'DeployDecoy', + 'DecoyApache', + 'DecoySSHD', + 'DecoyTomcat', + 'Misinform', + 'ConfigureACL', + 'SecurityAwarenessTraining', + 'DeployHoneytoken', +] diff --git a/netforge_rl/actions/red/__init__.py b/netforge_rl/actions/red/__init__.py new file mode 100644 index 0000000..5e0b35f --- /dev/null +++ b/netforge_rl/actions/red/__init__.py @@ -0,0 +1,30 @@ +from .reconnaissance import NetworkScan, DiscoverRemoteSystems, DiscoverNetworkServices +from .exploits import ( + ExploitRemoteService, + ExploitBlueKeep, + ExploitEternalBlue, + ExploitHTTP_RFI, +) +from .privilege_escalation import PrivilegeEscalate, JuicyPotato, V4L2KernelExploit +from .impact import Impact, KillProcess +from .coordination import ShareIntelligence +from .kinetic import OverloadPLC + +__all__ = [ + 'NetworkScan', + 'DiscoverRemoteSystems', + 'DiscoverNetworkServices', + 'ExploitRemoteService', + 'ExploitBlueKeep', + 'ExploitEternalBlue', + 'ExploitHTTP_RFI', + 'PrivilegeEscalate', + 'JuicyPotato', + 'V4L2KernelExploit', + 'Impact', + 'KillProcess', + 'ShareIntelligence', + 'OverloadPLC', +] + +from .social_engineering import SpearPhishing diff --git a/netforge_rl/core/action.py b/netforge_rl/core/action.py new file mode 100644 index 0000000..ed0604a --- /dev/null +++ b/netforge_rl/core/action.py @@ -0,0 +1,87 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional, TYPE_CHECKING, Union, List + +if TYPE_CHECKING: + from netforge_rl.core.state import GlobalNetworkState + from netforge_rl.core.commands import IStateDeltaCommand + + +class ActionEffect: + """Encapsulates the resulting state changes from an action for conflict + + resolution. + """ + + def __init__( + self, + success: bool, + state_deltas: Union[Dict[str, Any], List['IStateDeltaCommand']], + observation_data: Dict[str, Any], + eta: int = 0, + ): + self.success = success + self.state_deltas = state_deltas + self.observation_data = observation_data + self.eta = eta + + +class BaseAction(ABC): + """Modular Base Action for the MARL CybORG Environment. + + All highly specific network attacks (Layer 2 - Layer 7) inherit from this class. + """ + + def __init__( + self, + agent_id: str, + target_ip: Optional[str] = None, + source_ip: Optional[str] = None, + cost: int = 1, + financial_cost: int = 0, + compute_cost: int = 0, + duration: int = 1, + required_prior_state: Optional[str] = None, + ): + self.agent_id = agent_id + self.target_ip = target_ip + self.source_ip = source_ip + self.cost = cost + self.financial_cost = financial_cost + self.compute_cost = compute_cost + self.duration = duration + self.required_prior_state = required_prior_state + + def validate(self, global_state: 'GlobalNetworkState') -> bool: + """Checks if the action is physically possible in the current network + state (e.g., is there a route, are preconditions met). + """ + if self.target_ip and self.target_ip not in global_state.all_hosts: + return False + + if self.required_prior_state: + # Check Action History state logic + agent_history = global_state.action_history.get(self.agent_id, set()) + expected_record = f"{self.required_prior_state}:{self.target_ip}" + if expected_record not in agent_history: + return False + + if self.target_ip: + host = global_state.all_hosts[self.target_ip] + # Simple declarative Zone constraints example + if 'red' in self.agent_id.lower() and host.subnet_cidr == '10.0.1.0/24': + # Secure Data targets cannot be touched without pivoting via DMZ or Internal User privileges first + has_dmz = any(h.privilege in ['User', 'Root'] for h in global_state.all_hosts.values() if h.subnet_cidr == '192.168.1.0/24') + has_internal = any(h.privilege in ['User', 'Root'] for h in global_state.all_hosts.values() if h.subnet_cidr == '10.0.0.0/24') + if not (has_dmz or has_internal): + return False + + return True + + @abstractmethod + def execute(self, global_state: 'GlobalNetworkState') -> ActionEffect: + """Computes the theoretical effect of the action. + + Note: State is NOT mutated directly here. Mutations are returned via ActionEffect + to allow the Environment to resolve simultaneous multi-agent collisions. + """ + pass diff --git a/netforge_rl/core/observation.py b/netforge_rl/core/observation.py new file mode 100644 index 0000000..fdc2e8c --- /dev/null +++ b/netforge_rl/core/observation.py @@ -0,0 +1,124 @@ +import numpy as np +from typing import Any, List + + +class BaseObservation: + """Represents the local view of the network from a single Agent's + + perspective. + + In MARL, Red and Blue teams receive fundamentally different subsets + of the global state. + """ + + def __init__(self, agent_id: str): + self.agent_id = agent_id + self.visible_hosts = {} + self.detected_anomalies = [] + self.active_sessions = [] + + # Array of floats representing the Commander's directive (e.g., target subnet index) + self.objective_vector = np.zeros(5, dtype=np.float32) + + # Tracks anomalies like 802.11 Deauths, Fragmented IP packets, etc. + self.network_telemetry = {} + + # SIEM Logs + self.siem_alerts = [] + + def update_from_state(self, global_state: Any, action_effects: List[Any]): + """Filters the global state down to only what is observable by this + + specific agent. + + Since we don't have the fully simulated GlobalNetworkState yet, + we generate dynamic dummy telemetry that structurally mimics the + CAGE challenge dictionaries. + """ + # Parse realistic data from the OOP GlobalNetworkState + if global_state: + # Enforce True Partial Observability (Fog of War) + # Agents only receive tensor data for hosts within their active knowledge graph + known_ips = global_state.agent_knowledge.get(self.agent_id, set()) + for ip in known_ips: + if ip in global_state.all_hosts: + host = global_state.all_hosts[ip] + + if 'blue' in self.agent_id.lower(): + # Strict POMDP: Blue cannot see physical truth vectors. + # They must rely on SIEM telemetry alone for detection. + self.visible_hosts[ip] = { + 'state': 'unknown', + 'status': host.status, + 'decoy': host.decoy, + } + else: + # Red Team directly monitors nodes they root. + self.visible_hosts[ip] = { + 'state': 'compromised' if host.privilege in ['User', 'Root'] else 'clean', + 'status': host.status, + 'decoy': 'unknown', + } + + if 'commander' in self.agent_id.lower() or 'blue' in self.agent_id.lower(): + # Pull SIEM logs that have arrived (arrival_tick <= current_tick) + if hasattr(global_state, 'siem_log_buffer'): + for log in global_state.siem_log_buffer: + if log.get('arrival_tick', 0) <= getattr(global_state, 'current_tick', 0): + self.siem_alerts.append(log) + + self.network_telemetry['global_alert_level'] = np.random.uniform(0, 1) + self.network_telemetry['total_isolated_subnets'] = np.random.randint(0, 5) + self.network_telemetry['active_alerts'] = len(self.siem_alerts) + + if 'operator' in self.agent_id.lower(): + self.objective_vector[2] = 1.0 + + def to_numpy(self, max_size: int = 256) -> np.ndarray: + """Serializes the object-oriented observation into a fixed-size Tensor + + for RL Neural Networks. + + This must be mathematically rigorous. If a node isn't seen, its + index must be explicitly 0. + """ + vector = np.zeros(max_size, dtype=np.float32) + idx = 0 + + if 'global_alert_level' in self.network_telemetry and idx < max_size: + vector[idx] = self.network_telemetry['global_alert_level'] + idx += 1 + + if 'total_isolated_subnets' in self.network_telemetry and idx < max_size: + vector[idx] = ( + float(self.network_telemetry['total_isolated_subnets']) / 10.0 + ) # Normalized + idx += 1 + + if 'active_alerts' in self.network_telemetry and idx < max_size: + vector[idx] = float(min(self.network_telemetry['active_alerts'] / 20.0, 1.0)) + idx += 1 + + for val in self.objective_vector: + if idx < max_size: + vector[idx] = val + idx += 1 + + for ip, data in self.visible_hosts.items(): + if idx + 2 >= max_size: + break + + ip_val = float(ip.split('.')[-1]) / 255.0 # Normalize IP tail + state_val = ( + 1.0 + if data.get('state') == 'compromised' + else -1.0 + if data.get('state') == 'clean' + else 0.0 + ) + + vector[idx] = ip_val + vector[idx + 1] = state_val + idx += 2 + + return vector diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py new file mode 100644 index 0000000..267db11 --- /dev/null +++ b/netforge_rl/environment/parallel_env.py @@ -0,0 +1,392 @@ +from typing import Dict, Tuple +import numpy as np +import gymnasium as gym + +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.observation import BaseObservation +from netforge_rl.core.registry import action_registry +from netforge_rl.core.physics import ConflictResolutionEngine +from netforge_rl.environment.base_env import BaseNetForgeRLEnv +import netforge_rl.actions # Triggers decorator registration +from netforge_rl.topologies.network_generator import NetworkGenerator +from netforge_rl.agents.green_agent import GreenAgent + + +class NetForgeRLEnv(BaseNetForgeRLEnv): + """MARL Environment for CybORG. + + Follows the PettingZoo Parallel API standard for simultaneous Multi- + Agent execution and relies exclusively on Gymnasium spaces natively. + """ + + metadata = {'render_modes': ['ansi'], 'name': 'netforge_rl_v3'} + + def __init__(self, scenario_config: dict): + # Default to procedural generation if no specific architecture config is provided + topology_path = ( + scenario_config.get('topology_path') if scenario_config else None + ) + self.network_generator = NetworkGenerator(config_path=topology_path) + + scenario_type = ( + scenario_config.get('scenario_type', 'ransomware') + if scenario_config + else 'ransomware' + ) + self.log_latency = scenario_config.get('log_latency', 2) if scenario_config else 2 + self.green_agent = GreenAgent() + self.possible_agents = [ + 'red_commander', + 'red_operator', + 'blue_commander', + 'blue_operator', + ] + self.agents = self.possible_agents[:] + + if scenario_type.lower() == 'ransomware': + from netforge_rl.scenarios.ransomware import RansomwareScenario + + self.scenario = RansomwareScenario(self.agents) + else: + from netforge_rl.scenarios.apt_espionage import AptEspionageScenario + + self.scenario = AptEspionageScenario(self.agents) + + self.global_state = self.network_generator.generate() + self.resolution_engine = ConflictResolutionEngine() + + # Native Gymnasium Spaces for PettingZoo API + RLlib Mapping + self.observation_spaces = { + agent: gym.spaces.Dict({ + "obs": gym.spaces.Box(low=-1.0, high=1.0, shape=(256,), dtype=np.float32), + "action_mask": gym.spaces.Box(low=0, high=1, shape=(62,), dtype=np.int8) # 12 action types + 50 IPs + }) + for agent in self.possible_agents + } + self.action_spaces = { + agent: gym.spaces.MultiDiscrete([12, 50]) # [Action Type (max 12), Target IP Index (max 50 padded)] + for agent in self.possible_agents + } + self.max_ticks = 1000 + self.current_tick = 0 + self.event_queue = [] + + def reset( + self, seed=None, options=None + ) -> Tuple[Dict[str, np.ndarray], Dict[str, dict]]: + """Resets the network state to initial configuration natively + + (Gymnasium style + PettingZoo). + """ + self.global_state = self.network_generator.generate(seed=seed) + self.agents = self.possible_agents[:] + self.global_state.agent_energy = {agent: 50 for agent in self.agents} + self.global_state.agent_funds = {agent: 10000 if 'blue' in agent else 5000 for agent in self.agents} + self.global_state.agent_compute = {agent: 1000 for agent in self.agents} + self.global_state.business_downtime_score = 0.0 + observations = {} + for agent_id in self.agents: + obs = BaseObservation(agent_id) + obs.update_from_state(self.global_state, []) + observations[agent_id] = { + "obs": obs.to_numpy(max_size=256), + "action_mask": self.action_mask(agent_id) + } + self.current_tick = 0 + self.event_queue = [] + + return observations, {agent: {} for agent in self.agents} + + def observation_space(self, agent): + return self.observation_spaces[agent] + + def action_space(self, agent): + return self.action_spaces[agent] + + def action_mask(self, agent: str) -> np.ndarray: + """Returns a binary mask denoting valid and distinct action integers for the agent, + pruning out computationally redundant modulo duplicates. + """ + # RLlib explicitly requires MultiDiscrete action masks to be concatenated flat boolean layers. + # Action space: [12 types, 50 IPs]. Therefore Mask shape = (62,) + mask = np.zeros(62, dtype=np.int8) + + # 1. Action Type Dimension (0-11) + if 'red' in agent.lower(): + valid_action_types = 4 if 'commander' in agent.lower() else 9 + else: + valid_action_types = 5 if 'commander' in agent.lower() else 7 + mask[:valid_action_types] = 1 + + # 2. Target IP Dimension (12-61) + target_ips = sorted(list(self.global_state.all_hosts.keys())) + num_targets = min(len(target_ips), 50) + mask[12:12 + num_targets] = 1 + + return mask + + def step( + self, agent_actions: Dict[str, int] + ) -> Tuple[ + Dict[str, BaseObservation], + Dict[str, float], + Dict[str, bool], + Dict[str, bool], + Dict[str, dict], + ]: + """ + Simultaneous Step Execution Logic: + + 1. PROCESS NEW ACTIONS: Validate budgets and enqueue async events. + 2. INTERRUPTION LOGIC: Immediate cancel operations for specific defensive tasks. + 3. ADVANCE TIME: `current_tick` progresses by 1. + 4. RESOLVE MATURE EVENTS: Apply ActionEffects that reach `completion_tick`. + 5. OBSERVATION: Agents receive POMDP updates every tick. + """ + intended_effects = {} + + # 1. PROCESS NEW ACTIONS + blue_active_actions_count = sum( + 1 for event in self.event_queue if 'blue' in event['agent'].lower() + ) + + for agent, action_int in agent_actions.items(): + # Validate temporal locks + if self.current_tick < self.global_state.agent_locked_until.get(agent, 0): + continue + + if isinstance(action_int, BaseAction): + action = action_int + else: + target_ips = sorted(list(self.global_state.all_hosts.keys())) + action = action_registry.instantiate_action(agent, action_int, target_ips) + if action is None: + continue # Invalid action/unmapped action bounds + + # SOC Budget Check (Max 2 active defensive actions) + if 'blue' in agent.lower(): + if blue_active_actions_count >= 2: + continue # SOC is busy, silently ignore + blue_active_actions_count += 1 + + # Validate temporal energy constraints + if self.global_state.agent_energy.get(agent, 0) < action.cost: + continue + + # Expend energy and validate state + self.global_state.agent_energy[agent] -= action.cost + + if action.validate(self.global_state): + eta = getattr(action, 'duration', 1) + completion_tick = self.current_tick + eta + + # Generate intended effect (though state might shift by completion time) + effect = action.execute(self.global_state) + + self.global_state.agent_locked_until[agent] = completion_tick + self.event_queue.append({ + 'completion_tick': completion_tick, + 'agent': agent, + 'action': action, + 'effect': effect, + 'target_ip': getattr(action, 'target_ip', None) + }) + + # 2. INTERRUPTION LOGIC (e.g., IsolateHost Immediately Cancels Ongoing Attacks) + for event in list(self.event_queue): + if type(event['action']).__name__ == "IsolateHost" and event['completion_tick'] > self.current_tick: + # Isolate is queued or starting now; interrupt Red + target_to_isolate = event['target_ip'] + for red_event in list(self.event_queue): + if 'red' in red_event['agent'].lower() and red_event['target_ip'] == target_to_isolate: + if red_event in self.event_queue: + self.event_queue.remove(red_event) + # Unlock Red agent since their attack was disrupted + self.global_state.agent_locked_until[red_event['agent']] = self.current_tick + + # 3. ADVANCE TIME + self.current_tick += 1 + self.global_state.current_tick = self.current_tick + self.global_state.subnet_bandwidth.clear() + + # GENERATE BACKGROUND NOISE & DELAYED ALERTS + noise_data = self.green_agent.generate_noise(self.current_tick, self.global_state) + for anomaly in noise_data.get('alerts', []): + anomaly['arrival_tick'] = self.current_tick + self.log_latency + self.global_state.siem_log_buffer.append(anomaly) + + # 4. RESOLVE MATURE EVENTS + remaining_events = [] + for event in self.event_queue: + if self.current_tick >= event['completion_tick']: + intended_effects[event['agent']] = event['effect'] + else: + remaining_events.append(event) + self.event_queue = remaining_events + + resolved_effects = self.resolution_engine.resolve(intended_effects) + + self._apply_state_deltas(resolved_effects) + + # Generate True Positive telemetry from attacks that hit SIEM + for res_agent, res_effect in resolved_effects.items(): + if 'red' in res_agent and res_effect.success: + target_ip = res_effect.observation_data.get('exploit', 'unknown') + + # Active Deception intercept + host = self.global_state.all_hosts.get(target_ip) + is_honeytoken_trap = host and host.contains_honeytokens + + signature = 'HONEYTOKEN_TRIGGERED' if is_honeytoken_trap else 'RED_ACTION_DETECTED' + severity = 10 if is_honeytoken_trap else 5 + log_delay = 0 if is_honeytoken_trap else self.log_latency + + self.global_state.siem_log_buffer.append({ + 'type': 'anomaly', + 'source': res_agent, + 'target': target_ip, + 'signature': signature, + 'severity': severity, + 'false_positive': False, + 'arrival_tick': self.current_tick + log_delay + }) + + observations = {} + rewards = {} + terminate = self.scenario.check_termination(self.global_state) + + # Trigger dynamic topology mutations mid-episode + if self.current_tick % 40 == 0: + self.global_state.reallocate_dhcp() + + is_truncated = self.current_tick >= self.max_ticks + truncate = {agent: is_truncated for agent in self.agents} + + for agent in self.agents: + obs = BaseObservation(agent) + obs.update_from_state(self.global_state, resolved_effects) + + obs_array = obs.to_numpy(max_size=256) + if 'operator' in agent: + commander_id = agent.replace('operator', 'commander') + if commander_id in agent_actions: + cmd_action = agent_actions[commander_id] + # Normalize the MultiDiscrete action to a float between 0.0 and 1.0 + cmd_val = ( + (float(cmd_action[0]) / 12.0) + if getattr(cmd_action, '__iter__', False) and not isinstance(cmd_action, BaseAction) + else 1.0 + ) + obs_array[0] = cmd_val + + observations[agent] = { + "obs": obs_array, + "action_mask": self.action_mask(agent) + } + # Reward shaping applied here natively factoring in immediate action outcomes + agent_effect = resolved_effects.get(agent) + rewards[agent] = self._calculate_reward( + agent, self.global_state, agent_effect + ) + + self.agents = [ + agent + for agent in self.agents + if not terminate[agent] and not truncate[agent] + ] + + # ── Build info dicts with security metrics for callbacks ── + infos = self._extract_agent_infos(observations, resolved_effects) + + return observations, rewards, terminate, truncate, infos + + def render(self): + """Standard PettingZoo GUI logging render hook.""" + pass + + def _decode_action(self, agent_id: str, action_int: int) -> BaseAction: + target_ips = sorted(list(self.global_state.all_hosts.keys())) + return action_registry.instantiate_action(agent_id, action_int, target_ips) + + def _apply_state_deltas(self, effects: Dict[str, ActionEffect]): + """Applies validated deltas to the GlobalNetworkState. + + Only called AFTER temporal collisions have been mathematically resolved. + """ + for agent_id, effect in effects.items(): + if effect.success: + if isinstance(effect.state_deltas, dict): + for delta_key, delta_val in effect.state_deltas.items(): + self.global_state.apply_delta(delta_key, delta_val) + elif isinstance(effect.state_deltas, list): + for delta_cmd in effect.state_deltas: + self.global_state.apply_delta(delta_cmd) + + def _calculate_reward( + self, agent_id: str, state, effect: ActionEffect = None + ) -> float: + """Delegates reward logic directly to the localized Scenario module.""" + return self.scenario.calculate_reward(agent_id, state, effect) + + def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> dict: + """Extracts security metrics for TensorBoard and CSV logging callbacks. + + Args: + observations: Dictionary of agent observations for this step. + resolved_effects: Dictionary of resolved action effects. + + Returns: + Dictionary mapping agent_id to an info dictionary with security metrics. + """ + infos = {} + for agent in list(observations.keys()): + agent_effect = resolved_effects.get(agent) + info: dict = {} + + # Count security-relevant events from this step + false_positives = 0 + successful_exploits = 0 + hosts_isolated = 0 + services_restored = 0 + + if agent_effect and agent_effect.success: + for delta_key, delta_val in agent_effect.state_deltas.items(): + if 'status' in delta_key and delta_val == 'isolated': + hosts_isolated += 1 + # Check if the isolated host was actually compromised + parts = delta_key.split('/') + if len(parts) >= 2: + ip = parts[1] + host = self.global_state.all_hosts.get(ip) + if host and host.compromised_by == 'None': + false_positives += 1 # Isolated a clean host + elif 'privilege' in delta_key and delta_val in ('User', 'Root'): + successful_exploits += 1 + elif 'status' in delta_key and delta_val == 'online': + services_restored += 1 + + info['false_positives'] = float(false_positives) + info['successful_exploits'] = float(successful_exploits) + info['hosts_isolated'] = float(hosts_isolated) + info['services_restored'] = float(services_restored) + + # Extra context for analysis + info['agent_energy'] = float(self.global_state.agent_energy.get(agent, 0)) + info['compromised_hosts'] = float( + sum( + 1 + for h in self.global_state.all_hosts.values() + if h.compromised_by != 'None' + ) + ) + info['isolated_hosts'] = float( + sum( + 1 + for h in self.global_state.all_hosts.values() + if h.status == 'isolated' + ) + ) + + infos[agent] = info + + return infos diff --git a/netforge_rl/scenarios/ransomware.py b/netforge_rl/scenarios/ransomware.py new file mode 100644 index 0000000..0a783d1 --- /dev/null +++ b/netforge_rl/scenarios/ransomware.py @@ -0,0 +1,202 @@ +from typing import TYPE_CHECKING, Dict +from netforge_rl.scenarios.base_scenario import BaseScenario + +if TYPE_CHECKING: + from netforge_rl.core.state import GlobalNetworkState + from netforge_rl.core.action import ActionEffect + + +class RansomwareScenario(BaseScenario): + """ + Scenario: Red Team aggressively deploys ransomware. + + Reward Design Principles: + ───────────────────────── + • Action-level rewards are given ONCE on the step the action succeeds. + • NO per-step state compounding (avoids runaway accumulation). + • Episode-end bonus/penalty based on final network state. + • Red and Blue reward magnitudes are balanced (~±5 per action). + """ + + def __init__(self, agents): + self.agents = agents + + def calculate_reward( + self, + agent_id: str, + global_state: 'GlobalNetworkState', + effect: 'ActionEffect' = None, + ) -> float: + reward = 0.0 + + # Small energy cost penalty for any action + if effect and getattr(effect, 'cost', 0) > 0: + reward -= effect.cost * 0.05 + + if 'red' in agent_id.lower(): + reward += self._red_reward(agent_id, global_state, effect) + elif 'blue' in agent_id.lower(): + reward += self._blue_reward(agent_id, global_state, effect) + + return reward + + # ── Red Team Rewards ────────────────────────────────────── + + def _red_reward( + self, + agent_id: str, + global_state: 'GlobalNetworkState', + effect: 'ActionEffect' = None, + ) -> float: + reward = 0.0 + + if not effect: + return reward + + # ── ONE-TIME action bonuses (only on success) ───────── + if effect.success and effect.state_deltas: + for delta_key, delta_val in effect.state_deltas.items(): + # Initial compromise (None → User) + if 'privilege' in delta_key and delta_val == 'User': + reward += 3.0 + + # Privilege escalation (User → Root) + elif 'privilege' in delta_key and delta_val == 'Root': + reward += 5.0 + + # Ransomware impact (system_integrity → compromised) + elif 'system_integrity' in delta_key and delta_val == 'compromised': + reward += 10.0 + + # Setting compromised_by (confirms exploitation) + elif 'compromised_by' in delta_key and delta_val != 'None': + reward += 2.0 + + # OT / SCADA Catastrophic Win + elif 'system_integrity' in delta_key and delta_val == 'kinetic_destruction': + reward += 10000.0 + + # ── Observation-based rewards ───────────────────────── + if effect.observation_data: + obs = effect.observation_data + + # Reconnaissance discoveries + if 'discovered_hosts' in obs: + discovered = obs['discovered_hosts'] + count = len(discovered) if isinstance(discovered, (list, set)) else 1 + reward += count * 0.5 + + if 'scan_results' in obs: + reward += 0.3 + + # Intelligence sharing + if 'shared' in obs: + reward += 1.0 + + # Penalties for failures + if 'Failed against Decoy' in str(obs.values()): + reward -= 3.0 + elif 'kernel panic' in str(obs.values()): + reward -= 5.0 + + # ── Failed action penalty ───────────────────────────── + if not effect.success: + reward -= 0.1 # Small penalty for wasted turn + + return reward + + # ── Blue Team Rewards ───────────────────────────────────── + + def _blue_reward( + self, + agent_id: str, + global_state: 'GlobalNetworkState', + effect: 'ActionEffect' = None, + ) -> float: + reward = 0.0 + + # ── ONE-TIME action bonuses ─────────────────────────── + if effect and effect.success and effect.state_deltas: + for delta_key, delta_val in effect.state_deltas.items(): + # Successful isolation + if 'status' in delta_key and delta_val == 'isolated': + ip = delta_key.split('/')[1] if '/' in delta_key else None + if ip: + host = global_state.all_hosts.get(ip) + if host and host.compromised_by != 'None': + reward += 5.0 # Correctly quarantined a compromised host + else: + reward -= 2.0 # False positive — isolated a clean host + + # Successful restoration/cleanup + elif 'privilege' in delta_key and delta_val == 'None': + reward += 3.0 # Successfully cleaned a host + + elif 'status' in delta_key and delta_val == 'online': + reward += 1.0 # Restored connectivity + + # Deployed a decoy + elif 'decoy' in delta_key and delta_val != 'inactive': + reward += 1.0 # Proactive defense + + # Failed to prevent Kinetic Destruction of SCADA framework + elif 'system_integrity' in delta_key and delta_val == 'kinetic_destruction': + reward -= 10000.0 + + # ── Per-step network health (small, bounded) ────────── + # Only count the RATIO of healthy hosts to avoid scale issues + total_hosts = max(len(global_state.all_hosts), 1) + healthy = sum( + 1 + for h in global_state.all_hosts.values() + if h.compromised_by == 'None' and h.status != 'isolated' + ) + health_ratio = healthy / total_hosts + reward += health_ratio * 1.0 # 0.0 to 1.0 per step + + # Small penalty proportional to compromised hosts + compromised = sum( + 1 + for h in global_state.all_hosts.values() + if h.compromised_by != 'None' and h.status != 'isolated' + ) + if compromised > 0: + reward -= (compromised / total_hosts) * 2.0 # 0.0 to -2.0 + + # ── Business Downtime Penalty (Economics) ───────────── + total_isolated = sum( + 1 for h in global_state.all_hosts.values() + if h.status == 'isolated' and h.subnet_cidr != '169.254.0.0/16' + ) + if total_isolated > 0: + business_loss = (total_isolated / total_hosts) * 5.0 + global_state.business_downtime_score += business_loss + reward -= business_loss + + # ── Failed action penalty ───────────────────────────── + if effect and not effect.success: + reward -= 0.1 + + return reward + + def check_termination(self, global_state: 'GlobalNetworkState') -> Dict[str, bool]: + terminate = {agent: False for agent in self.agents} + + # If all critical Corporate/Secure hosts are compromised, episode ends + target_hosts = [ + h + for h in global_state.all_hosts.values() + if h.subnet_cidr in ['10.0.0.0/24', '10.0.1.0/24'] + ] + + if target_hosts and all( + getattr(h, 'system_integrity', 'clean') == 'compromised' + for h in target_hosts + ): + return {agent: True for agent in self.agents} + + # Terminal Fail-State for Blue: A PLC has been physically destroyed + if any(getattr(h, 'system_integrity', 'clean') == 'kinetic_destruction' for h in global_state.all_hosts.values()): + return {agent: True for agent in self.agents} + + return terminate diff --git a/netforge_rl/topologies/network_generator.py b/netforge_rl/topologies/network_generator.py new file mode 100644 index 0000000..34e8300 --- /dev/null +++ b/netforge_rl/topologies/network_generator.py @@ -0,0 +1,156 @@ +import random +import yaml +from pathlib import Path +from typing import Optional +from netforge_rl.core.state import GlobalNetworkState, Subnet, Host + + +class NetworkGenerator: + """Procedurally generates or loads dynamic network topologies for MARL + + training. + + Prevents agents from overfitting to a static 10-node architecture. + """ + + def __init__(self, config_path: Optional[str] = None): + self.config_path = config_path + + def generate(self, seed: Optional[int] = None) -> GlobalNetworkState: + """Generates the architecture. + + If a config path was provided, loads deterministically. + Otherwise, procedurally generates a randomized topology. + """ + if seed is not None: + random.seed(seed) + + if self.config_path and Path(self.config_path).exists(): + return self._load_from_yaml(self.config_path) + + return self._generate_procedural() + + def _generate_procedural(self) -> GlobalNetworkState: + """Creates a randomized network using NetworkX hierarchical patterns. + + Enforces a constant size of 50 hosts for Neural Network dimension consistency. + Active topology spans 15-30 nodes; the rest are instantiated as inactive padding. + """ + import networkx as nx + + state = GlobalNetworkState() + G = nx.DiGraph() + + # Generate hierarchy parameters + num_subnets = random.randint(3, 4) + subnet_names = ['DMZ', 'Corporate', 'Secure', 'Guest'][:num_subnets] + base_ips = ['192.168.1', '10.0.0', '10.0.1', '172.16.0'][:num_subnets] + + # 25% Chance to spawn a critical Cyber-Physical OT Subnet + if random.random() < 0.25: + subnet_names.append('OT_Subnet') + base_ips.append('10.0.99') + + active_hosts = [] + domain_controllers = [] + + # Build Subnets and distribute hosts + for i, name in enumerate(subnet_names): + cidr = f'{base_ips[i]}.0/24' + subnet = Subnet(cidr=cidr, name=name) + state.add_subnet(subnet) + + # Weight more hosts into Corp and Secure zones + num_hosts = random.randint(3, 8) if name in ['Corporate', 'Secure'] else random.randint(2, 5) + + for j in range(1, num_hosts + 1): + host_ip = f'{base_ips[i]}.{j * random.randint(1, 3)}' + + # Check for duplicates due to random gap intervals + while host_ip in [h.ip for h in active_hosts]: + host_ip = f'{base_ips[i]}.{j * random.randint(1, 10)}' + + host = Host(ip=host_ip, hostname=f'{name}_Node_{j}', subnet_cidr=cidr) + + # Assign Decoys vs Real Systems + if random.random() < 0.15 and name != 'OT_Subnet': + host.decoy = random.choice(['Apache', 'SSHD', 'Tomcat', 'active']) + else: + if name == 'OT_Subnet': + chosen_os = 'PLC_Firmware' + chosen_services = ['Modbus', 'S7Comm'] + potential_cves = ['CVE-2010-2772', 'Stuxnet_0day'] + setattr(host, 'temperature', float(random.randint(40, 60))) + setattr(host, 'pressure', float(random.randint(90, 110))) + else: + profiles = [ + ('Windows_Server_2016', ['SMB', 'IIS'], ['MS17-010', 'CVE-2021-44228']), + ('Windows_10', ['RDP', 'SMB'], ['CVE-2019-0708', 'MS17-010']), + ('Linux_Ubuntu', ['SSH', 'Apache'], ['CVE-2021-44228', 'V4L2']), + ('Linux_CentOS', ['SSH', 'Tomcat'], ['CVE-2021-44228']), + ] + chosen_os, chosen_services, potential_cves = random.choice(profiles) + + host.os = chosen_os + host.services = chosen_services + host.cvss_score = round(random.uniform(3.5, 9.8), 1) + + # Human error dynamics: Linux admins fall for phishing less often than generalized Windows Corporate users + base_phish = random.uniform(0.1, 0.4) if 'Linux' in chosen_os else random.uniform(0.3, 0.9) + host.human_vulnerability_score = round(base_phish, 2) + + num_vulns = random.randint(0, min(2, len(potential_cves))) + host.vulnerabilities = random.sample(potential_cves, num_vulns) + + # Designate Domain Controllers only in Corp or Secure Windows servers + if 'Windows' in chosen_os and name in ['Corporate', 'Secure']: + if random.random() < 0.3: + domain_controllers.append(host) + + active_hosts.append(host) + state.register_host(host) + G.add_node(host.ip, type=name) + + # Assure at least 1 Domain Controller exists + if domain_controllers: + random.choice(domain_controllers).is_domain_controller = True + else: + # Force upgrade a random Windows host + win_hosts = [h for h in active_hosts if 'Windows' in h.os] + if win_hosts: + random.choice(win_hosts).is_domain_controller = True + + # Fill strictly to 50 nodes for Neural Network shape constant + padding_needed = 50 - len(state.all_hosts) + for p in range(padding_needed): + pad_ip = f'169.254.0.{p+1}' + pad_host = Host(ip=pad_ip, hostname=f'Pad_Node_{p}', subnet_cidr='169.254.0.0/16') + pad_host.status = 'isolated' # Native Action Masking bounds + state.register_host(pad_host) + + self._configure_procedural_vision(state) + return state + + def _configure_procedural_vision(self, state: GlobalNetworkState): + """Builds fog-of-war vision depending on the layout.""" + # Red baseline starts in DMZ + for host in state.all_hosts.values(): + if host.subnet_cidr == '192.168.1.0/24' and host.status != 'isolated': + state.update_knowledge('red_commander', host.ip) + state.update_knowledge('red_operator', host.ip) + break + + # Blue knows all active topology natively but is blind to zero-padded isolated objects + for host in state.all_hosts.values(): + if host.status != 'isolated': + state.update_knowledge('blue_commander', host.ip) + state.update_knowledge('blue_operator', host.ip) + + def _load_from_yaml(self, path: str) -> GlobalNetworkState: + """Loads a deterministic graph from a YAML configuration.""" + with open(path, 'r') as f: + _ = yaml.safe_load(f) + + # Implementation left for future expansion if YAML is required. + # Defaults to procedural if parsing fails. + return self._generate_procedural() From af35ad1f3715bbd2abdeb384ebaf24556c94cea7 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:00:57 +0200 Subject: [PATCH 04/10] feat: advanced combat mechanics Added OverloadPLC termination rewards, DMZ SpearPhishing bypass, SecurityAwareness mitigation logic, and RAM-seeded Honeytoken Active Deception. --- netforge_rl/actions/blue/deception.py | 242 +++++++++++++++ netforge_rl/actions/blue/mitigation.py | 276 ++++++++++++++++++ netforge_rl/actions/red/kinetic.py | 75 +++++ netforge_rl/actions/red/social_engineering.py | 66 +++++ 4 files changed, 659 insertions(+) create mode 100644 netforge_rl/actions/blue/deception.py create mode 100644 netforge_rl/actions/blue/mitigation.py create mode 100644 netforge_rl/actions/red/kinetic.py create mode 100644 netforge_rl/actions/red/social_engineering.py diff --git a/netforge_rl/actions/blue/deception.py b/netforge_rl/actions/blue/deception.py new file mode 100644 index 0000000..a07c1ed --- /dev/null +++ b/netforge_rl/actions/blue/deception.py @@ -0,0 +1,242 @@ +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + + + +@action_registry.register('blue_commander', 0) +class DeployDecoy(BaseAction): + """Deploys a generic high-interaction honeypot/decoy service to a target + + host. + + Simulates services like Femitter or blank Apache instances to bait Red Agent + exploitation attempts, subsequently flagging them in Blue telemetry. + + Args: + agent_id (str): Target Blue agent orchestrator. + target_ip (str): IP address hosting the new decoy daemon. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Validates target host existence natively within the network map. + + Args: + global_state (GlobalNetworkState): Simulation snapshot. + + Returns: + bool: Always True assuming orchestrator controls the infrastructure. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Updates the host configuration to actively broadcast a decoy + + profile. + + Args: + global_state (GlobalNetworkState): The current baseline state. + + Returns: + ActionEffect: A structural delta setting 'decoy' status to 'active'. + """ + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/decoy': 'active'}, + observation_data={'decoy_deployed': self.target_ip}, + ) + + +@action_registry.register('blue_commander', 1) +class DecoyApache(BaseAction): + """Deploys a specifically profiled Apache Web Server (Port 80) honeypot. + + Deceives Nmap scans and absorbs associated web-based Remote File Inclusion (RFI) exploits. + + Args: + agent_id (str): Target Blue agent orchestrator. + target_ip (str): IP address hosting the new decoy daemon. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Validation pre-checks for port conflicts and architecture readiness. + + Args: + global_state: Baseline state. + + Returns: + bool: Action clearance bool. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Applies the decoy instantiation physics. + + Args: + global_state: Baseline state. + + Returns: + ActionEffect: Delta converting the host to an Apache decoy sinkhole. + """ + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/decoy': 'Apache'}, + observation_data={'decoy_deployed': f'Apache on {self.target_ip}'}, + ) + + +@action_registry.register('blue_commander', 2) +class DecoySSHD(BaseAction): + """Deploys a fake SSH daemon (Port 22) honeypot specifically designed to + + bait brute force actions. + + Args: + agent_id (str): Target Blue agent orchestrator. + target_ip (str): IP address hosting the new decoy daemon. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """ + Args: + global_state: Baseline state. + + Returns: + bool: Action clearance bool. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """ + Args: + global_state: Baseline state. + + Returns: + ActionEffect: Sets the 'decoy' string to 'SSHD'. + """ + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/decoy': 'SSHD'}, + observation_data={'decoy_deployed': f'SSHD on {self.target_ip}'}, + ) + + +@action_registry.register('blue_commander', 3) +class DecoyTomcat(BaseAction): + """Deploys a fake Tomcat server (Port 8080) to deceive application port + + scans. + + Args: + agent_id (str): Target Blue agent orchestrator. + target_ip (str): IP address hosting the new decoy daemon. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """ + Args: + global_state: Baseline state. + + Returns: + bool: Action clearance bool. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """ + Args: + global_state: Baseline state. + + Returns: + ActionEffect: Sets the 'decoy' string to 'Tomcat'. + """ + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/decoy': 'Tomcat'}, + observation_data={'decoy_deployed': f'Tomcat on {self.target_ip}'}, + ) + + +@action_registry.register('blue_commander', 4) +class Misinform(BaseAction): + """Injects false host telemetry or alters logging infrastructure to feed + + Red agents fake data. + + Disrupts adversary situational awareness by generating spoofed observations. + + Args: + agent_id (str): Target Blue agent ID. + target_ip (str): IP address of the target infrastructure component. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """ + Args: + global_state: Baseline state. + + Returns: + bool: Action clearance bool. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """ + Args: + global_state: Baseline state. + + Returns: + ActionEffect: Activates misinformation campaign flags logically. + """ + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/misinformation': 'active'}, + observation_data={ + 'alert': f'Misinformation campaign active on {self.target_ip}.' + }, + ) + +@action_registry.register('blue_commander', 5) +class DeployHoneytoken(BaseAction): + """ + Injects fake, highly-monitored credentials into the memory space of a real host. + + If a Red agent successfully compromises this host and attempts to perform + post-exploitation (e.g., Pass-the-Hash, credential dumping), they ingest the + Honeytoken instead. This triggers an immediate, 100% confidence SIEM Alert + exposing the Red agent's exact location natively. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__( + agent_id, + target_ip=target_ip, + cost=5, + financial_cost=50, + duration=1 + ) + + def validate(self, global_state) -> bool: + return self.target_ip in global_state.all_hosts + + def execute(self, global_state) -> ActionEffect: + return ActionEffect( + success=True, + state_deltas={f'hosts/{self.target_ip}/contains_honeytokens': True}, + observation_data={'alert': f'Honeytokens actively deployed in RAM on {self.target_ip}.'}, + eta=self.duration + ) diff --git a/netforge_rl/actions/blue/mitigation.py b/netforge_rl/actions/blue/mitigation.py new file mode 100644 index 0000000..e800d75 --- /dev/null +++ b/netforge_rl/actions/blue/mitigation.py @@ -0,0 +1,276 @@ +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry +from netforge_rl.core.commands import UpdateHostStatusCommand, DropSessionCommand, BlockPortCommand +from netforge_rl.core.commands import UpdateHostStatusCommand, DropSessionCommand, BlockPortCommand + + + +@action_registry.register('blue_operator', 0) +class IsolateHost(BaseAction): + """Disconnects a compromised host completely from the network + + infrastructure. + + This prevents lateral movement or data exfiltration but incurs an availability + penalty on the Blue Team's scoring mechanism. + + Args: + agent_id (str): The unique identifier of the defending Blue agent. + target_ip (str): The IP address of the host to dynamically isolate. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Validates if the target host theoretically exists and can be + + isolated. + + Args: + global_state (GlobalNetworkState): Engine baseline state. + + Returns: + bool: True if the action passes initial routing or authority checks. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Implements the zero-trust isolation delta across the specific node. + + Args: + global_state (GlobalNetworkState): Snapshot of current network topology. + + Returns: + ActionEffect: A state delta changing the node's status to 'isolated'. + """ + return ActionEffect( + success=True, + state_deltas=[ + UpdateHostStatusCommand(self.target_ip, 'isolated'), + DropSessionCommand(self.target_ip) + ], + observation_data={'alert': 'Host isolated securely.'}, + ) + + +@action_registry.register('blue_operator', 1) +class RestoreHost(BaseAction): + """Re-establishes network connectivity for a previously isolated host. + + Reverses the `IsolateHost` effect, bringing the node back online and + restoring critical business availability. + + Args: + agent_id (str): The unique identifier of the defending Blue agent. + target_ip (str): The isolated node's IP address. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Evaluates requirements for network restoration natively. + + Args: + global_state (GlobalNetworkState): Engine baseline state. + + Returns: + bool: Always True in base simulation constraints. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Removes the isolation quarantine delta from the designated host. + + Args: + global_state: Network configuration array. + + Returns: + ActionEffect: State delta returning the status to 'online'. + """ + return ActionEffect( + success=True, + state_deltas={ + f'hosts/{self.target_ip}/status': 'online', + f'hosts/{self.target_ip}/privilege': 'None', + f'hosts/{self.target_ip}/compromised_by': 'None', + }, + observation_data={'alert': 'Host restored and cleaned.'}, + ) + + +@action_registry.register('blue_operator', 4) +class Remove(BaseAction): + """Evicts unauthorized threat actors from a compromised element. + + Targets and kills anomalous processes, rolling local user privileges back + to a stable state without requiring a full system format. + + Args: + agent_id (str): Referencing Blue agent framework ID. + target_ip (str): Host identifier. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Checks Blue operational bounds prior to execution. + + Args: + global_state: Reference engine configuration. + + Returns: + bool: Action clearance bool. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Translates the threat eviction into a measurable privilege reduction + + delta. + + Args: + global_state: Reference engine configuration. + + Returns: + ActionEffect: State vector scrubbing privilege parameters down to 'None'. + """ + return ActionEffect( + success=True, + state_deltas={ + f'hosts/{self.target_ip}/privilege': 'None', + f'hosts/{self.target_ip}/compromised_by': 'None', + }, + observation_data={'alert': 'Unauthorized access removed.'}, + ) + + +@action_registry.register('blue_operator', 5) +class RestoreFromBackup(BaseAction): + """Executes a bare-metal imaging recovery to purge advanced persistent + + threats (APTs). + + An extreme but definitive mitigation vector that eradicates persistent malware, + but takes significantly more time and cost than localized `Remove` actions. + + Args: + agent_id (str): The orchestrating Blue agent ID string. + target_ip (str): The endpoint requiring catastrophic intervention. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip) + + def validate(self, global_state) -> bool: + """Ensures execution feasibility regarding orchestration limits. + + Args: + global_state: Simulation context. + + Returns: + bool: Access valid. + """ + return True + + def execute(self, global_state) -> ActionEffect: + """Computes a comprehensive reversion of the host node's state back to + + pristine. + + Args: + global_state: Simulation context. + + Returns: + ActionEffect: Multi-delta payload restoring privileges, uptime, and system integrity simultaneously. + """ + return ActionEffect( + success=True, + state_deltas={ + f'hosts/{self.target_ip}/privilege': 'None', + f'hosts/{self.target_ip}/status': 'online', + f'hosts/{self.target_ip}/system_integrity': 'clean', + }, + observation_data={'alert': 'Host restored from backup image.'}, + ) + + +@action_registry.register('blue_operator', 6) +class ConfigureACL(BaseAction): + """ + Dynamically modifies the implicit routing Firewall to block specific port + traffic inbound to a protected subnet. + + Args: + agent_id (str): Operating Blue Agent ID. + target_subnet (str): The CIDR block of the subnet to protect (e.g., '10.0.1.0/24'). + port (int): The destination port to drop (e.g., 445). + """ + + def __init__(self, agent_id: str, target_subnet: str, port: int): + super().__init__(agent_id, target_ip=target_subnet, cost=2) + self.port = port + + def validate(self, global_state) -> bool: + """ + Validates if the target subnet exists in the architecture. + """ + return self.target_ip in global_state.subnets + + def execute(self, global_state) -> ActionEffect: + """ + Calculates the physics delta to apply the firewall block rule. + """ + return ActionEffect( + success=True, + state_deltas=[ + BlockPortCommand(self.target_ip, self.port) + ], + observation_data={ + 'alert': f'ACL configured: Drop Port {self.port} to {self.target_ip}' + }, + ) + +@action_registry.register('blue_operator', 7) +class SecurityAwarenessTraining(BaseAction): + """ + Deploys rapid, intensive anti-phishing training to a targeted subnet. + + Temporarily slashes the `human_vulnerability_score` of all users in the subset, + drastically lowering the success rate of Red Team SpearPhishing campaigns. + Costs significant Financial budget due to operational lost time. + """ + + def __init__(self, agent_id: str, target_subnet: str): + super().__init__( + agent_id, + target_ip=target_subnet, + cost=2, + financial_cost=2000, + duration=3 + ) + + def validate(self, global_state) -> bool: + return self.target_ip in global_state.subnets + + def execute(self, global_state) -> ActionEffect: + subnet = global_state.subnets.get(self.target_ip) + if not subnet: + return ActionEffect(success=False, state_deltas={}, observation_data={}) + + deltas = {} + for host in subnet.hosts.values(): + if hasattr(host, 'human_vulnerability_score'): + current_score = host.human_vulnerability_score + # Slash vulnerability by 80% + new_score = round(current_score * 0.2, 2) + deltas[f'hosts/{host.ip}/human_vulnerability_score'] = new_score + + return ActionEffect( + success=True, + state_deltas=deltas, + observation_data={'alert': f'Security Awareness Training completed on {self.target_ip}. Vulnerability drastically lowered.'}, + eta=self.duration + ) diff --git a/netforge_rl/actions/red/kinetic.py b/netforge_rl/actions/red/kinetic.py new file mode 100644 index 0000000..4c150c3 --- /dev/null +++ b/netforge_rl/actions/red/kinetic.py @@ -0,0 +1,75 @@ +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_operator', 20) +class OverloadPLC(BaseAction): + """Initiates a devastating Kinetic Impact on a compromised Cyber-Physical OT Node. + + If the target PLC is compromised to Root level and belongs to the Operational Technology + infrastructure, this action radically forces hardware temperatures past catastrophic thresholds. + + Args: + agent_id (str): Reference string for the agent executing the attack. + target_ip (str): IP address belonging to a PLC_Firmware host inside the OT_Subnet. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__( + agent_id, + target_ip=target_ip, + cost=20, # High Energy + financial_cost=1000, # High Financial Requirement + duration=10, + required_prior_state='DiscoverNetworkServices' + ) + + def validate(self, global_state) -> bool: + """Ensures the target exists, is routeable, and is ACTUALLY an OT device.""" + if not super().validate(global_state): + return False + + host = global_state.all_hosts.get(self.target_ip) + if not host: + return False + + # Target must be physically OT Infrastructure + if host.os != 'PLC_Firmware' or host.subnet_cidr != '10.0.99.0/24': + return False + + # Red must have maximum execution privileges over the mechanical interface + if host.privilege != 'Root': + return False + + return global_state.can_route_to(self.target_ip) + + def execute(self, global_state) -> ActionEffect: + import random + + host = global_state.all_hosts.get(self.target_ip) + if not host: + return ActionEffect(success=False, state_deltas={}, observation_data={}) + + current_temperature = getattr(host, 'temperature', 50.0) + + # Inject rapid thermal escalation +90 to +150 delta + spike = random.uniform(90.0, 150.0) + new_temp = current_temperature + spike + + deltas = { + f'hosts/{self.target_ip}/temperature': new_temp, + f'hosts/{self.target_ip}/system_integrity': 'kinetic_destruction' + } + + # Stuxnet-level alert severity for the Blue Team SIEM + obs_data = { + 'action': 'overload_plc', + 'status': 'kinetic_impact_achieved', + 'terminal_temperature': new_temp + } + + return ActionEffect( + success=True, + state_deltas=deltas, + observation_data=obs_data, + eta=self.duration + ) diff --git a/netforge_rl/actions/red/social_engineering.py b/netforge_rl/actions/red/social_engineering.py new file mode 100644 index 0000000..93c4cd6 --- /dev/null +++ b/netforge_rl/actions/red/social_engineering.py @@ -0,0 +1,66 @@ +import random +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.commands import EstablishSessionCommand, UpdateHostPrivilegeCommand +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_operator', 21) +class SpearPhishing(BaseAction): + """Executes a targeted Social Engineering campaign against a Corporate End-User. + + Unlike standard Exploits, SpearPhishing leverages email protocols and bypasses + perimeter firewalls and DMZ routing constraints entirely. Its success probability + is purely dictated by the `human_vulnerability_score` of the human operator + assigned to the generated Endpoint, simulating clicks on malicious attachments. + + Args: + agent_id (str): Reference ID of the Red operating unit. + target_ip (str): IP address of the target User Node (typically Corporate/Secure subnet). + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__( + agent_id, + target_ip=target_ip, + cost=2, + financial_cost=50, # Costs minor operational budget to purchase domain infrastructure + duration=15, # High duration (waiting for users to organically check email) + required_prior_state=None # Can be shot blindly without structural discovery + ) + + def validate(self, global_state) -> bool: + """Overrides parent zone constraints to simulate out-of-band email protocol delivery.""" + if not self.target_ip or self.target_ip not in global_state.all_hosts: + return False + + host = global_state.all_hosts[self.target_ip] + # Nobody reads emails on PLCs or dedicated Servers; restricted to Windows Endpoints here + if 'Windows' not in getattr(host, 'os', ''): + return False + + return True + + def execute(self, global_state) -> ActionEffect: + host = global_state.all_hosts.get(self.target_ip) + + # Pull the phishability score generated procedurally + phish_chance = getattr(host, 'human_vulnerability_score', 0.1) + + if random.random() > phish_chance: + return ActionEffect( + success=False, + state_deltas=[], + observation_data={'phishing': 'failed', 'reason': 'user reported suspicious email'} + ) + + # Build OOP Delta List granting User-level reverse shell from the clicked attachment + deltas = [ + UpdateHostPrivilegeCommand(self.target_ip, 'User', compromised_by=self.agent_id), + EstablishSessionCommand(self.agent_id, self.target_ip, port=443) # Emulate C2 over HTTPS + ] + + return ActionEffect( + success=True, + state_deltas=deltas, + observation_data={'phishing': 'success', 'status': 'C2 Session Established via user execution'}, + eta=self.duration + ) From ec1354357c158c523f3b382882781f5e8e9b1668 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:02:14 +0200 Subject: [PATCH 05/10] feat: scaling infrastructure to PyTorch LSTMs --- netforge_rl/models/recurrent_mask_model.py | 93 ++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 netforge_rl/models/recurrent_mask_model.py diff --git a/netforge_rl/models/recurrent_mask_model.py b/netforge_rl/models/recurrent_mask_model.py new file mode 100644 index 0000000..edc1fe3 --- /dev/null +++ b/netforge_rl/models/recurrent_mask_model.py @@ -0,0 +1,93 @@ +import numpy as np +import torch +from torch import nn + +from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN +from ray.rllib.utils.annotations import override +from ray.rllib.utils.typing import ModelConfigDict, TensorType +from typing import Dict, List, Tuple + + +class MaskedLSTMModel(TorchRNN, nn.Module): + """ + A custom PyTorch model integrating native RLlib LSTM cells with strict Action Masking. + + We subclass TorchRNN to allow Ray to handle complex `seq_lens` padding and tensor + BPTT dimension tracking natively. We extract the mask out of the flattened array manually. + """ + + def __init__( + self, + obs_space, + action_space, + num_outputs: int, + model_config: ModelConfigDict, + name: str, + ): + nn.Module.__init__(self) + super().__init__(obs_space, action_space, num_outputs, model_config, name) + + self.cell_size = model_config.get("custom_model_config", {}).get("lstm_cell_size", 128) + + # 1. Feature Extractor (Dense Layers) + # Input size is 256 sliced from the flattened 318 Dict space + self.fc1 = nn.Linear(256, 128) + self.fc2 = nn.Linear(128, 128) + + # 2. LSTM Memory Unit + self.lstm = nn.LSTM( + input_size=128, + hidden_size=self.cell_size, + batch_first=True, + ) + + # 3. Action Type & Logit Masking Arrays + self.action_branch = nn.Linear(self.cell_size, num_outputs) + self.value_branch = nn.Linear(self.cell_size, 1) + + self._cur_value = None + + @override(TorchRNN) + def forward_rnn( + self, inputs: TensorType, state: List[TensorType], seq_lens: TensorType + ) -> Tuple[TensorType, List[TensorType]]: + + # Ray flatly concatenates spaces in alphanumeric order. + # action_mask Box(62) + # obs Box(256) + # Therefore: action_mask is [:62], obs is [62:] + action_mask = inputs[:, :, :62] + obs = inputs[:, :, 62:] + + # 1. Core Embeddings over Observation Sequence + x = nn.functional.relu(self.fc1(obs)) + x = nn.functional.relu(self.fc2(x)) + + # 2. Evaluate Temporal Memory + h_in, c_in = state[0].unsqueeze(0), state[1].unsqueeze(0) + x, (h_out, c_out) = self.lstm(x, (h_in, c_in)) + + # 3. Finalize Output Logit Distribution Branches + logits = self.action_branch(x) + self._cur_value = torch.reshape(self.value_branch(x), [-1]) + + # 4. Apply Action Mask dynamically over the sequence batch + masked_logits = torch.where( + action_mask == 0.0, + torch.tensor(-1e10, device=logits.device, dtype=logits.dtype), + logits + ) + + return masked_logits, [h_out.squeeze(0), c_out.squeeze(0)] + + @override(TorchRNN) + def value_function(self) -> TensorType: + assert self._cur_value is not None, "Evaluate forward_rnn() before value_function() call." + return self._cur_value + + @override(TorchRNN) + def get_initial_state(self) -> List[TensorType]: + return [ + torch.zeros(self.cell_size, dtype=torch.float32), + torch.zeros(self.cell_size, dtype=torch.float32) + ] From 28a56dc5e3efc8498421de23cb3323ae350d61dd Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:02:54 +0200 Subject: [PATCH 06/10] docs: formal SOTA capability tracking --- README.md | 64 ++++++++++++++++++++++++------------------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 95e47c2..37b09aa 100644 --- a/README.md +++ b/README.md @@ -6,14 +6,18 @@ Multi-Agent Reinforcement Learning (MARL) cybersecurity simulator mathematically **Project:** NetForge RL **GNN-based Policy Model:** https://github.com/elprofesoriqo/GNN-based-Policy-Model-for-MARL-Cyber -## Architectural Overhaul Notice +## Architectural Changes & State-of-the-Art Modeling -This repository represents a complete structural redesign of the original CybORG framework. I took ownership of this branch because the legacy CybORG environment was fundamentally restricted to single-agent, turn-based paradigms (utilizing nested OpenAI Gym wrappers) which artificially broke parallel gradients and hindered true Multi-Agent research. +This repository is a dramatic evolution from the legacy CybORG / CAGE challenge environment. While acknowledging the incredible fundamental work by DSTG, NetForge RL transitions the paradigm from a synchronous, fully observable game into a high-fidelity, physically constrained network simulation designed for real-world Sim-to-Real transfer. ### What is Different? -1. **Parallel Execution via PettingZoo:** The core simulator is now strictly built upon the `pettingzoo.ParallelEnv` standard instead of monolithic Gym wrappers. Red and Blue teams act in a simultaneous time vacuum, and the engine natively resolves their conflicting action intents. -2. **Abstract Action Engine:** Actions no longer mutate simulator state directly via complex monolithic switch statements. `BaseAction` computes an `ActionEffect` (JSON representation of physical network impact), which the core environment evaluates and securely commits. -3. **No Legacy Bloat:** I have deleted all obsolete OpenAI Gym references, redundant CAGE challenge sub-modules, and unneeded demo code. +1. **Interruptible Tick-Based Engine:** CybORG's instantaneous actions are gone. NetForge RL runs on an asynchronous `current_tick` clock. Actions have a `duration` natively. Real-time interruptions exist: if the SOC isolates a host mid-exfiltration, the attacker's action is aborted. +2. **Strict POMDP Isolation & Fog of War:** Defenders do not see the ground truth. They receive dynamic telemetry alerts generated by a newly implemented `siem_log_buffer` suffering from realistic `log_latency`. Background noise agents obfuscate true malicious alerts. +3. **MultiDiscrete Tensors & Procedural Networks:** To avoid static overfitting and combinatorial explosions, Action spaces utilize `MultiDiscrete` Arrays (e.g. `[ActionType, TargetIP]`). Topologies procedurally generate up to 50 active nodes utilizing padded masking dynamically. +4. **Attack Economics & Cost Mechanics:** Each agent is bounded by Operational Budgets (`agent_funds`, `agent_compute`). Reckless defensive isolation triggers massive Business Downtime mathematical penalties mirroring real-world SLA fines. +5. **Cyber-Physical (OT) Convergence:** Generating distinct `OT_Subnets` featuring `PLC` nodes mapping thermodynamic vulnerabilities. Red operators can inflict catastrophic Kinetic Impacts `(+10000/-10000 rewards)` overriding logical state tracking entirely. +6. **Social Engineering (Stochastics):** DMZ architectures can natively be bypassed by Red teams leveraging `SpearPhishing` arrays scaled against dynamically rolled `human_vulnerability_score` matrix properties. Blue counters this via explicit `SecurityAwarenessTraining` capital expenditure. +7. **Ray RLlib & PyTorch LSTMs:** Packaged natively with Custom PyTorch Models linking Recurrent Memory sequences (LSTMs) alongside mathematical boolean Action Masking dropping invalid tensor networks natively out-of-the-box. ### Simulator Architecture Flow @@ -37,10 +41,10 @@ graph TD The environment is designed to be highly plug-and-play. ```python -from marl_cyborg.environment.parallel_env import ParallelMarlCyborg +from netforge_rl.environment.parallel_env import NetForgeRLEnv # Instantiate the native PettingZoo environment -env = ParallelMarlCyborg(scenario_config={}) +env = NetForgeRLEnv(scenario_config={}) # Reset to get parallel Gymnasium boxes observations, infos = env.reset() @@ -53,50 +57,38 @@ print("Blue Box:", observations["Blue"]) The primary reason for this fork is extensibility. Want to add an *ARP Poisoning* attack? -Simply inherit the `BaseAction` inside `marl_cyborg/actions/network/arp_poison.py`, write how it modifies the theoretical `ActionEffect`, and the engine natively calculates the physics resolution. See `marl_cyborg.actions.network.ip_fragmentation.IPFragmentationAction` for a physical example of this structural implementation. +Simply inherit the `BaseAction` inside `netforge_rl/actions/network/arp_poison.py`, write how it modifies the theoretical `ActionEffect`, and the engine natively calculates the physics resolution. See `netforge_rl.actions.network.ip_fragmentation.IPFragmentationAction` for a physical example of this structural implementation. ## License & Accreditation This project is built upon the foundational work provided by the original CybORG contributors (CyberSecurityCRC / DSTG). The core internal simulator physics remain preserved, while the outward translation layers, action hierarchy, and Multi-Agent APIs have been entirely redesigned by Igor Jankowski. ## Repository Structure -- `marl_cyborg/`: Core simulation environment +- `netforge_rl/`: Core simulation environment - `actions/`: Contains definitions for all `BaseAction` implementations. - - `red_actions.py`: Red team offensive actions. - - `blue_actions.py`: Blue team defensive actions. - - `core/`: State, Observation, and Action abstract base classes. + - `agents/`: Contains specialized algorithmic actors like `GreenAgent` (Background Noise simulation). + - `core/`: State, Observation, and Action abstract base classes enforcing physical constraints. - `environment/`: - - `parallel_env.py`: The primary PettingZoo MARL environment. + - `parallel_env.py`: The primary asynchronous PettingZoo MARL environment. - `pcap_synthesizer.py`: Generates synthetic offline `.pcap` network traffic mappings. - `train_curriculum.py`: Example RL training script. - `test_physics.py`: Physics unit tests. ## Available Actions -All actions are natively available to the RL models through the environment's discrete action space (`Discrete(256)`). The engine dynamically scales and maps these 11 actions per team against all available network IPs. +All actions are natively available to the RL models through the environment's `MultiDiscrete` action space mapped seamlessly via PyTorch Logit structures. ### Red Team (Offensive) -1. **NetworkScan**: Scans a target subnet for active IP addresses. -2. **DiscoverRemoteSystems**: Performs a Ping Sweep to pinpoint active hosts. -3. **DiscoverNetworkServices**: Port scans a host to enumerate running services. -4. **ExploitRemoteService**: Exploits a vulnerability on a target IP to gain User privileges. -5. **PrivilegeEscalate**: Escalates from User to Root access. -6. **Impact**: Destroys/encrypts data on a compromised host (Ransomware/Wiper). -7. **ExploitBlueKeep**: Exploits RDP (CVE-2019-0708) on Port 3389. -8. **ExploitEternalBlue**: Exploits SMB (MS17-010) on Port 445. -9. **ExploitHTTP_RFI**: Remote File Inclusion exploit targeting Port 80. -10. **JuicyPotato**: Local privilege escalation via DCOM (Windows). -11. **V4L2KernelExploit**: Local privilege escalation via Video4Linux kernel vulns (Linux). +1. **NetworkScan / DiscoverRemoteSystems / DiscoverNetworkServices**: Passive/Active reconnaissance probing ports & ping sweeps. +2. **SpearPhishing**: Bypasses corporate structures directly exploiting human error factors inside user networks. +3. **ExploitRemoteService / ExploitEternalBlue...**: Gain user privileges weaponizing CVEs based on specific OS versions and open Ports. +4. **PrivilegeEscalate**: Pivot from constrained user constraints to `Root`/`System`. +5. **Impact**: Ransomware execution mapping standard IT failure metrics. +6. **OverloadPLC (Kinetic)**: Weaponizes thermodynamics on compromised OT Networks forcing episode kinetic destruction sequences. ### Blue Team (Defensive) -1. **IsolateHost**: Disconnects a host completely from the network. -2. **RestoreHost**: Brings an isolated host back online from a clean snapshot. -3. **Monitor**: Actively monitors traffic on a specific subnet or host for anomalies. -4. **Analyze**: Deep scans a specific host for malware signatures or unauthorized user activity. -5. **DeployDecoy**: Deploys a generic fake service (Apache/Tomcat/Femitter) to bait attackers. -6. **Remove**: Removes unauthorized user privileges. -7. **RestoreFromBackup**: Purges an infected host and restores it to a clean baseline from a backup. -8. **DecoyApache**: Deploys a fake Apache web server (Port 80) honeypot. -9. **DecoySSHD**: Deploys a fake SSH daemon (Port 22) honeypot. -10. **DecoyTomcat**: Deploys a fake Tomcat server (Port 8080) honeypot. -11. **Misinform**: Injects false host telemetry or alters logging to feed Red agents fake data. +1. **IsolateHost / RestoreHost**: Logical quarantining of suspected nodes (Incurs heavily tracked SLA Business downtime). +2. **Monitor / Analyze**: Asynchronous deep network/host scans bypassing standard physical delays. +3. **SecurityAwarenessTraining**: Burns financial budget mathematically slashing organic `human_vulnerability_scores` defending against phish payloads. +4. **DeployHoneytoken (Active Deception)**: Secretly seeds RAM-based tokens triggering massive unevadable 0-delay severity 10 SIEM alerts when parsed by automated Red lateral mapping capabilities. +5. **DecoyApache / DecoySSHD / DeployDecoy**: Deploys visible port-80/22 traps binding attacker compute resources across dead execution loops. From d182b943327e87220bfee348447f7a0ebcf8b207 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:04:58 +0200 Subject: [PATCH 07/10] refactor: transition engine hierarchy to NetForge_RL Completely purged legacy MARL configurations. Registered procedurally-generated topologies, Dictionary POMDP observations, and ConflictResolution physics engines mapped securely for Ray executions. --- marl_cyborg/actions/__init__.py | 57 --- marl_cyborg/actions/blue/__init__.py | 24 - marl_cyborg/actions/blue/deception.py | 204 -------- marl_cyborg/actions/blue/mitigation.py | 221 -------- marl_cyborg/actions/red/__init__.py | 26 - marl_cyborg/agents/__init__.py | 3 - marl_cyborg/agents/b_line.py | 74 --- marl_cyborg/core/action.py | 62 --- marl_cyborg/core/observation.py | 101 ---- marl_cyborg/environment/__init__.py | 4 - marl_cyborg/environment/parallel_env.py | 477 ------------------ marl_cyborg/scenarios/ransomware.py | 180 ------- marl_cyborg/topologies/network_generator.py | 104 ---- {marl_cyborg => netforge_rl}/__init__.py | 11 +- .../actions/blue/analysis.py | 6 +- .../actions/network/ip_fragmentation.py | 4 +- .../actions/red/coordination.py | 5 +- .../actions/red/exploits.py | 108 ++-- .../actions/red/impact.py | 40 +- .../actions/red/privilege_escalation.py | 53 +- .../actions/red/reconnaissance.py | 16 +- .../core/agent_interface.py | 4 +- netforge_rl/core/commands.py | 180 +++++++ netforge_rl/core/physics.py | 64 +++ netforge_rl/core/registry.py | 88 ++++ {marl_cyborg => netforge_rl}/core/state.py | 44 +- netforge_rl/environment/__init__.py | 4 + .../environment/base_env.py | 9 +- .../environment/pcap_synthesizer.py | 2 +- .../scenarios/apt_espionage.py | 6 +- .../scenarios/base_scenario.py | 4 +- 31 files changed, 566 insertions(+), 1619 deletions(-) delete mode 100644 marl_cyborg/actions/__init__.py delete mode 100644 marl_cyborg/actions/blue/__init__.py delete mode 100644 marl_cyborg/actions/blue/deception.py delete mode 100644 marl_cyborg/actions/blue/mitigation.py delete mode 100644 marl_cyborg/actions/red/__init__.py delete mode 100644 marl_cyborg/agents/__init__.py delete mode 100644 marl_cyborg/agents/b_line.py delete mode 100644 marl_cyborg/core/action.py delete mode 100644 marl_cyborg/core/observation.py delete mode 100644 marl_cyborg/environment/__init__.py delete mode 100644 marl_cyborg/environment/parallel_env.py delete mode 100644 marl_cyborg/scenarios/ransomware.py delete mode 100644 marl_cyborg/topologies/network_generator.py rename {marl_cyborg => netforge_rl}/__init__.py (59%) rename {marl_cyborg => netforge_rl}/actions/blue/analysis.py (95%) rename {marl_cyborg => netforge_rl}/actions/network/ip_fragmentation.py (93%) rename {marl_cyborg => netforge_rl}/actions/red/coordination.py (93%) rename {marl_cyborg => netforge_rl}/actions/red/exploits.py (75%) rename {marl_cyborg => netforge_rl}/actions/red/impact.py (64%) rename {marl_cyborg => netforge_rl}/actions/red/privilege_escalation.py (73%) rename {marl_cyborg => netforge_rl}/actions/red/reconnaissance.py (90%) rename {marl_cyborg => netforge_rl}/core/agent_interface.py (86%) create mode 100644 netforge_rl/core/commands.py create mode 100644 netforge_rl/core/physics.py create mode 100644 netforge_rl/core/registry.py rename {marl_cyborg => netforge_rl}/core/state.py (77%) create mode 100644 netforge_rl/environment/__init__.py rename {marl_cyborg => netforge_rl}/environment/base_env.py (81%) rename {marl_cyborg => netforge_rl}/environment/pcap_synthesizer.py (99%) rename {marl_cyborg => netforge_rl}/scenarios/apt_espionage.py (94%) rename {marl_cyborg => netforge_rl}/scenarios/base_scenario.py (88%) diff --git a/marl_cyborg/actions/__init__.py b/marl_cyborg/actions/__init__.py deleted file mode 100644 index 348d266..0000000 --- a/marl_cyborg/actions/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -from .blue import ( - IsolateHost, - RestoreHost, - Monitor, - Analyze, - DeployDecoy, - Remove, - RestoreFromBackup, - DecoyApache, - DecoySSHD, - DecoyTomcat, - Misinform, - ConfigureACL, -) -from .red import ( - NetworkScan, - DiscoverRemoteSystems, - DiscoverNetworkServices, - ExploitRemoteService, - PrivilegeEscalate, - Impact, - ExploitBlueKeep, - ExploitEternalBlue, - ExploitHTTP_RFI, - JuicyPotato, - V4L2KernelExploit, - KillProcess, - ShareIntelligence, -) - -__all__ = [ - 'IsolateHost', - 'RestoreHost', - 'Monitor', - 'Analyze', - 'DeployDecoy', - 'Remove', - 'RestoreFromBackup', - 'DecoyApache', - 'DecoySSHD', - 'DecoyTomcat', - 'Misinform', - 'ConfigureACL', - 'NetworkScan', - 'DiscoverRemoteSystems', - 'DiscoverNetworkServices', - 'ExploitRemoteService', - 'PrivilegeEscalate', - 'Impact', - 'ExploitBlueKeep', - 'ExploitEternalBlue', - 'ExploitHTTP_RFI', - 'JuicyPotato', - 'V4L2KernelExploit', - 'KillProcess', - 'ShareIntelligence', -] diff --git a/marl_cyborg/actions/blue/__init__.py b/marl_cyborg/actions/blue/__init__.py deleted file mode 100644 index e797bf0..0000000 --- a/marl_cyborg/actions/blue/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from .mitigation import ( - IsolateHost, - RestoreHost, - Remove, - RestoreFromBackup, - ConfigureACL, -) -from .analysis import Monitor, Analyze -from .deception import DeployDecoy, DecoyApache, DecoySSHD, DecoyTomcat, Misinform - -__all__ = [ - 'IsolateHost', - 'RestoreHost', - 'Remove', - 'RestoreFromBackup', - 'Monitor', - 'Analyze', - 'DeployDecoy', - 'DecoyApache', - 'DecoySSHD', - 'DecoyTomcat', - 'Misinform', - 'ConfigureACL', -] diff --git a/marl_cyborg/actions/blue/deception.py b/marl_cyborg/actions/blue/deception.py deleted file mode 100644 index 3873220..0000000 --- a/marl_cyborg/actions/blue/deception.py +++ /dev/null @@ -1,204 +0,0 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect - - -class DeployDecoy(BaseAction): - """Deploys a generic high-interaction honeypot/decoy service to a target - - host. - - Simulates services like Femitter or blank Apache instances to bait Red Agent - exploitation attempts, subsequently flagging them in Blue telemetry. - - Args: - agent_id (str): Target Blue agent orchestrator. - target_ip (str): IP address hosting the new decoy daemon. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Validates target host existence natively within the network map. - - Args: - global_state (GlobalNetworkState): Simulation snapshot. - - Returns: - bool: Always True assuming orchestrator controls the infrastructure. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Updates the host configuration to actively broadcast a decoy - - profile. - - Args: - global_state (GlobalNetworkState): The current baseline state. - - Returns: - ActionEffect: A structural delta setting 'decoy' status to 'active'. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/decoy': 'active'}, - observation_data={'decoy_deployed': self.target_ip}, - ) - - -class DecoyApache(BaseAction): - """Deploys a specifically profiled Apache Web Server (Port 80) honeypot. - - Deceives Nmap scans and absorbs associated web-based Remote File Inclusion (RFI) exploits. - - Args: - agent_id (str): Target Blue agent orchestrator. - target_ip (str): IP address hosting the new decoy daemon. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Validation pre-checks for port conflicts and architecture readiness. - - Args: - global_state: Baseline state. - - Returns: - bool: Action clearance bool. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Applies the decoy instantiation physics. - - Args: - global_state: Baseline state. - - Returns: - ActionEffect: Delta converting the host to an Apache decoy sinkhole. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/decoy': 'Apache'}, - observation_data={'decoy_deployed': f'Apache on {self.target_ip}'}, - ) - - -class DecoySSHD(BaseAction): - """Deploys a fake SSH daemon (Port 22) honeypot specifically designed to - - bait brute force actions. - - Args: - agent_id (str): Target Blue agent orchestrator. - target_ip (str): IP address hosting the new decoy daemon. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """ - Args: - global_state: Baseline state. - - Returns: - bool: Action clearance bool. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """ - Args: - global_state: Baseline state. - - Returns: - ActionEffect: Sets the 'decoy' string to 'SSHD'. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/decoy': 'SSHD'}, - observation_data={'decoy_deployed': f'SSHD on {self.target_ip}'}, - ) - - -class DecoyTomcat(BaseAction): - """Deploys a fake Tomcat server (Port 8080) to deceive application port - - scans. - - Args: - agent_id (str): Target Blue agent orchestrator. - target_ip (str): IP address hosting the new decoy daemon. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """ - Args: - global_state: Baseline state. - - Returns: - bool: Action clearance bool. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """ - Args: - global_state: Baseline state. - - Returns: - ActionEffect: Sets the 'decoy' string to 'Tomcat'. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/decoy': 'Tomcat'}, - observation_data={'decoy_deployed': f'Tomcat on {self.target_ip}'}, - ) - - -class Misinform(BaseAction): - """Injects false host telemetry or alters logging infrastructure to feed - - Red agents fake data. - - Disrupts adversary situational awareness by generating spoofed observations. - - Args: - agent_id (str): Target Blue agent ID. - target_ip (str): IP address of the target infrastructure component. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """ - Args: - global_state: Baseline state. - - Returns: - bool: Action clearance bool. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """ - Args: - global_state: Baseline state. - - Returns: - ActionEffect: Activates misinformation campaign flags logically. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/misinformation': 'active'}, - observation_data={ - 'alert': f'Misinformation campaign active on {self.target_ip}.' - }, - ) diff --git a/marl_cyborg/actions/blue/mitigation.py b/marl_cyborg/actions/blue/mitigation.py deleted file mode 100644 index bc39cd5..0000000 --- a/marl_cyborg/actions/blue/mitigation.py +++ /dev/null @@ -1,221 +0,0 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect - - -class IsolateHost(BaseAction): - """Disconnects a compromised host completely from the network - - infrastructure. - - This prevents lateral movement or data exfiltration but incurs an availability - penalty on the Blue Team's scoring mechanism. - - Args: - agent_id (str): The unique identifier of the defending Blue agent. - target_ip (str): The IP address of the host to dynamically isolate. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Validates if the target host theoretically exists and can be - - isolated. - - Args: - global_state (GlobalNetworkState): Engine baseline state. - - Returns: - bool: True if the action passes initial routing or authority checks. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Implements the zero-trust isolation delta across the specific node. - - Args: - global_state (GlobalNetworkState): Snapshot of current network topology. - - Returns: - ActionEffect: A state delta changing the node's status to 'isolated'. - """ - return ActionEffect( - success=True, - state_deltas={f'hosts/{self.target_ip}/status': 'isolated'}, - observation_data={'alert': 'Host isolated securely.'}, - ) - - -class RestoreHost(BaseAction): - """Re-establishes network connectivity for a previously isolated host. - - Reverses the `IsolateHost` effect, bringing the node back online and - restoring critical business availability. - - Args: - agent_id (str): The unique identifier of the defending Blue agent. - target_ip (str): The isolated node's IP address. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Evaluates requirements for network restoration natively. - - Args: - global_state (GlobalNetworkState): Engine baseline state. - - Returns: - bool: Always True in base simulation constraints. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Removes the isolation quarantine delta from the designated host. - - Args: - global_state: Network configuration array. - - Returns: - ActionEffect: State delta returning the status to 'online'. - """ - return ActionEffect( - success=True, - state_deltas={ - f'hosts/{self.target_ip}/status': 'online', - f'hosts/{self.target_ip}/privilege': 'None', - f'hosts/{self.target_ip}/compromised_by': 'None', - }, - observation_data={'alert': 'Host restored and cleaned.'}, - ) - - -class Remove(BaseAction): - """Evicts unauthorized threat actors from a compromised element. - - Targets and kills anomalous processes, rolling local user privileges back - to a stable state without requiring a full system format. - - Args: - agent_id (str): Referencing Blue agent framework ID. - target_ip (str): Host identifier. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Checks Blue operational bounds prior to execution. - - Args: - global_state: Reference engine configuration. - - Returns: - bool: Action clearance bool. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Translates the threat eviction into a measurable privilege reduction - - delta. - - Args: - global_state: Reference engine configuration. - - Returns: - ActionEffect: State vector scrubbing privilege parameters down to 'None'. - """ - return ActionEffect( - success=True, - state_deltas={ - f'hosts/{self.target_ip}/privilege': 'None', - f'hosts/{self.target_ip}/compromised_by': 'None', - }, - observation_data={'alert': 'Unauthorized access removed.'}, - ) - - -class RestoreFromBackup(BaseAction): - """Executes a bare-metal imaging recovery to purge advanced persistent - - threats (APTs). - - An extreme but definitive mitigation vector that eradicates persistent malware, - but takes significantly more time and cost than localized `Remove` actions. - - Args: - agent_id (str): The orchestrating Blue agent ID string. - target_ip (str): The endpoint requiring catastrophic intervention. - """ - - def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) - - def validate(self, global_state) -> bool: - """Ensures execution feasibility regarding orchestration limits. - - Args: - global_state: Simulation context. - - Returns: - bool: Access valid. - """ - return True - - def execute(self, global_state) -> ActionEffect: - """Computes a comprehensive reversion of the host node's state back to - - pristine. - - Args: - global_state: Simulation context. - - Returns: - ActionEffect: Multi-delta payload restoring privileges, uptime, and system integrity simultaneously. - """ - return ActionEffect( - success=True, - state_deltas={ - f'hosts/{self.target_ip}/privilege': 'None', - f'hosts/{self.target_ip}/status': 'online', - f'hosts/{self.target_ip}/system_integrity': 'clean', - }, - observation_data={'alert': 'Host restored from backup image.'}, - ) - - -class ConfigureACL(BaseAction): - """ - Dynamically modifies the implicit routing Firewall to block specific port - traffic inbound to a protected subnet. - - Args: - agent_id (str): Operating Blue Agent ID. - target_subnet (str): The CIDR block of the subnet to protect (e.g., '10.0.1.0/24'). - port (int): The destination port to drop (e.g., 445). - """ - - def __init__(self, agent_id: str, target_subnet: str, port: int): - super().__init__(agent_id, target_ip=target_subnet, cost=2) - self.port = port - - def validate(self, global_state) -> bool: - """ - Validates if the target subnet exists in the architecture. - """ - return self.target_ip in global_state.subnets - - def execute(self, global_state) -> ActionEffect: - """ - Calculates the physics delta to apply the firewall block rule. - """ - safe_subnet = self.target_ip.replace('/', '_slash_') - return ActionEffect( - success=True, - state_deltas={f'firewall/block/{safe_subnet}/{self.port}': 'True'}, - observation_data={ - 'alert': f'ACL configured: Drop Port {self.port} to {self.target_ip}' - }, - ) diff --git a/marl_cyborg/actions/red/__init__.py b/marl_cyborg/actions/red/__init__.py deleted file mode 100644 index 6924d08..0000000 --- a/marl_cyborg/actions/red/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -from .reconnaissance import NetworkScan, DiscoverRemoteSystems, DiscoverNetworkServices -from .exploits import ( - ExploitRemoteService, - ExploitBlueKeep, - ExploitEternalBlue, - ExploitHTTP_RFI, -) -from .privilege_escalation import PrivilegeEscalate, JuicyPotato, V4L2KernelExploit -from .impact import Impact, KillProcess -from .coordination import ShareIntelligence - -__all__ = [ - 'NetworkScan', - 'DiscoverRemoteSystems', - 'DiscoverNetworkServices', - 'ExploitRemoteService', - 'ExploitBlueKeep', - 'ExploitEternalBlue', - 'ExploitHTTP_RFI', - 'PrivilegeEscalate', - 'JuicyPotato', - 'V4L2KernelExploit', - 'Impact', - 'KillProcess', - 'ShareIntelligence', -] diff --git a/marl_cyborg/agents/__init__.py b/marl_cyborg/agents/__init__.py deleted file mode 100644 index 939a67a..0000000 --- a/marl_cyborg/agents/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .b_line import BLineAgent - -__all__ = ['BLineAgent'] diff --git a/marl_cyborg/agents/b_line.py b/marl_cyborg/agents/b_line.py deleted file mode 100644 index f3d0d13..0000000 --- a/marl_cyborg/agents/b_line.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import Any -import random -import numpy as np - - -class BLineAgent: - """ - Scripted Red Agent that executes the exact B-Line killchain: - - DiscoverRemoteSystems -> DiscoverNetworkServices -> ExploitRemoteService -> PrivilegeEscalate -> Impact - """ - - def __init__(self, agent_id: str): - self.agent_id = agent_id - self.known_subnets = ['10.0.0.0/24', '192.168.1.0/24', '10.0.1.0/24'] - self.known_hosts = [] - self.exploited_hosts = [] - self.root_hosts = [] - self.impacted_hosts = [] - self.step_count = 0 - - def get_action(self, observation: np.ndarray, global_state) -> Any: - from marl_cyborg.actions import ( - DiscoverRemoteSystems, - DiscoverNetworkServices, - ExploitRemoteService, - PrivilegeEscalate, - Impact, - ) - - self.step_count += 1 - - # 1. Discover Subnets if we don't know network yet - if not self.known_hosts or self.step_count < 3: - target_subnet = self.known_subnets[ - self.step_count % len(self.known_subnets) - ] - # Oracle gets to peek at true state to simulate finding hosts - for host in global_state.all_hosts.values(): - if ( - host.subnet_cidr == target_subnet - and host.ip not in self.known_hosts - ): - self.known_hosts.append(host.ip) - return DiscoverRemoteSystems(self.agent_id, target_subnet) - - # 2. Exploit known hosts if reachable - unexploited = [ - h - for h in self.known_hosts - if h not in self.exploited_hosts and global_state.can_route_to(h) - ] - if unexploited: - target = random.choice(unexploited) - self.exploited_hosts.append(target) - return ExploitRemoteService(self.agent_id, target) - - # 3. Escalate privileges - unescalated = [h for h in self.exploited_hosts if h not in self.root_hosts] - if unescalated: - target = random.choice(unescalated) - self.root_hosts.append(target) - return PrivilegeEscalate(self.agent_id, target) - - # 4. Impact fully compromised nodes - unimpacted = [h for h in self.root_hosts if h not in self.impacted_hosts] - if unimpacted: - target = random.choice(unimpacted) - self.impacted_hosts.append(target) - return Impact(self.agent_id, target) - - # Fallback to random scanning - target = random.choice(self.known_hosts) if self.known_hosts else '127.0.0.1' - return DiscoverNetworkServices(self.agent_id, target) diff --git a/marl_cyborg/core/action.py b/marl_cyborg/core/action.py deleted file mode 100644 index 1305546..0000000 --- a/marl_cyborg/core/action.py +++ /dev/null @@ -1,62 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, Any, Optional, TYPE_CHECKING - -if TYPE_CHECKING: - from marl_cyborg.core.state import GlobalNetworkState - - -class ActionEffect: - """Encapsulates the resulting state changes from an action for conflict - - resolution. - """ - - def __init__( - self, - success: bool, - state_deltas: Dict[str, Any], - observation_data: Dict[str, Any], - eta: int = 0, - ): - self.success = success - self.state_deltas = state_deltas - self.observation_data = observation_data - self.eta = eta - - -class BaseAction(ABC): - """Modular Base Action for the MARL CybORG Environment. - - All highly specific network attacks (Layer 2 - Layer 7) inherit from this class. - """ - - def __init__( - self, - agent_id: str, - target_ip: Optional[str] = None, - source_ip: Optional[str] = None, - cost: int = 1, - ): - self.agent_id = agent_id - self.target_ip = target_ip - self.source_ip = source_ip - self.cost = cost - - @abstractmethod - def validate(self, global_state: 'GlobalNetworkState') -> bool: - """Checks if the action is physically possible in the current network - - state (e.g., is there a route? - - is the port open?). - """ - pass - - @abstractmethod - def execute(self, global_state: 'GlobalNetworkState') -> ActionEffect: - """Computes the theoretical effect of the action. - - Note: State is NOT mutated directly here. Mutations are returned via ActionEffect - to allow the Environment to resolve simultaneous multi-agent collisions. - """ - pass diff --git a/marl_cyborg/core/observation.py b/marl_cyborg/core/observation.py deleted file mode 100644 index 03430a3..0000000 --- a/marl_cyborg/core/observation.py +++ /dev/null @@ -1,101 +0,0 @@ -import numpy as np -from typing import Any, List - - -class BaseObservation: - """Represents the local view of the network from a single Agent's - - perspective. - - In MARL, Red and Blue teams receive fundamentally different subsets - of the global state. - """ - - def __init__(self, agent_id: str): - self.agent_id = agent_id - self.visible_hosts = {} - self.detected_anomalies = [] - self.active_sessions = [] - - # Array of floats representing the Commander's directive (e.g., target subnet index) - self.objective_vector = np.zeros(5, dtype=np.float32) - - # Tracks anomalies like 802.11 Deauths, Fragmented IP packets, etc. - self.network_telemetry = {} - - def update_from_state(self, global_state: Any, action_effects: List[Any]): - """Filters the global state down to only what is observable by this - - specific agent. - - Since we don't have the fully simulated GlobalNetworkState yet, - we generate dynamic dummy telemetry that structurally mimics the - CAGE challenge dictionaries. - """ - # Parse realistic data from the OOP GlobalNetworkState - if global_state: - # Enforce True Partial Observability (Fog of War) - # Agents only receive tensor data for hosts within their active knowledge graph - known_ips = global_state.agent_knowledge.get(self.agent_id, set()) - for ip in known_ips: - if ip in global_state.all_hosts: - host = global_state.all_hosts[ip] - self.visible_hosts[ip] = { - 'state': 'compromised' - if host.privilege in ['User', 'Root'] - else 'clean', - 'status': host.status, - 'decoy': host.decoy, # For Blue Team sensor logic - } - - if 'commander' in self.agent_id.lower(): - self.network_telemetry['global_alert_level'] = np.random.uniform(0, 1) - self.network_telemetry['total_isolated_subnets'] = np.random.randint(0, 5) - - if 'operator' in self.agent_id.lower(): - self.objective_vector[2] = 1.0 - - def to_numpy(self, max_size: int = 256) -> np.ndarray: - """Serializes the object-oriented observation into a fixed-size Tensor - - for RL Neural Networks. - - This must be mathematically rigorous. If a node isn't seen, its - index must be explicitly 0. - """ - vector = np.zeros(max_size, dtype=np.float32) - idx = 0 - - if 'global_alert_level' in self.network_telemetry and idx < max_size: - vector[idx] = self.network_telemetry['global_alert_level'] - idx += 1 - - if 'total_isolated_subnets' in self.network_telemetry and idx < max_size: - vector[idx] = ( - float(self.network_telemetry['total_isolated_subnets']) / 10.0 - ) # Normalized - idx += 1 - - for val in self.objective_vector: - if idx < max_size: - vector[idx] = val - idx += 1 - - for ip, data in self.visible_hosts.items(): - if idx + 2 >= max_size: - break - - ip_val = float(ip.split('.')[-1]) / 255.0 # Normalize IP tail - state_val = ( - 1.0 - if data.get('state') == 'compromised' - else -1.0 - if data.get('state') == 'clean' - else 0.0 - ) - - vector[idx] = ip_val - vector[idx + 1] = state_val - idx += 2 - - return vector diff --git a/marl_cyborg/environment/__init__.py b/marl_cyborg/environment/__init__.py deleted file mode 100644 index 030e31c..0000000 --- a/marl_cyborg/environment/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .base_env import BaseMarlCyborg -from .parallel_env import ParallelMarlCyborg - -__all__ = ['BaseMarlCyborg', 'ParallelMarlCyborg'] diff --git a/marl_cyborg/environment/parallel_env.py b/marl_cyborg/environment/parallel_env.py deleted file mode 100644 index 00c8363..0000000 --- a/marl_cyborg/environment/parallel_env.py +++ /dev/null @@ -1,477 +0,0 @@ -from typing import Dict, Tuple -import numpy as np -import gymnasium as gym - -from marl_cyborg.core.action import BaseAction, ActionEffect -from marl_cyborg.core.observation import BaseObservation -from marl_cyborg.environment.base_env import BaseMarlCyborg -from marl_cyborg.topologies.network_generator import NetworkGenerator - - -class ParallelMarlCyborg(BaseMarlCyborg): - """MARL Environment for CybORG. - - Follows the PettingZoo Parallel API standard for simultaneous Multi- - Agent execution and relies exclusively on Gymnasium spaces natively. - """ - - metadata = {'render_modes': ['ansi'], 'name': 'marl_cyborg_v3'} - - def __init__(self, scenario_config: dict): - # Default to procedural generation if no specific architecture config is provided - topology_path = ( - scenario_config.get('topology_path') if scenario_config else None - ) - self.network_generator = NetworkGenerator(config_path=topology_path) - - scenario_type = ( - scenario_config.get('scenario_type', 'ransomware') - if scenario_config - else 'ransomware' - ) - self.possible_agents = [ - 'red_commander', - 'red_operator', - 'blue_commander', - 'blue_operator', - ] - self.agents = self.possible_agents[:] - - if scenario_type.lower() == 'ransomware': - from marl_cyborg.scenarios.ransomware import RansomwareScenario - - self.scenario = RansomwareScenario(self.agents) - else: - from marl_cyborg.scenarios.apt_espionage import AptEspionageScenario - - self.scenario = AptEspionageScenario(self.agents) - - self.global_state = self.network_generator.generate() - - # Native Gymnasium Spaces for PettingZoo API - self.observation_spaces = { - agent: gym.spaces.Box(low=-1.0, high=1.0, shape=(256,), dtype=np.float32) - for agent in self.possible_agents - } - self.action_spaces = { - agent: gym.spaces.Discrete( - 256 - ) # Expanded to natively support advanced actions across 40+ IPs - for agent in self.possible_agents - } - self.max_steps = 100 - self.current_step = 0 - - def reset( - self, seed=None, options=None - ) -> Tuple[Dict[str, np.ndarray], Dict[str, dict]]: - """Resets the network state to initial configuration natively - - (Gymnasium style + PettingZoo). - """ - self.global_state = self.network_generator.generate(seed=seed) - self.agents = self.possible_agents[:] - self.global_state.agent_energy = {agent: 50 for agent in self.agents} - observations = {} - for agent_id in self.agents: - obs = BaseObservation(agent_id) - obs.update_from_state(self.global_state, []) - observations[agent_id] = obs.to_numpy(max_size=256) - self.current_step = 0 - - return observations, {agent: {} for agent in self.agents} - - def observation_space(self, agent): - return self.observation_spaces[agent] - - def action_space(self, agent): - return self.action_spaces[agent] - - def action_mask(self, agent: str) -> np.ndarray: - """Returns a binary mask denoting valid and distinct action integers for the agent, - pruning out computationally redundant modulo duplicates. - """ - mask = np.zeros(self.action_spaces[agent].n, dtype=np.int8) - - target_ips = sorted(list(self.global_state.all_hosts.keys())) - num_targets = len(target_ips) if target_ips else 1 - - if 'red' in agent.lower(): - valid_groups = 4 if 'commander' in agent.lower() else 9 - else: - valid_groups = 5 if 'commander' in agent.lower() else 7 - - max_valid_action = min(valid_groups * num_targets, self.action_spaces[agent].n) - mask[:max_valid_action] = 1 - return mask - - def step( - self, agent_actions: Dict[str, int] - ) -> Tuple[ - Dict[str, BaseObservation], - Dict[str, float], - Dict[str, bool], - Dict[str, bool], - Dict[str, dict], - ]: - """ - Simultaneous Step Execution Logic: - - 1. VALIDATION: Check if actions are physically possible. - 2. EXECUTION: Compute intended state changes (ActionEffects) WITHOUT mutating state yet. - 3. CONFLICT RESOLUTION: E.g., if Blue drops a connection while Red exploits it, Blue wins. - 4. MUTATION: Apply final resolved effects to the true global state. - 5. OBSERVATION: Re-calculate what each agent can see. - """ - intended_effects = {} - for agent, action_int in agent_actions.items(): - # Validate temporal locks - if self.current_step < self.global_state.agent_locked_until.get(agent, 0): - intended_effects[agent] = ActionEffect( - success=False, - state_deltas={}, - observation_data={ - 'error': 'Agent locked executing previous action' - }, - ) - continue - - if isinstance(action_int, BaseAction): - action = action_int - else: - action = self._decode_action(agent, int(action_int)) - - # Validate temporal energy constraints - if self.global_state.agent_energy.get(agent, 0) < action.cost: - intended_effects[agent] = ActionEffect( - success=False, - state_deltas={}, - observation_data={'error': 'Insufficient Energy'}, - ) - continue - - # Expend energy for the action regardless of success - self.global_state.agent_energy[agent] -= action.cost - - if action.validate(self.global_state): - effect = action.execute(self.global_state) - if getattr(effect, 'eta', 0) > 0: - self.global_state.agent_locked_until[agent] = ( - self.current_step + effect.eta - ) - self.global_state.pending_effects.append( - (self.current_step + effect.eta, agent, effect) - ) - intended_effects[agent] = ActionEffect( - success=False, - state_deltas={}, - observation_data={ - 'status': f'Executing action... ETA {effect.eta} steps' - }, - ) - else: - intended_effects[agent] = effect - else: - intended_effects[agent] = ActionEffect( - success=False, - state_deltas={}, - observation_data={'exploit': 'validation failed natively'}, - ) - - # Process delayed effects that have arrived natively - remaining_pending = [] - for eta_step, p_agent, p_effect in self.global_state.pending_effects: - if self.current_step >= eta_step: - intended_effects[p_agent] = ( - p_effect # Overlay onto their intended effect - ) - else: - remaining_pending.append((eta_step, p_agent, p_effect)) - self.global_state.pending_effects = remaining_pending - - resolved_effects = self._resolve_conflicts(intended_effects) - - self._apply_state_deltas(resolved_effects) - - observations = {} - rewards = {} - terminate = self.scenario.check_termination(self.global_state) - self.current_step += 1 - - # Trigger dynamic topology mutations mid-episode - if self.current_step % 40 == 0: - self.global_state.reallocate_dhcp() - - is_truncated = self.current_step >= self.max_steps - truncate = {agent: is_truncated for agent in self.agents} - - for agent in self.agents: - obs = BaseObservation(agent) - obs.update_from_state(self.global_state, resolved_effects) - - obs_array = obs.to_numpy(max_size=256) - if 'operator' in agent: - commander_id = agent.replace('operator', 'commander') - if commander_id in agent_actions: - cmd_action = agent_actions[commander_id] - # Normalize the Discrete(100) action to a float between 0.0 and 1.0 - cmd_val = ( - (float(cmd_action) / 100.0) - if not isinstance(cmd_action, BaseAction) - else 1.0 - ) - obs_array[0] = cmd_val - - observations[agent] = obs_array - - # Reward shaping applied here natively factoring in immediate action outcomes - agent_effect = resolved_effects.get(agent) - rewards[agent] = self._calculate_reward( - agent, self.global_state, agent_effect - ) - - self.agents = [ - agent - for agent in self.agents - if not terminate[agent] and not truncate[agent] - ] - - # ── Build info dicts with security metrics for callbacks ── - infos = self._extract_agent_infos(observations, resolved_effects) - - return observations, rewards, terminate, truncate, infos - - def render(self): - """Standard PettingZoo GUI logging render hook.""" - pass - - def _decode_action(self, agent_id: str, action_int: int) -> BaseAction: - from marl_cyborg.actions import ( - IsolateHost, - RestoreHost, - Monitor, - Analyze, - DeployDecoy, - Remove, - RestoreFromBackup, - DecoyApache, - DecoySSHD, - DecoyTomcat, - Misinform, - NetworkScan, - DiscoverRemoteSystems, - DiscoverNetworkServices, - ExploitRemoteService, - PrivilegeEscalate, - Impact, - ExploitBlueKeep, - ExploitEternalBlue, - ExploitHTTP_RFI, - JuicyPotato, - V4L2KernelExploit, - KillProcess, - ShareIntelligence, - ConfigureACL, - ) - - target_ips = sorted(list(self.global_state.all_hosts.keys())) - if not target_ips: - target_ips = ['127.0.0.1'] - - target_ip = target_ips[action_int % len(target_ips)] - action_group = action_int // len(target_ips) - - if 'red' in agent_id.lower(): - if 'commander' in agent_id.lower(): - action_type = action_group % 4 - if action_type == 0: - return NetworkScan(agent_id, '10.0.0.0/24') - elif action_type == 1: - return DiscoverRemoteSystems(agent_id, '10.0.0.0/24') - elif action_type == 2: - return DiscoverNetworkServices(agent_id, target_ip) - else: - return ShareIntelligence(agent_id, 'red_operator') - else: - action_type = action_group % 9 - if action_type == 0: - return ExploitRemoteService(agent_id, target_ip) - elif action_type == 1: - return PrivilegeEscalate(agent_id, target_ip) - elif action_type == 2: - return Impact(agent_id, target_ip) - elif action_type == 3: - return ExploitBlueKeep(agent_id, target_ip) - elif action_type == 4: - return ExploitEternalBlue(agent_id, target_ip) - elif action_type == 5: - return ExploitHTTP_RFI(agent_id, target_ip) - elif action_type == 6: - return JuicyPotato(agent_id, target_ip) - elif action_type == 7: - return V4L2KernelExploit(agent_id, target_ip) - else: - return KillProcess(agent_id, target_ip) - else: - if 'commander' in agent_id.lower(): - action_type = action_group % 5 - if action_type == 0: - return DeployDecoy(agent_id, target_ip) - elif action_type == 1: - return DecoyApache(agent_id, target_ip) - elif action_type == 2: - return DecoySSHD(agent_id, target_ip) - elif action_type == 3: - return DecoyTomcat(agent_id, target_ip) - else: - return Misinform(agent_id, target_ip) - else: - action_type = action_group % 7 - if action_type == 0: - return IsolateHost(agent_id, target_ip) - elif action_type == 1: - return RestoreHost(agent_id, target_ip) - elif action_type == 2: - return Monitor(agent_id, target_ip) - elif action_type == 3: - return Analyze(agent_id, target_ip) - elif action_type == 4: - return Remove(agent_id, target_ip) - elif action_type == 5: - return RestoreFromBackup(agent_id, target_ip) - else: - subnet = '.'.join(target_ip.split('.')[:3]) + '.0/24' - return ConfigureACL(agent_id, target_subnet=subnet, port=445) - - def _resolve_conflicts( - self, effects: Dict[str, ActionEffect] - ) -> Dict[str, ActionEffect]: - """Core physics engine. - - Mathematically resolves simultaneous temporal collisions. - Priority: Blue Defensive actions generally supersede Red Offensive actions - on the exact same network node if executed in the exact same fraction of a second. - """ - # Separate offensive and defensive intents - red_agents = [a for a in effects if 'red' in a.lower()] - blue_agents = [a for a in effects if 'blue' in a.lower()] - - # 1. Compile all Blue defensive targets and actions for this timestep - blue_defended_nodes = {} - for blue_id in blue_agents: - eff = effects[blue_id] - if eff.success: - # E.g., eff.state_deltas might contain: {"hosts.10.0.0.5.port.80": "closed"} - for delta_key, delta_val in eff.state_deltas.items(): - if 'hosts/' in delta_key: - target_ip = delta_key.split('/')[1] - if target_ip not in blue_defended_nodes: - blue_defended_nodes[target_ip] = [] - blue_defended_nodes[target_ip].append(delta_val) - - # 2. Evaluate Red attacks against the compiled simultaneous defenses - for red_id in red_agents: - red_eff = effects[red_id] - if not red_eff.success: - continue # Already failed natively, ignore - - collision_detected = False - for delta_key in list(red_eff.state_deltas.keys()): - if 'hosts/' in delta_key: - target_ip = delta_key.split('/')[1] - - # If Red is targeting a node that Blue is simultaneously modifying - if target_ip in blue_defended_nodes: - # For now, we apply a hard Zero-Trust temporal priority: Blue Defense always wins ties - collision_detected = True - break - - if collision_detected: - # Nullify Red's attack effect entirely and alert the network telemetry - effects[red_id].success = False - effects[red_id].state_deltas = {} - effects[red_id].observation_data['alert'] = ( - 'TEMPORAL_COLLISION_DEFENSE_SUPREMACY' - ) - - return effects - - def _apply_state_deltas(self, effects: Dict[str, ActionEffect]): - """Applies validated deltas to the GlobalNetworkState. - - Only called AFTER temporal collisions have been mathematically - resolved. - """ - for agent_id, effect in effects.items(): - if effect.success: - for delta_key, delta_val in effect.state_deltas.items(): - self.global_state.apply_delta(delta_key, delta_val) - - def _calculate_reward( - self, agent_id: str, state, effect: ActionEffect = None - ) -> float: - """Delegates reward logic directly to the localized Scenario module.""" - return self.scenario.calculate_reward(agent_id, state, effect) - - def _extract_agent_infos(self, observations: dict, resolved_effects: dict) -> dict: - """Extracts security metrics for TensorBoard and CSV logging callbacks. - - Args: - observations: Dictionary of agent observations for this step. - resolved_effects: Dictionary of resolved action effects. - - Returns: - Dictionary mapping agent_id to an info dictionary with security metrics. - """ - infos = {} - for agent in list(observations.keys()): - agent_effect = resolved_effects.get(agent) - info: dict = {} - - # Count security-relevant events from this step - false_positives = 0 - successful_exploits = 0 - hosts_isolated = 0 - services_restored = 0 - - if agent_effect and agent_effect.success: - for delta_key, delta_val in agent_effect.state_deltas.items(): - if 'status' in delta_key and delta_val == 'isolated': - hosts_isolated += 1 - # Check if the isolated host was actually compromised - parts = delta_key.split('/') - if len(parts) >= 2: - ip = parts[1] - host = self.global_state.all_hosts.get(ip) - if host and host.compromised_by == 'None': - false_positives += 1 # Isolated a clean host - elif 'privilege' in delta_key and delta_val in ('User', 'Root'): - successful_exploits += 1 - elif 'status' in delta_key and delta_val == 'online': - services_restored += 1 - - info['false_positives'] = float(false_positives) - info['successful_exploits'] = float(successful_exploits) - info['hosts_isolated'] = float(hosts_isolated) - info['services_restored'] = float(services_restored) - - # Extra context for analysis - info['agent_energy'] = float(self.global_state.agent_energy.get(agent, 0)) - info['compromised_hosts'] = float( - sum( - 1 - for h in self.global_state.all_hosts.values() - if h.compromised_by != 'None' - ) - ) - info['isolated_hosts'] = float( - sum( - 1 - for h in self.global_state.all_hosts.values() - if h.status == 'isolated' - ) - ) - - infos[agent] = info - - return infos diff --git a/marl_cyborg/scenarios/ransomware.py b/marl_cyborg/scenarios/ransomware.py deleted file mode 100644 index 49b907a..0000000 --- a/marl_cyborg/scenarios/ransomware.py +++ /dev/null @@ -1,180 +0,0 @@ -from typing import TYPE_CHECKING, Dict -from marl_cyborg.scenarios.base_scenario import BaseScenario - -if TYPE_CHECKING: - from marl_cyborg.core.state import GlobalNetworkState - from marl_cyborg.core.action import ActionEffect - - -class RansomwareScenario(BaseScenario): - """ - Scenario: Red Team aggressively deploys ransomware. - - Reward Design Principles: - ───────────────────────── - • Action-level rewards are given ONCE on the step the action succeeds. - • NO per-step state compounding (avoids runaway accumulation). - • Episode-end bonus/penalty based on final network state. - • Red and Blue reward magnitudes are balanced (~±5 per action). - """ - - def __init__(self, agents): - self.agents = agents - - def calculate_reward( - self, - agent_id: str, - global_state: 'GlobalNetworkState', - effect: 'ActionEffect' = None, - ) -> float: - reward = 0.0 - - # Small energy cost penalty for any action - if effect and getattr(effect, 'cost', 0) > 0: - reward -= effect.cost * 0.05 - - if 'red' in agent_id.lower(): - reward += self._red_reward(agent_id, global_state, effect) - elif 'blue' in agent_id.lower(): - reward += self._blue_reward(agent_id, global_state, effect) - - return reward - - # ── Red Team Rewards ────────────────────────────────────── - - def _red_reward( - self, - agent_id: str, - global_state: 'GlobalNetworkState', - effect: 'ActionEffect' = None, - ) -> float: - reward = 0.0 - - if not effect: - return reward - - # ── ONE-TIME action bonuses (only on success) ───────── - if effect.success and effect.state_deltas: - for delta_key, delta_val in effect.state_deltas.items(): - # Initial compromise (None → User) - if 'privilege' in delta_key and delta_val == 'User': - reward += 3.0 - - # Privilege escalation (User → Root) - elif 'privilege' in delta_key and delta_val == 'Root': - reward += 5.0 - - # Ransomware impact (system_integrity → compromised) - elif 'system_integrity' in delta_key and delta_val == 'compromised': - reward += 10.0 - - # Setting compromised_by (confirms exploitation) - elif 'compromised_by' in delta_key and delta_val != 'None': - reward += 2.0 - - # ── Observation-based rewards ───────────────────────── - if effect.observation_data: - obs = effect.observation_data - - # Reconnaissance discoveries - if 'discovered_hosts' in obs: - discovered = obs['discovered_hosts'] - count = len(discovered) if isinstance(discovered, (list, set)) else 1 - reward += count * 0.5 - - if 'scan_results' in obs: - reward += 0.3 - - # Intelligence sharing - if 'shared' in obs: - reward += 1.0 - - # Penalties for failures - if 'Failed against Decoy' in str(obs.values()): - reward -= 3.0 - elif 'kernel panic' in str(obs.values()): - reward -= 5.0 - - # ── Failed action penalty ───────────────────────────── - if not effect.success: - reward -= 0.1 # Small penalty for wasted turn - - return reward - - # ── Blue Team Rewards ───────────────────────────────────── - - def _blue_reward( - self, - agent_id: str, - global_state: 'GlobalNetworkState', - effect: 'ActionEffect' = None, - ) -> float: - reward = 0.0 - - # ── ONE-TIME action bonuses ─────────────────────────── - if effect and effect.success and effect.state_deltas: - for delta_key, delta_val in effect.state_deltas.items(): - # Successful isolation - if 'status' in delta_key and delta_val == 'isolated': - ip = delta_key.split('/')[1] if '/' in delta_key else None - if ip: - host = global_state.all_hosts.get(ip) - if host and host.compromised_by != 'None': - reward += 5.0 # Correctly quarantined a compromised host - else: - reward -= 2.0 # False positive — isolated a clean host - - # Successful restoration/cleanup - elif 'privilege' in delta_key and delta_val == 'None': - reward += 3.0 # Successfully cleaned a host - - elif 'status' in delta_key and delta_val == 'online': - reward += 1.0 # Restored connectivity - - # Deployed a decoy - elif 'decoy' in delta_key and delta_val != 'inactive': - reward += 1.0 # Proactive defense - - # ── Per-step network health (small, bounded) ────────── - # Only count the RATIO of healthy hosts to avoid scale issues - total_hosts = max(len(global_state.all_hosts), 1) - healthy = sum( - 1 - for h in global_state.all_hosts.values() - if h.compromised_by == 'None' and h.status != 'isolated' - ) - health_ratio = healthy / total_hosts - reward += health_ratio * 1.0 # 0.0 to 1.0 per step - - # Small penalty proportional to compromised hosts - compromised = sum( - 1 - for h in global_state.all_hosts.values() - if h.compromised_by != 'None' and h.status != 'isolated' - ) - if compromised > 0: - reward -= (compromised / total_hosts) * 2.0 # 0.0 to -2.0 - - # ── Failed action penalty ───────────────────────────── - if effect and not effect.success: - reward -= 0.1 - - return reward - - def check_termination(self, global_state: 'GlobalNetworkState') -> Dict[str, bool]: - terminate = {agent: False for agent in self.agents} - - # If all critical Corporate/Secure hosts are compromised, episode ends - target_hosts = [ - h - for h in global_state.all_hosts.values() - if h.subnet_cidr in ['10.0.0.0/24', '10.0.1.0/24'] - ] - - if target_hosts and all( - getattr(h, 'system_integrity', 'clean') == 'compromised' - for h in target_hosts - ): - return {agent: True for agent in self.agents} - - return terminate diff --git a/marl_cyborg/topologies/network_generator.py b/marl_cyborg/topologies/network_generator.py deleted file mode 100644 index 6334a0c..0000000 --- a/marl_cyborg/topologies/network_generator.py +++ /dev/null @@ -1,104 +0,0 @@ -import random -import yaml -from pathlib import Path -from typing import Optional -from marl_cyborg.core.state import GlobalNetworkState, Subnet, Host - - -class NetworkGenerator: - """Procedurally generates or loads dynamic network topologies for MARL - - training. - - Prevents agents from overfitting to a static 10-node architecture. - """ - - def __init__(self, config_path: Optional[str] = None): - self.config_path = config_path - - def generate(self, seed: Optional[int] = None) -> GlobalNetworkState: - """Generates the architecture. - - If a config path was provided, loads deterministically. - Otherwise, procedurally generates a randomized topology. - """ - if seed is not None: - random.seed(seed) - - if self.config_path and Path(self.config_path).exists(): - return self._load_from_yaml(self.config_path) - - return self._generate_procedural() - - def _generate_procedural(self) -> GlobalNetworkState: - """Creates a randomized network with 2-4 subnets and 5-15 hosts. - - Randomizes IP bounds and initial decoy placements. - """ - state = GlobalNetworkState() - - # Determine number of subnets (e.g., DMZ, Corp, Secure, Guest) - num_subnets = random.randint(2, 4) - subnet_names = ['DMZ', 'Corporate', 'Secure', 'Guest'] - base_ips = ['192.168.1', '10.0.0', '10.0.1', '172.16.0'] - - for i in range(num_subnets): - cidr = f'{base_ips[i]}.0/24' - subnet = Subnet(cidr=cidr, name=subnet_names[i]) - state.add_subnet(subnet) - - # 2 to 6 hosts per subnet - num_hosts = random.randint(2, 6) - for j in range(1, num_hosts + 1): - host_ip = f'{base_ips[i]}.{j * random.randint(1, 5)}' - host = Host( - ip=host_ip, hostname=f'{subnet_names[i]}_Node_{j}', subnet_cidr=cidr - ) - - # Randomly place a Blue Team decoy (15% chance) - if random.random() < 0.15: - host.decoy = random.choice(['Apache', 'SSHD', 'Tomcat', 'active']) - else: - # Assign legitimate OS profiles and CVEs to real hosts - profiles = [ - ('Windows_10', ['RDP', 'SMB'], ['CVE-2019-0708', 'MS17-010']), - ( - 'Windows_Server_2016', - ['SMB', 'IIS'], - ['MS17-010', 'CVE-2021-44228'], - ), - ('Linux_Ubuntu', ['SSH', 'Apache'], ['CVE-2021-44228', 'V4L2']), - ('Linux_CentOS', ['SSH', 'Tomcat'], ['CVE-2021-44228']), - ] - chosen_os, chosen_services, potential_cves = random.choice(profiles) - host.os = chosen_os - host.services = chosen_services - # Randomly assign 0 to 2 specific vulnerabilities from the valid pool to prevent guaranteed exploitation - num_vulns = random.randint(0, min(2, len(potential_cves))) - host.vulnerabilities = random.sample(potential_cves, num_vulns) - - state.register_host(host) - - # Ensure Red always knows at least one entry node (DMZ) at step 0 - if '192.168.1.0/24' in state.subnets: - dmz_hosts = list(state.subnets['192.168.1.0/24'].hosts.values()) - if dmz_hosts: - # Add default knowledge for Red Commander - state.update_knowledge('red_commander', dmz_hosts[0].ip) - state.update_knowledge('red_operator', dmz_hosts[0].ip) - - # Blue knows everything initially - for host in state.all_hosts.values(): - state.update_knowledge('blue_commander', host.ip) - state.update_knowledge('blue_operator', host.ip) - - return state - - def _load_from_yaml(self, path: str) -> GlobalNetworkState: - """Loads a deterministic graph from a YAML configuration.""" - with open(path, 'r') as f: - _ = yaml.safe_load(f) - - # Implementation left for future expansion if YAML is required. - # Defaults to procedural if parsing fails. - return self._generate_procedural() diff --git a/marl_cyborg/__init__.py b/netforge_rl/__init__.py similarity index 59% rename from marl_cyborg/__init__.py rename to netforge_rl/__init__.py index 3a33d0d..aae6533 100644 --- a/marl_cyborg/__init__.py +++ b/netforge_rl/__init__.py @@ -1,17 +1,10 @@ -"""MARL_CybORG v3.0 Library Multi-Agent Cybersecurity Simulator based on - -CybORG. -""" - -__version__ = '3.0.0' - -from .environment.parallel_env import ParallelMarlCyborg +from .environment.parallel_env import NetForgeRLEnv from .core.action import BaseAction, ActionEffect from .core.state import GlobalNetworkState, Host, Subnet from .core.observation import BaseObservation __all__ = [ - 'ParallelMarlCyborg', + 'NetForgeRLEnv', 'BaseAction', 'ActionEffect', 'GlobalNetworkState', diff --git a/marl_cyborg/actions/blue/analysis.py b/netforge_rl/actions/blue/analysis.py similarity index 95% rename from marl_cyborg/actions/blue/analysis.py rename to netforge_rl/actions/blue/analysis.py index 6ddf60b..7862c56 100644 --- a/marl_cyborg/actions/blue/analysis.py +++ b/netforge_rl/actions/blue/analysis.py @@ -1,6 +1,9 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('blue_operator', 2) class Monitor(BaseAction): """Deploys active traffic analysis scanning on a specific subnet or host. @@ -64,6 +67,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('blue_operator', 3) class Analyze(BaseAction): """Executes a forensic deep scan of a specific host for malware indicators diff --git a/marl_cyborg/actions/network/ip_fragmentation.py b/netforge_rl/actions/network/ip_fragmentation.py similarity index 93% rename from marl_cyborg/actions/network/ip_fragmentation.py rename to netforge_rl/actions/network/ip_fragmentation.py index 22a37e9..38c0f1b 100644 --- a/marl_cyborg/actions/network/ip_fragmentation.py +++ b/netforge_rl/actions/network/ip_fragmentation.py @@ -1,9 +1,9 @@ from typing import TYPE_CHECKING -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect if TYPE_CHECKING: - from marl_cyborg.core.state import GlobalNetworkState + from netforge_rl.core.state import GlobalNetworkState class IPFragmentationAction(BaseAction): diff --git a/marl_cyborg/actions/red/coordination.py b/netforge_rl/actions/red/coordination.py similarity index 93% rename from marl_cyborg/actions/red/coordination.py rename to netforge_rl/actions/red/coordination.py index 5253e62..915dda2 100644 --- a/marl_cyborg/actions/red/coordination.py +++ b/netforge_rl/actions/red/coordination.py @@ -1,6 +1,9 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_commander', 3) class ShareIntelligence(BaseAction): """Explicitly shares the current agent's 'Fog of War' knowledge graph with diff --git a/marl_cyborg/actions/red/exploits.py b/netforge_rl/actions/red/exploits.py similarity index 75% rename from marl_cyborg/actions/red/exploits.py rename to netforge_rl/actions/red/exploits.py index bb7bd9a..1e13d61 100644 --- a/marl_cyborg/actions/red/exploits.py +++ b/netforge_rl/actions/red/exploits.py @@ -1,6 +1,10 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry +from netforge_rl.core.commands import UpdateHostPrivilegeCommand, EstablishSessionCommand + +@action_registry.register('red_operator', 0) class ExploitRemoteService(BaseAction): """Attempts to weaponize a generic remote code execution vulnerability on a @@ -16,7 +20,13 @@ class ExploitRemoteService(BaseAction): """ def __init__(self, agent_id: str, target_ip: str, port: int = 80): - super().__init__(agent_id, target_ip=target_ip) + super().__init__( + agent_id, + target_ip=target_ip, + cost=5, + duration=5, + required_prior_state='DiscoverNetworkServices' + ) self.port = port def validate(self, global_state) -> bool: @@ -30,53 +40,43 @@ def validate(self, global_state) -> bool: Returns: bool: True if physically routable, False if blocked by the physics engine. """ + if not super().validate(global_state): + return False return global_state.can_route_to(self.target_ip) - def execute(self, global_state) -> ActionEffect: - """Executes the payload impact mathematical deltas on the host's - - privilege table. Fails if no inherent vulnerabilities are found. - - Args: - global_state (GlobalNetworkState): State snapshot prior to temporal resolution. - - Returns: - ActionEffect: A structured delta mapping dictating a compromise to 'User'. - """ import random host = global_state.all_hosts.get(self.target_ip) - if host and host.vulnerabilities: - roll = random.random() - if roll < 0.15: - return ActionEffect( - success=False, - state_deltas={}, - observation_data={'exploit': 'failed silently'}, - ) - elif roll < 0.25: - return ActionEffect( - success=False, - state_deltas={f'hosts/{self.target_ip}/status': 'kernel_panic'}, - observation_data={'exploit': 'failed - kernel panic'}, - ) + if not host or not host.vulnerabilities: + return ActionEffect(success=False, state_deltas=[], observation_data={}) + + # CVSS-Weighted Stochastics (1.0 = 100% success on 10.0 CVSS, 0.2 = 20% on 2.0 CVSS) + cvss = getattr(host, 'cvss_score', 5.0) # Default average vulnerability logic + probability_of_success = cvss / 10.0 + if host.decoy == 'active' or random.random() > probability_of_success: return ActionEffect( - success=True, - state_deltas={ - f'hosts/{self.target_ip}/privilege': 'User', - f'hosts/{self.target_ip}/compromised_by': self.agent_id, - }, - observation_data={'exploit': 'success'}, + success=False, + state_deltas=[], + observation_data={'failed_exploit': self.target_ip, 'reason': 'stochastic_cvss_failure'}, ) - return ActionEffect( - success=False, - state_deltas={}, - observation_data={'exploit': 'failed - target lacks vulnerabilities'}, - ) + # Build OOP Delta List + deltas = [ + UpdateHostPrivilegeCommand(self.target_ip, 'User', compromised_by=self.agent_id), + EstablishSessionCommand(self.agent_id, self.target_ip, port=self.port) + ] + obs_data = { + 'exploit': self.target_ip, + 'status': 'User_Access_Gained', + 'active_session_established': True + } + return ActionEffect( + success=True, state_deltas=deltas, observation_data=obs_data, eta=self.duration + ) +@action_registry.register('red_operator', 3) class ExploitBlueKeep(BaseAction): """Executes the CVE-2019-0708 (BlueKeep) vulnerability against Remote @@ -90,7 +90,13 @@ class ExploitBlueKeep(BaseAction): """ def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) + super().__init__( + agent_id, + target_ip=target_ip, + cost=3, + duration=4, + required_prior_state='DiscoverNetworkServices' + ) def validate(self, global_state) -> bool: """Verifies logical network accessibility traversing through DMZs. @@ -101,6 +107,8 @@ def validate(self, global_state) -> bool: Returns: bool: True if Port 3389 routing is valid. """ + if not super().validate(global_state): + return False return global_state.can_route_to(self.target_ip) def execute(self, global_state) -> ActionEffect: @@ -150,6 +158,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_operator', 4) class ExploitEternalBlue(BaseAction): """Executes the MS17-010 (EternalBlue) exploit targeting poorly configured @@ -163,7 +172,13 @@ class ExploitEternalBlue(BaseAction): """ def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) + super().__init__( + agent_id, + target_ip=target_ip, + cost=4, + duration=6, + required_prior_state='DiscoverNetworkServices' + ) def validate(self, global_state) -> bool: """Ensures target accessibility within standard MARL routing @@ -176,6 +191,8 @@ def validate(self, global_state) -> bool: Returns: bool: Evaluation boolean for execution clearance. """ + if not super().validate(global_state): + return False return global_state.can_route_to(self.target_ip) def execute(self, global_state) -> ActionEffect: @@ -225,6 +242,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_operator', 5) class ExploitHTTP_RFI(BaseAction): """Simulates a Remote File Inclusion (RFI) web application attack vector @@ -239,10 +257,18 @@ class ExploitHTTP_RFI(BaseAction): """ def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip) + super().__init__( + agent_id, + target_ip=target_ip, + cost=3, + duration=3, + required_prior_state='DiscoverNetworkServices' + ) def validate(self, global_state) -> bool: """Requires valid routing to the web interface.""" + if not super().validate(global_state): + return False return global_state.can_route_to(self.target_ip) def execute(self, global_state) -> ActionEffect: diff --git a/marl_cyborg/actions/red/impact.py b/netforge_rl/actions/red/impact.py similarity index 64% rename from marl_cyborg/actions/red/impact.py rename to netforge_rl/actions/red/impact.py index e6f63cf..e3ebb89 100644 --- a/marl_cyborg/actions/red/impact.py +++ b/netforge_rl/actions/red/impact.py @@ -1,6 +1,9 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_operator', 2) class Impact(BaseAction): """Executes the final localized objective of the Cyber Kill Chain (e.g., @@ -46,6 +49,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_operator', 8) class KillProcess(BaseAction): """ Terminates a specific process (e.g., EDR sensor) on a compromised host. @@ -77,3 +81,37 @@ def execute(self, global_state) -> ActionEffect: state_deltas={f'hosts/{self.target_ip}/edr_active': False}, observation_data={'kill_process': 'EDR blinded'}, ) + + +@action_registry.register('red_operator', 10) +class ExfiltrateData(BaseAction): + """Exfiltrates sensitive data out of a compromised node. + + Generates enormous network traffic traversing the egress point. + Susceptible to Volumetric SIEM detection based on dynamic SNR thresholds. + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip, duration=3) + + def validate(self, global_state) -> bool: + host = global_state.all_hosts.get(self.target_ip) + if not host or host.privilege not in ['User', 'Root']: + return False + return global_state.can_route_to(self.target_ip) + + def execute(self, global_state) -> ActionEffect: + from netforge_rl.core.commands import ConsumeBandwidthCommand + + host = global_state.all_hosts.get(self.target_ip) + target_subnet = host.subnet_cidr if host else 'unknown' + + deltas = [ + ConsumeBandwidthCommand(target_subnet, amount=500) + ] + + return ActionEffect( + success=True, + state_deltas=deltas, + observation_data={'action': 'exfiltrated_data_chunk'} + ) diff --git a/marl_cyborg/actions/red/privilege_escalation.py b/netforge_rl/actions/red/privilege_escalation.py similarity index 73% rename from marl_cyborg/actions/red/privilege_escalation.py rename to netforge_rl/actions/red/privilege_escalation.py index 1b5906b..984a3ec 100644 --- a/marl_cyborg/actions/red/privilege_escalation.py +++ b/netforge_rl/actions/red/privilege_escalation.py @@ -1,6 +1,9 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_operator', 1) class PrivilegeEscalate(BaseAction): """Executes a generic local privilege escalation exploit on a compromised @@ -50,6 +53,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_operator', 6) class JuicyPotato(BaseAction): """Simulates the JuicyPotato local privilege escalation vector leveraging @@ -103,6 +107,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_operator', 7) class V4L2KernelExploit(BaseAction): """Executes a specific kernel-level vulnerability via Video4Linux (V4L2) on @@ -156,3 +161,49 @@ def execute(self, global_state) -> ActionEffect: }, observation_data={'privilege': 'V4L2 Kernel escalated'}, ) + + +@action_registry.register('red_operator', 9) +class PassTheHash(BaseAction): + """Executes a lateral movement attack bypassing authentication using + Kerberos / NTLM hashes extracted from a Domain Controller. + + Args: + agent_id (str): Reference to the executing Red operator. + target_ip (str): Target IPv4 string (can be un-exploited if DC is cracked). + """ + + def __init__(self, agent_id: str, target_ip: str): + super().__init__(agent_id, target_ip=target_ip, cost=1) + + def validate(self, global_state) -> bool: + """Validates if the agent has previously Rooted ANY Domain Controller.""" + has_dc_hash = False + for host in global_state.all_hosts.values(): + if host.is_domain_controller and host.privilege in ['Root', 'SYSTEM']: + if host.compromised_by == self.agent_id: + has_dc_hash = True + break + + if not has_dc_hash: + return False + + return global_state.can_route_to(self.target_ip) + + def execute(self, global_state) -> ActionEffect: + """Applies instantaneous SYSTEM access based on Golden Ticket leverage. + + Returns: + ActionEffect: Elevated root control unconditionally on target node. + """ + from netforge_rl.core.commands import UpdateHostPrivilegeCommand + + deltas = [ + UpdateHostPrivilegeCommand(self.target_ip, 'Root', compromised_by=self.agent_id) + ] + + return ActionEffect( + success=True, + state_deltas=deltas, + observation_data={'privilege': 'Pass-The-Hash lateral pivot successful.'}, + ) diff --git a/marl_cyborg/actions/red/reconnaissance.py b/netforge_rl/actions/red/reconnaissance.py similarity index 90% rename from marl_cyborg/actions/red/reconnaissance.py rename to netforge_rl/actions/red/reconnaissance.py index ee96d2d..6a4ff8f 100644 --- a/marl_cyborg/actions/red/reconnaissance.py +++ b/netforge_rl/actions/red/reconnaissance.py @@ -1,6 +1,9 @@ -from marl_cyborg.core.action import BaseAction, ActionEffect +from netforge_rl.core.action import BaseAction, ActionEffect +from netforge_rl.core.registry import action_registry + +@action_registry.register('red_commander', 0) class NetworkScan(BaseAction): """Executes a wide network scan across a specified subnet to map active IP @@ -49,6 +52,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_commander', 1) class DiscoverRemoteSystems(BaseAction): """Executes a targeted Ping Sweep against a subnet to explicitly identify @@ -108,6 +112,7 @@ def execute(self, global_state) -> ActionEffect: ) +@action_registry.register('red_commander', 2) class DiscoverNetworkServices(BaseAction): """Executes an intrusive port scan against a specific host to enumerate @@ -121,7 +126,7 @@ class DiscoverNetworkServices(BaseAction): """ def __init__(self, agent_id: str, target_ip: str): - super().__init__(agent_id, target_ip=target_ip, cost=2) + super().__init__(agent_id, target_ip=target_ip, cost=2, duration=3) def validate(self, global_state) -> bool: """Confirms target host is active and packet routing is unblocked by @@ -161,8 +166,11 @@ def execute(self, global_state) -> ActionEffect: obs_data['os'] = host.os obs_data['vulnerabilities'] = host.vulnerabilities - # Update knowledge that we scanned this host - knowledge_deltas = {f'knowledge/{self.agent_id}/{self.target_ip}': 'True'} + # Update knowledge that we scanned this host and add to history + knowledge_deltas = { + f'knowledge/{self.agent_id}/{self.target_ip}': 'True', + f'history/{self.agent_id}/DiscoverNetworkServices:{self.target_ip}': 'add' + } return ActionEffect( success=True, state_deltas=knowledge_deltas, observation_data=obs_data diff --git a/marl_cyborg/core/agent_interface.py b/netforge_rl/core/agent_interface.py similarity index 86% rename from marl_cyborg/core/agent_interface.py rename to netforge_rl/core/agent_interface.py index 262c325..016063e 100644 --- a/marl_cyborg/core/agent_interface.py +++ b/netforge_rl/core/agent_interface.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Optional -from marl_cyborg.core.observation import BaseObservation -from marl_cyborg.core.action import BaseAction +from netforge_rl.core.observation import BaseObservation +from netforge_rl.core.action import BaseAction class AgentInterface(ABC): diff --git a/netforge_rl/core/commands.py b/netforge_rl/core/commands.py new file mode 100644 index 0000000..b4924a2 --- /dev/null +++ b/netforge_rl/core/commands.py @@ -0,0 +1,180 @@ +from abc import ABC, abstractmethod +from typing import Any, Optional + + +class IStateDeltaCommand(ABC): + """Abstract Command interface for Object-Oriented state mutation. + Allows for decoupled physics application dynamically processed by the Resolve engine. + """ + @abstractmethod + def execute(self, global_state: Any): + pass + + @property + @abstractmethod + def target_ip(self) -> Optional[str]: + """Exposes the primary target IP so the Action Resolver can detect temporal collisions.""" + pass + + +class UpdateKnowledgeCommand(IStateDeltaCommand): + def __init__(self, agent_id: str, ip: str, value: Any = True): + self.agent_id = agent_id + self._target_ip = ip + self.value = value + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + global_state.update_knowledge(self.agent_id, self.target_ip) + + +class UpdateHostPrivilegeCommand(IStateDeltaCommand): + def __init__(self, ip: str, privilege: str, compromised_by: Optional[str] = None): + self._target_ip = ip + self.privilege = privilege + self.compromised_by = compromised_by + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + if self._target_ip in global_state.all_hosts: + host = global_state.all_hosts[self._target_ip] + host.privilege = self.privilege + if self.compromised_by: + host.compromised_by = self.compromised_by + + +class UpdateHostStatusCommand(IStateDeltaCommand): + def __init__(self, ip: str, status: str): + self._target_ip = ip + self.status = status + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + if self._target_ip in global_state.all_hosts: + global_state.all_hosts[self._target_ip].status = self.status + + +class UpdateServiceCommand(IStateDeltaCommand): + def __init__(self, ip: str, service: str, action: str = 'remove'): + self._target_ip = ip + self.service = service + self.action = action + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + if self._target_ip in global_state.all_hosts: + host = global_state.all_hosts[self._target_ip] + if self.action == 'remove' and self.service in host.services: + host.services.remove(self.service) + elif self.action == 'add' and self.service not in host.services: + host.services.append(self.service) + + +class BlockPortCommand(IStateDeltaCommand): + def __init__(self, subnet: str, port: int): + self.subnet = subnet + self.port = port + + @property + def target_ip(self) -> Optional[str]: + return None # Targets a subnet firewall, not a single node + + def execute(self, global_state: Any): + if 'global' not in global_state.firewalls: + # We import here to avoid circular dependencies if needed depending on global state structure + from netforge_rl.core.state import Firewall + global_state.firewalls['global'] = Firewall('global') + global_state.firewalls['global'].block_port(self.subnet, self.port) + + +class AddHistoryCommand(IStateDeltaCommand): + def __init__(self, agent_id: str, record: str): + self.agent_id = agent_id + self.record = record + + @property + def target_ip(self) -> Optional[str]: + return None # Targeting agent logic + + def execute(self, global_state: Any): + if self.agent_id not in global_state.action_history: + global_state.action_history[self.agent_id] = set() + global_state.action_history[self.agent_id].add(self.record) + + +class UpdateDecoyCommand(IStateDeltaCommand): + def __init__(self, ip: str, decoy_type: str): + self._target_ip = ip + self.decoy_type = decoy_type + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + if self._target_ip in global_state.all_hosts: + global_state.all_hosts[self._target_ip].decoy = self.decoy_type + + +class EstablishSessionCommand(IStateDeltaCommand): + def __init__(self, agent_id: str, ip: str, port: int): + self.agent_id = agent_id + self._target_ip = ip + self.port = port + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + if self.agent_id not in global_state.active_sessions: + global_state.active_sessions[self.agent_id] = [] + global_state.active_sessions[self.agent_id].append({'ip': self._target_ip, 'port': self.port}) + + +class DropSessionCommand(IStateDeltaCommand): + def __init__(self, ip: str): + self._target_ip = ip + + @property + def target_ip(self) -> Optional[str]: + return self._target_ip + + def execute(self, global_state: Any): + for agent_id, sessions in global_state.active_sessions.items(): + global_state.active_sessions[agent_id] = [s for s in sessions if s['ip'] != self._target_ip] + + +class ConsumeBandwidthCommand(IStateDeltaCommand): + def __init__(self, subnet: str, amount: int): + self.subnet = subnet + self.amount = amount + + @property + def target_ip(self) -> Optional[str]: + return None # Targets a subnet-wide telemetry pipe, not a single node + + def execute(self, global_state: Any): + if self.subnet not in global_state.subnet_bandwidth: + global_state.subnet_bandwidth[self.subnet] = 0 + + global_state.subnet_bandwidth[self.subnet] += self.amount + + # Volumetric SIEM Trigger Rule + # If any subnet spikes above 1000 units in a single tick, generate a SIEM log. + if global_state.subnet_bandwidth[self.subnet] > 1000: + volumetric_alert = {'type': 'volumetric_anomaly', 'subnet': self.subnet, 'severity': 'High'} + if volumetric_alert not in global_state.siem_log_buffer: + global_state.siem_log_buffer.append(volumetric_alert) diff --git a/netforge_rl/core/physics.py b/netforge_rl/core/physics.py new file mode 100644 index 0000000..5131bdb --- /dev/null +++ b/netforge_rl/core/physics.py @@ -0,0 +1,64 @@ +from typing import Dict, List +from netforge_rl.core.action import ActionEffect + + +class ConflictResolutionEngine: + """Strategy pattern engine defining the physical constraints of action collisions. + Mathematically resolves simultaneous temporal collisions. + """ + + @staticmethod + def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]: + """Core physics engine. + + Priority: Blue Defensive actions generally supersede Red Offensive actions + on the exact same network node if executed in the exact same elapsed fractional tick. + """ + red_agents = [a for a in effects if 'red' in a.lower()] + blue_agents = [a for a in effects if 'blue' in a.lower()] + + # 1. Compile all Blue defensive targets + blue_defended_nodes = {} + for blue_id in blue_agents: + eff = effects[blue_id] + if eff.success: + if isinstance(eff.state_deltas, dict): + for delta_key in eff.state_deltas.keys(): + if 'hosts/' in delta_key: + target_ip = delta_key.split('/')[1] + blue_defended_nodes[target_ip] = True + elif isinstance(eff.state_deltas, list): + for delta_obj in eff.state_deltas: + if getattr(delta_obj, 'target_ip', None): + blue_defended_nodes[delta_obj.target_ip] = True + + # 2. Evaluate Red attacks against the compiled simultaneous defenses + for red_id in red_agents: + red_eff = effects[red_id] + if not red_eff.success: + continue + + collision_detected = False + + # Check dictionary deltas + if isinstance(red_eff.state_deltas, dict): + for delta_key in list(red_eff.state_deltas.keys()): + if 'hosts/' in delta_key: + target_ip = delta_key.split('/')[1] + if target_ip in blue_defended_nodes: + collision_detected = True + break + # Check command object deltas + elif isinstance(red_eff.state_deltas, list): + for delta_obj in red_eff.state_deltas: + if getattr(delta_obj, 'target_ip', None) in blue_defended_nodes: + collision_detected = True + break + + if collision_detected: + # Nullify Red's attack effect entirely and alert the network telemetry + effects[red_id].success = False + effects[red_id].state_deltas = [] if isinstance(red_eff.state_deltas, list) else {} + effects[red_id].observation_data['alert'] = 'TEMPORAL_COLLISION_DEFENSE_SUPREMACY' + + return effects diff --git a/netforge_rl/core/registry.py b/netforge_rl/core/registry.py new file mode 100644 index 0000000..56dfa06 --- /dev/null +++ b/netforge_rl/core/registry.py @@ -0,0 +1,88 @@ +from typing import Dict, Type, Optional, Callable +import inspect + + +class ActionRegistry: + """A Factory Registry for dynamically tracking and instantiating + BaseAction subclasses without monolithic if/else blocks. + + Adheres strictly to the Open-Closed Principle. + """ + + def __init__(self): + # Maps (team, action_group_id) -> ActionClass + self._actions: Dict[str, Dict[int, Type]] = { + 'red': {}, + 'red_commander': {}, + 'blue': {}, + 'blue_commander': {} + } + + def register(self, team: str, group_id: int) -> Callable: + """Class decorator for registering an Action.""" + def decorator(cls): + if team not in self._actions: + self._actions[team] = {} + self._actions[team][group_id] = cls + return cls + return decorator + + def get_action_class(self, agent_id: str, group_id: int) -> Optional[Type]: + """Retrieves the class constructor for a specific integer offset.""" + if 'red' in agent_id.lower(): + team = 'red_commander' if 'commander' in agent_id.lower() else 'red' + else: + team = 'blue_commander' if 'commander' in agent_id.lower() else 'blue' + + return self._actions.get(team, {}).get(group_id) + + def instantiate_action(self, agent_id: str, action_data: object, target_ips: list) -> Optional[object]: + """Factory method to resolve the generic action payload to an instance. + + Supports legacy integer decoding or advanced Hierarchical MultiDiscrete + arrays: [action_type_id, target_ip_index]. + """ + if not target_ips: + target_ips = ['127.0.0.1'] + + if isinstance(action_data, (list, tuple)) or type(action_data).__name__ == 'ndarray': + # Hierarchical MultiDiscrete format + action_type_id = int(action_data[0]) + target_index = int(action_data[1]) + target_ip = target_ips[target_index % len(target_ips)] + else: + # Legacy PettingZoo flat discrete space math + action_int = int(action_data) + target_ip = target_ips[action_int % len(target_ips)] + action_group = action_int // len(target_ips) + + if 'red' in agent_id.lower(): + mod = 4 if 'commander' in agent_id.lower() else 11 + else: + mod = 5 if 'commander' in agent_id.lower() else 7 + + action_type_id = action_group % mod + + ActionCls = self.get_action_class(agent_id, action_type_id) + if not ActionCls: + return None + + # Pass required kwargs dynamically based on the action archetype + # Determine accepted arguments dynamically + sig = inspect.signature(ActionCls.__init__) + params = sig.parameters + + kwargs = {"agent_id": agent_id} + if "target_ip" in params: + kwargs["target_ip"] = target_ip + elif "target_subnet" in params: + # Approximate subnet from target_ip for actions requiring Subnets + parts = target_ip.split('.') + kwargs["target_subnet"] = f"{parts[0]}.{parts[1]}.{parts[2]}.0/24" + elif "target_agent_id" in params: + # Map target_agent_id randomly or conventionally for Coordination actions + kwargs["target_agent_id"] = "red_operator" if agent_id == "red_commander" else "red_commander" + + return ActionCls(**kwargs) + +action_registry = ActionRegistry() diff --git a/marl_cyborg/core/state.py b/netforge_rl/core/state.py similarity index 77% rename from marl_cyborg/core/state.py rename to netforge_rl/core/state.py index 65cec5e..71f61e7 100644 --- a/marl_cyborg/core/state.py +++ b/netforge_rl/core/state.py @@ -1,4 +1,4 @@ -from typing import Dict, Set +from typing import Dict, Set, Any class Host: @@ -14,6 +14,9 @@ def __init__(self, ip: str, hostname: str, subnet_cidr: str): self.os: str = 'Unknown' # OS profile assigned by NetworkGenerator self.services: list = [] # Running services (SSH, SMB, etc.) self.vulnerabilities: list = [] # CVEs present on this host + self.is_domain_controller: bool = False # Allows Pass-the-Hash if Rooted + self.human_vulnerability_score: float = 0.5 # Phishability indicator (0.0 to 1.0) + self.contains_honeytokens: bool = False # Triggers 100% confidence active deception traps def __repr__(self): return ( @@ -58,9 +61,19 @@ def __init__(self): self.agent_knowledge: Dict[str, Set[str]] = {} # Tracks remaining energy/budget for temporal action constraints self.agent_energy: Dict[str, int] = {} + # Advanced Attack Economics Constraints + self.agent_funds: Dict[str, int] = {} + self.agent_compute: Dict[str, int] = {} + self.business_downtime_score: float = 0.0 + # Tracks asynchronous execution locks (ETA system) self.agent_locked_until: Dict[str, int] = {} + self.action_history: Dict[str, set] = {} self.pending_effects: list = [] + self.siem_log_buffer: list = [] + self.current_tick: int = 0 + self.active_sessions: Dict[str, list] = {} + self.subnet_bandwidth: Dict[str, int] = {} def update_knowledge(self, agent_id: str, ip: str): """Adds an IP address to the agent's knowledge graph.""" @@ -80,12 +93,24 @@ def register_host(self, host: Host): if host.subnet_cidr in self.subnets: self.subnets[host.subnet_cidr].add_host(host) - def apply_delta(self, delta_key: str, delta_value: str): - """Dynamically mutates the network graph based on dot-notation paths. - - Example: apply_delta("hosts/10.0.0.5/status", "isolated") - Example: apply_delta("knowledge/red_agent_0/10.0.0.5", "True") + def apply_delta(self, delta_key: Any, delta_value: Any = None): + """Dynamically mutates the network graph. + + Now supports standard OOP `IStateDeltaCommand` objects executing their + own state mutations, while retaining legacy string-path parsing for compatibility. """ + # Command Pattern Standard Execution + if hasattr(delta_key, 'execute') and callable(getattr(delta_key, 'execute')): + delta_key.execute(self) + return + + # Legacy String parsing (Deprecation Path) + if not isinstance(delta_key, str): + from netforge_rl.core.commands import IStateDeltaCommand + if isinstance(delta_key, IStateDeltaCommand): + delta_key.execute(self) + return + parts = delta_key.split('/') if parts[0] == 'hosts' and len(parts) == 3: ip = parts[1] @@ -109,6 +134,13 @@ def apply_delta(self, delta_key: str, delta_value: str): self.firewalls['global'] = Firewall('global') self.firewalls['global'].block_port(subnet, port) + elif parts[0] == 'history' and len(parts) == 3: + agent_id = parts[1] + record = parts[2] + if agent_id not in self.action_history: + self.action_history[agent_id] = set() + self.action_history[agent_id].add(record) + def can_route_to(self, target_ip: str, port: int = None) -> bool: """Evaluates complex network topology rules for routing reachability and explicit firewall port blocks. diff --git a/netforge_rl/environment/__init__.py b/netforge_rl/environment/__init__.py new file mode 100644 index 0000000..06d267d --- /dev/null +++ b/netforge_rl/environment/__init__.py @@ -0,0 +1,4 @@ +from .base_env import BaseNetForgeRLEnv +from .parallel_env import NetForgeRLEnv + +__all__ = ['BaseNetForgeRLEnv', 'NetForgeRLEnv'] diff --git a/marl_cyborg/environment/base_env.py b/netforge_rl/environment/base_env.py similarity index 81% rename from marl_cyborg/environment/base_env.py rename to netforge_rl/environment/base_env.py index 189fe01..6873e6c 100644 --- a/marl_cyborg/environment/base_env.py +++ b/netforge_rl/environment/base_env.py @@ -4,7 +4,7 @@ from typing import Dict, Tuple, Any -class BaseMarlCyborg(ParallelEnv, abc.ABC): +class BaseNetForgeRLEnv(ParallelEnv, abc.ABC): """Abstract Base Class for all Continuous-Time MARL environments in CybORG. This guarantees that future environments (e.g., custom network @@ -44,10 +44,3 @@ def step( """ pass - @abc.abstractmethod - def _resolve_conflicts(self, intended_effects: Dict[str, Any]) -> Dict[str, Any]: - """Sub-classes must implement their own conflict resolution metric for - - simultaneous collisions. - """ - pass diff --git a/marl_cyborg/environment/pcap_synthesizer.py b/netforge_rl/environment/pcap_synthesizer.py similarity index 99% rename from marl_cyborg/environment/pcap_synthesizer.py rename to netforge_rl/environment/pcap_synthesizer.py index a44a92d..ded8c16 100644 --- a/marl_cyborg/environment/pcap_synthesizer.py +++ b/netforge_rl/environment/pcap_synthesizer.py @@ -22,7 +22,7 @@ class PCAPSynthesizer: - """Translates abstract RL actions (marl_cyborg_v3) into modeled Scapy + """Translates abstract RL actions (netforge_rl_v3) into modeled Scapy packets for offline IDS ML model training. diff --git a/marl_cyborg/scenarios/apt_espionage.py b/netforge_rl/scenarios/apt_espionage.py similarity index 94% rename from marl_cyborg/scenarios/apt_espionage.py rename to netforge_rl/scenarios/apt_espionage.py index da2dea1..48ba6a0 100644 --- a/marl_cyborg/scenarios/apt_espionage.py +++ b/netforge_rl/scenarios/apt_espionage.py @@ -1,9 +1,9 @@ from typing import TYPE_CHECKING, Dict -from marl_cyborg.scenarios.base_scenario import BaseScenario +from netforge_rl.scenarios.base_scenario import BaseScenario if TYPE_CHECKING: - from marl_cyborg.core.state import GlobalNetworkState - from marl_cyborg.core.action import ActionEffect + from netforge_rl.core.state import GlobalNetworkState + from netforge_rl.core.action import ActionEffect class AptEspionageScenario(BaseScenario): diff --git a/marl_cyborg/scenarios/base_scenario.py b/netforge_rl/scenarios/base_scenario.py similarity index 88% rename from marl_cyborg/scenarios/base_scenario.py rename to netforge_rl/scenarios/base_scenario.py index fd9ef8e..911d0ac 100644 --- a/marl_cyborg/scenarios/base_scenario.py +++ b/netforge_rl/scenarios/base_scenario.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING, Dict if TYPE_CHECKING: - from marl_cyborg.core.state import GlobalNetworkState - from marl_cyborg.core.action import ActionEffect + from netforge_rl.core.state import GlobalNetworkState + from netforge_rl.core.action import ActionEffect class BaseScenario(ABC): From 103992b99a569598dd9e4e5f0340bf4e05e02062 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:05:19 +0200 Subject: [PATCH 08/10] chore: map build configurations and changelog to Phase 5 architecture --- changelog.md | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/changelog.md b/changelog.md index 5778ca5..b1d303c 100644 --- a/changelog.md +++ b/changelog.md @@ -1,11 +1,11 @@ # Changelog -All notable changes to the `marl_cyborg` project will be documented in this file. +All notable changes to the `netforge_rl` project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [3.0.0] - 2026-02-28 ### Added -- **PettingZoo API Core Integration**: Created `marl_cyborg/environment/parallel_env.py` substituting the legacy wrapper paradigm with `pettingzoo.ParallelEnv`, explicitly allowing concurrent multi-agent action steps. +- **PettingZoo API Core Integration**: Created `netforge_rl/environment/parallel_env.py` substituting the legacy wrapper paradigm with `pettingzoo.ParallelEnv`, explicitly allowing concurrent multi-agent action steps. - **Gymnasium Box Compatibility**: All spaces natively map to `gymnasium.spaces` APIs instead of arbitrary nested classes. - **`BaseAction` / `BaseObservation` Abstract Hierarchy**: Abstracted action mutation. Cyber attacks no longer edit the state directly, but rather return a theoretical JSON impact via `ActionEffect` allowing the environment to resolve simultaneity conflicts natively. - **Python 3.12 Support (Native)**: Enforced via the new `pyproject.toml` definition. diff --git a/pyproject.toml b/pyproject.toml index 92b9245..3332c22 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0.0", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "marl_cyborg" +name = "netforge_rl" version = "3.0.0" description = "Multi-Agent Cybersecurity Simulator based on CybORG" authors = [ From 1764fb851db32064e5ed4c850fd3e5203b4261a5 Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:20:34 +0200 Subject: [PATCH 09/10] Ruff reformat --- netforge_rl/actions/__init__.py | 5 ++++- netforge_rl/actions/blue/mitigation.py | 1 - netforge_rl/actions/red/__init__.py | 2 ++ netforge_rl/core/physics.py | 2 +- netforge_rl/environment/parallel_env.py | 1 - netforge_rl/models/recurrent_mask_model.py | 3 +-- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/netforge_rl/actions/__init__.py b/netforge_rl/actions/__init__.py index 48f5125..6796da0 100644 --- a/netforge_rl/actions/__init__.py +++ b/netforge_rl/actions/__init__.py @@ -56,8 +56,11 @@ 'KillProcess', 'ShareIntelligence', 'OverloadPLC', + 'SecurityAwarenessTraining', + 'DeployHoneytoken', ] from .blue import SecurityAwarenessTraining - from .blue import DeployHoneytoken + +__all__.extend(['SecurityAwarenessTraining', 'DeployHoneytoken']) diff --git a/netforge_rl/actions/blue/mitigation.py b/netforge_rl/actions/blue/mitigation.py index e800d75..0085a93 100644 --- a/netforge_rl/actions/blue/mitigation.py +++ b/netforge_rl/actions/blue/mitigation.py @@ -1,7 +1,6 @@ from netforge_rl.core.action import BaseAction, ActionEffect from netforge_rl.core.registry import action_registry from netforge_rl.core.commands import UpdateHostStatusCommand, DropSessionCommand, BlockPortCommand -from netforge_rl.core.commands import UpdateHostStatusCommand, DropSessionCommand, BlockPortCommand diff --git a/netforge_rl/actions/red/__init__.py b/netforge_rl/actions/red/__init__.py index 5e0b35f..ee3063b 100644 --- a/netforge_rl/actions/red/__init__.py +++ b/netforge_rl/actions/red/__init__.py @@ -25,6 +25,8 @@ 'KillProcess', 'ShareIntelligence', 'OverloadPLC', + 'SpearPhishing', ] from .social_engineering import SpearPhishing + diff --git a/netforge_rl/core/physics.py b/netforge_rl/core/physics.py index 5131bdb..29c9b4b 100644 --- a/netforge_rl/core/physics.py +++ b/netforge_rl/core/physics.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict from netforge_rl.core.action import ActionEffect diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py index 267db11..8d936cf 100644 --- a/netforge_rl/environment/parallel_env.py +++ b/netforge_rl/environment/parallel_env.py @@ -7,7 +7,6 @@ from netforge_rl.core.registry import action_registry from netforge_rl.core.physics import ConflictResolutionEngine from netforge_rl.environment.base_env import BaseNetForgeRLEnv -import netforge_rl.actions # Triggers decorator registration from netforge_rl.topologies.network_generator import NetworkGenerator from netforge_rl.agents.green_agent import GreenAgent diff --git a/netforge_rl/models/recurrent_mask_model.py b/netforge_rl/models/recurrent_mask_model.py index edc1fe3..f8ea0f3 100644 --- a/netforge_rl/models/recurrent_mask_model.py +++ b/netforge_rl/models/recurrent_mask_model.py @@ -1,11 +1,10 @@ -import numpy as np import torch from torch import nn from ray.rllib.models.torch.recurrent_net import RecurrentNetwork as TorchRNN from ray.rllib.utils.annotations import override from ray.rllib.utils.typing import ModelConfigDict, TensorType -from typing import Dict, List, Tuple +from typing import List, Tuple class MaskedLSTMModel(TorchRNN, nn.Module): From 3247406664ebbe59e87975c8a8325c1faae6ea6d Mon Sep 17 00:00:00 2001 From: Igor Jankowski Date: Mon, 30 Mar 2026 20:25:55 +0200 Subject: [PATCH 10/10] Ruff reformat --- netforge_rl/actions/blue/__init__.py | 9 +- netforge_rl/actions/blue/analysis.py | 1 - netforge_rl/actions/blue/deception.py | 20 ++-- netforge_rl/actions/blue/mitigation.py | 28 ++--- netforge_rl/actions/red/__init__.py | 1 - netforge_rl/actions/red/coordination.py | 1 - netforge_rl/actions/red/exploits.py | 67 ++++++----- netforge_rl/actions/red/impact.py | 17 ++- netforge_rl/actions/red/kinetic.py | 21 ++-- .../actions/red/privilege_escalation.py | 11 +- netforge_rl/actions/red/reconnaissance.py | 3 +- netforge_rl/actions/red/social_engineering.py | 50 +++++--- netforge_rl/agents/green_agent.py | 42 ++++--- netforge_rl/core/action.py | 14 ++- netforge_rl/core/commands.py | 20 +++- netforge_rl/core/observation.py | 18 ++- netforge_rl/core/physics.py | 12 +- netforge_rl/core/registry.py | 54 +++++---- netforge_rl/core/state.py | 15 ++- netforge_rl/environment/base_env.py | 1 - netforge_rl/environment/parallel_env.py | 111 ++++++++++++------ netforge_rl/models/recurrent_mask_model.py | 27 +++-- netforge_rl/scenarios/ransomware.py | 18 ++- netforge_rl/topologies/network_generator.py | 66 +++++++---- 24 files changed, 388 insertions(+), 239 deletions(-) diff --git a/netforge_rl/actions/blue/__init__.py b/netforge_rl/actions/blue/__init__.py index 1e9deeb..5f82275 100644 --- a/netforge_rl/actions/blue/__init__.py +++ b/netforge_rl/actions/blue/__init__.py @@ -7,7 +7,14 @@ SecurityAwarenessTraining, ) from .analysis import Monitor, Analyze -from .deception import DeployDecoy, DecoyApache, DecoySSHD, DecoyTomcat, Misinform, DeployHoneytoken +from .deception import ( + DeployDecoy, + DecoyApache, + DecoySSHD, + DecoyTomcat, + Misinform, + DeployHoneytoken, +) __all__ = [ 'IsolateHost', diff --git a/netforge_rl/actions/blue/analysis.py b/netforge_rl/actions/blue/analysis.py index 7862c56..13b8f94 100644 --- a/netforge_rl/actions/blue/analysis.py +++ b/netforge_rl/actions/blue/analysis.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('blue_operator', 2) class Monitor(BaseAction): """Deploys active traffic analysis scanning on a specific subnet or host. diff --git a/netforge_rl/actions/blue/deception.py b/netforge_rl/actions/blue/deception.py index a07c1ed..545d6fc 100644 --- a/netforge_rl/actions/blue/deception.py +++ b/netforge_rl/actions/blue/deception.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('blue_commander', 0) class DeployDecoy(BaseAction): """Deploys a generic high-interaction honeypot/decoy service to a target @@ -210,24 +209,21 @@ def execute(self, global_state) -> ActionEffect: }, ) + @action_registry.register('blue_commander', 5) class DeployHoneytoken(BaseAction): """ Injects fake, highly-monitored credentials into the memory space of a real host. - If a Red agent successfully compromises this host and attempts to perform - post-exploitation (e.g., Pass-the-Hash, credential dumping), they ingest the - Honeytoken instead. This triggers an immediate, 100% confidence SIEM Alert + If a Red agent successfully compromises this host and attempts to perform + post-exploitation (e.g., Pass-the-Hash, credential dumping), they ingest the + Honeytoken instead. This triggers an immediate, 100% confidence SIEM Alert exposing the Red agent's exact location natively. """ def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, - cost=5, - financial_cost=50, - duration=1 + agent_id, target_ip=target_ip, cost=5, financial_cost=50, duration=1 ) def validate(self, global_state) -> bool: @@ -237,6 +233,8 @@ def execute(self, global_state) -> ActionEffect: return ActionEffect( success=True, state_deltas={f'hosts/{self.target_ip}/contains_honeytokens': True}, - observation_data={'alert': f'Honeytokens actively deployed in RAM on {self.target_ip}.'}, - eta=self.duration + observation_data={ + 'alert': f'Honeytokens actively deployed in RAM on {self.target_ip}.' + }, + eta=self.duration, ) diff --git a/netforge_rl/actions/blue/mitigation.py b/netforge_rl/actions/blue/mitigation.py index 0085a93..e5aaa06 100644 --- a/netforge_rl/actions/blue/mitigation.py +++ b/netforge_rl/actions/blue/mitigation.py @@ -1,7 +1,10 @@ from netforge_rl.core.action import BaseAction, ActionEffect from netforge_rl.core.registry import action_registry -from netforge_rl.core.commands import UpdateHostStatusCommand, DropSessionCommand, BlockPortCommand - +from netforge_rl.core.commands import ( + UpdateHostStatusCommand, + DropSessionCommand, + BlockPortCommand, +) @action_registry.register('blue_operator', 0) @@ -47,7 +50,7 @@ def execute(self, global_state) -> ActionEffect: success=True, state_deltas=[ UpdateHostStatusCommand(self.target_ip, 'isolated'), - DropSessionCommand(self.target_ip) + DropSessionCommand(self.target_ip), ], observation_data={'alert': 'Host isolated securely.'}, ) @@ -224,19 +227,18 @@ def execute(self, global_state) -> ActionEffect: """ return ActionEffect( success=True, - state_deltas=[ - BlockPortCommand(self.target_ip, self.port) - ], + state_deltas=[BlockPortCommand(self.target_ip, self.port)], observation_data={ 'alert': f'ACL configured: Drop Port {self.port} to {self.target_ip}' }, ) + @action_registry.register('blue_operator', 7) class SecurityAwarenessTraining(BaseAction): """ Deploys rapid, intensive anti-phishing training to a targeted subnet. - + Temporarily slashes the `human_vulnerability_score` of all users in the subset, drastically lowering the success rate of Red Team SpearPhishing campaigns. Costs significant Financial budget due to operational lost time. @@ -244,11 +246,7 @@ class SecurityAwarenessTraining(BaseAction): def __init__(self, agent_id: str, target_subnet: str): super().__init__( - agent_id, - target_ip=target_subnet, - cost=2, - financial_cost=2000, - duration=3 + agent_id, target_ip=target_subnet, cost=2, financial_cost=2000, duration=3 ) def validate(self, global_state) -> bool: @@ -270,6 +268,8 @@ def execute(self, global_state) -> ActionEffect: return ActionEffect( success=True, state_deltas=deltas, - observation_data={'alert': f'Security Awareness Training completed on {self.target_ip}. Vulnerability drastically lowered.'}, - eta=self.duration + observation_data={ + 'alert': f'Security Awareness Training completed on {self.target_ip}. Vulnerability drastically lowered.' + }, + eta=self.duration, ) diff --git a/netforge_rl/actions/red/__init__.py b/netforge_rl/actions/red/__init__.py index ee3063b..5e754e1 100644 --- a/netforge_rl/actions/red/__init__.py +++ b/netforge_rl/actions/red/__init__.py @@ -29,4 +29,3 @@ ] from .social_engineering import SpearPhishing - diff --git a/netforge_rl/actions/red/coordination.py b/netforge_rl/actions/red/coordination.py index 915dda2..6c4b15f 100644 --- a/netforge_rl/actions/red/coordination.py +++ b/netforge_rl/actions/red/coordination.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('red_commander', 3) class ShareIntelligence(BaseAction): """Explicitly shares the current agent's 'Fog of War' knowledge graph with diff --git a/netforge_rl/actions/red/exploits.py b/netforge_rl/actions/red/exploits.py index 1e13d61..786eee0 100644 --- a/netforge_rl/actions/red/exploits.py +++ b/netforge_rl/actions/red/exploits.py @@ -1,7 +1,9 @@ from netforge_rl.core.action import BaseAction, ActionEffect from netforge_rl.core.registry import action_registry -from netforge_rl.core.commands import UpdateHostPrivilegeCommand, EstablishSessionCommand - +from netforge_rl.core.commands import ( + UpdateHostPrivilegeCommand, + EstablishSessionCommand, +) @action_registry.register('red_operator', 0) @@ -21,11 +23,11 @@ class ExploitRemoteService(BaseAction): def __init__(self, agent_id: str, target_ip: str, port: int = 80): super().__init__( - agent_id, - target_ip=target_ip, - cost=5, - duration=5, - required_prior_state='DiscoverNetworkServices' + agent_id, + target_ip=target_ip, + cost=5, + duration=5, + required_prior_state='DiscoverNetworkServices', ) self.port = port @@ -43,6 +45,7 @@ def validate(self, global_state) -> bool: if not super().validate(global_state): return False return global_state.can_route_to(self.target_ip) + def execute(self, global_state) -> ActionEffect: import random @@ -58,24 +61,34 @@ def execute(self, global_state) -> ActionEffect: return ActionEffect( success=False, state_deltas=[], - observation_data={'failed_exploit': self.target_ip, 'reason': 'stochastic_cvss_failure'}, + observation_data={ + 'failed_exploit': self.target_ip, + 'reason': 'stochastic_cvss_failure', + }, ) # Build OOP Delta List deltas = [ - UpdateHostPrivilegeCommand(self.target_ip, 'User', compromised_by=self.agent_id), - EstablishSessionCommand(self.agent_id, self.target_ip, port=self.port) + UpdateHostPrivilegeCommand( + self.target_ip, 'User', compromised_by=self.agent_id + ), + EstablishSessionCommand(self.agent_id, self.target_ip, port=self.port), ] obs_data = { 'exploit': self.target_ip, 'status': 'User_Access_Gained', - 'active_session_established': True + 'active_session_established': True, } return ActionEffect( - success=True, state_deltas=deltas, observation_data=obs_data, eta=self.duration + success=True, + state_deltas=deltas, + observation_data=obs_data, + eta=self.duration, ) + + @action_registry.register('red_operator', 3) class ExploitBlueKeep(BaseAction): """Executes the CVE-2019-0708 (BlueKeep) vulnerability against Remote @@ -91,11 +104,11 @@ class ExploitBlueKeep(BaseAction): def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, - cost=3, - duration=4, - required_prior_state='DiscoverNetworkServices' + agent_id, + target_ip=target_ip, + cost=3, + duration=4, + required_prior_state='DiscoverNetworkServices', ) def validate(self, global_state) -> bool: @@ -173,11 +186,11 @@ class ExploitEternalBlue(BaseAction): def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, - cost=4, - duration=6, - required_prior_state='DiscoverNetworkServices' + agent_id, + target_ip=target_ip, + cost=4, + duration=6, + required_prior_state='DiscoverNetworkServices', ) def validate(self, global_state) -> bool: @@ -258,11 +271,11 @@ class ExploitHTTP_RFI(BaseAction): def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, - cost=3, - duration=3, - required_prior_state='DiscoverNetworkServices' + agent_id, + target_ip=target_ip, + cost=3, + duration=3, + required_prior_state='DiscoverNetworkServices', ) def validate(self, global_state) -> bool: diff --git a/netforge_rl/actions/red/impact.py b/netforge_rl/actions/red/impact.py index e3ebb89..604a730 100644 --- a/netforge_rl/actions/red/impact.py +++ b/netforge_rl/actions/red/impact.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('red_operator', 2) class Impact(BaseAction): """Executes the final localized objective of the Cyber Kill Chain (e.g., @@ -86,8 +85,8 @@ def execute(self, global_state) -> ActionEffect: @action_registry.register('red_operator', 10) class ExfiltrateData(BaseAction): """Exfiltrates sensitive data out of a compromised node. - - Generates enormous network traffic traversing the egress point. + + Generates enormous network traffic traversing the egress point. Susceptible to Volumetric SIEM detection based on dynamic SNR thresholds. """ @@ -102,16 +101,14 @@ def validate(self, global_state) -> bool: def execute(self, global_state) -> ActionEffect: from netforge_rl.core.commands import ConsumeBandwidthCommand - + host = global_state.all_hosts.get(self.target_ip) target_subnet = host.subnet_cidr if host else 'unknown' - - deltas = [ - ConsumeBandwidthCommand(target_subnet, amount=500) - ] - + + deltas = [ConsumeBandwidthCommand(target_subnet, amount=500)] + return ActionEffect( success=True, state_deltas=deltas, - observation_data={'action': 'exfiltrated_data_chunk'} + observation_data={'action': 'exfiltrated_data_chunk'}, ) diff --git a/netforge_rl/actions/red/kinetic.py b/netforge_rl/actions/red/kinetic.py index 4c150c3..f70bb2e 100644 --- a/netforge_rl/actions/red/kinetic.py +++ b/netforge_rl/actions/red/kinetic.py @@ -1,6 +1,7 @@ from netforge_rl.core.action import BaseAction, ActionEffect from netforge_rl.core.registry import action_registry + @action_registry.register('red_operator', 20) class OverloadPLC(BaseAction): """Initiates a devastating Kinetic Impact on a compromised Cyber-Physical OT Node. @@ -15,23 +16,23 @@ class OverloadPLC(BaseAction): def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, + agent_id, + target_ip=target_ip, cost=20, # High Energy financial_cost=1000, # High Financial Requirement - duration=10, - required_prior_state='DiscoverNetworkServices' + duration=10, + required_prior_state='DiscoverNetworkServices', ) def validate(self, global_state) -> bool: """Ensures the target exists, is routeable, and is ACTUALLY an OT device.""" if not super().validate(global_state): return False - + host = global_state.all_hosts.get(self.target_ip) if not host: return False - + # Target must be physically OT Infrastructure if host.os != 'PLC_Firmware' or host.subnet_cidr != '10.0.99.0/24': return False @@ -50,26 +51,26 @@ def execute(self, global_state) -> ActionEffect: return ActionEffect(success=False, state_deltas={}, observation_data={}) current_temperature = getattr(host, 'temperature', 50.0) - + # Inject rapid thermal escalation +90 to +150 delta spike = random.uniform(90.0, 150.0) new_temp = current_temperature + spike deltas = { f'hosts/{self.target_ip}/temperature': new_temp, - f'hosts/{self.target_ip}/system_integrity': 'kinetic_destruction' + f'hosts/{self.target_ip}/system_integrity': 'kinetic_destruction', } # Stuxnet-level alert severity for the Blue Team SIEM obs_data = { 'action': 'overload_plc', 'status': 'kinetic_impact_achieved', - 'terminal_temperature': new_temp + 'terminal_temperature': new_temp, } return ActionEffect( success=True, state_deltas=deltas, observation_data=obs_data, - eta=self.duration + eta=self.duration, ) diff --git a/netforge_rl/actions/red/privilege_escalation.py b/netforge_rl/actions/red/privilege_escalation.py index 984a3ec..9e195d0 100644 --- a/netforge_rl/actions/red/privilege_escalation.py +++ b/netforge_rl/actions/red/privilege_escalation.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('red_operator', 1) class PrivilegeEscalate(BaseAction): """Executes a generic local privilege escalation exploit on a compromised @@ -165,7 +164,7 @@ def execute(self, global_state) -> ActionEffect: @action_registry.register('red_operator', 9) class PassTheHash(BaseAction): - """Executes a lateral movement attack bypassing authentication using + """Executes a lateral movement attack bypassing authentication using Kerberos / NTLM hashes extracted from a Domain Controller. Args: @@ -184,10 +183,10 @@ def validate(self, global_state) -> bool: if host.compromised_by == self.agent_id: has_dc_hash = True break - + if not has_dc_hash: return False - + return global_state.can_route_to(self.target_ip) def execute(self, global_state) -> ActionEffect: @@ -199,7 +198,9 @@ def execute(self, global_state) -> ActionEffect: from netforge_rl.core.commands import UpdateHostPrivilegeCommand deltas = [ - UpdateHostPrivilegeCommand(self.target_ip, 'Root', compromised_by=self.agent_id) + UpdateHostPrivilegeCommand( + self.target_ip, 'Root', compromised_by=self.agent_id + ) ] return ActionEffect( diff --git a/netforge_rl/actions/red/reconnaissance.py b/netforge_rl/actions/red/reconnaissance.py index 6a4ff8f..00ef012 100644 --- a/netforge_rl/actions/red/reconnaissance.py +++ b/netforge_rl/actions/red/reconnaissance.py @@ -2,7 +2,6 @@ from netforge_rl.core.registry import action_registry - @action_registry.register('red_commander', 0) class NetworkScan(BaseAction): """Executes a wide network scan across a specified subnet to map active IP @@ -169,7 +168,7 @@ def execute(self, global_state) -> ActionEffect: # Update knowledge that we scanned this host and add to history knowledge_deltas = { f'knowledge/{self.agent_id}/{self.target_ip}': 'True', - f'history/{self.agent_id}/DiscoverNetworkServices:{self.target_ip}': 'add' + f'history/{self.agent_id}/DiscoverNetworkServices:{self.target_ip}': 'add', } return ActionEffect( diff --git a/netforge_rl/actions/red/social_engineering.py b/netforge_rl/actions/red/social_engineering.py index 93c4cd6..1f763b9 100644 --- a/netforge_rl/actions/red/social_engineering.py +++ b/netforge_rl/actions/red/social_engineering.py @@ -1,15 +1,19 @@ import random from netforge_rl.core.action import BaseAction, ActionEffect -from netforge_rl.core.commands import EstablishSessionCommand, UpdateHostPrivilegeCommand +from netforge_rl.core.commands import ( + EstablishSessionCommand, + UpdateHostPrivilegeCommand, +) from netforge_rl.core.registry import action_registry + @action_registry.register('red_operator', 21) class SpearPhishing(BaseAction): """Executes a targeted Social Engineering campaign against a Corporate End-User. Unlike standard Exploits, SpearPhishing leverages email protocols and bypasses perimeter firewalls and DMZ routing constraints entirely. Its success probability - is purely dictated by the `human_vulnerability_score` of the human operator + is purely dictated by the `human_vulnerability_score` of the human operator assigned to the generated Endpoint, simulating clicks on malicious attachments. Args: @@ -19,48 +23,58 @@ class SpearPhishing(BaseAction): def __init__(self, agent_id: str, target_ip: str): super().__init__( - agent_id, - target_ip=target_ip, - cost=2, + agent_id, + target_ip=target_ip, + cost=2, financial_cost=50, # Costs minor operational budget to purchase domain infrastructure duration=15, # High duration (waiting for users to organically check email) - required_prior_state=None # Can be shot blindly without structural discovery + required_prior_state=None, # Can be shot blindly without structural discovery ) def validate(self, global_state) -> bool: """Overrides parent zone constraints to simulate out-of-band email protocol delivery.""" if not self.target_ip or self.target_ip not in global_state.all_hosts: return False - + host = global_state.all_hosts[self.target_ip] # Nobody reads emails on PLCs or dedicated Servers; restricted to Windows Endpoints here if 'Windows' not in getattr(host, 'os', ''): return False - + return True def execute(self, global_state) -> ActionEffect: host = global_state.all_hosts.get(self.target_ip) - + # Pull the phishability score generated procedurally phish_chance = getattr(host, 'human_vulnerability_score', 0.1) if random.random() > phish_chance: return ActionEffect( - success=False, - state_deltas=[], - observation_data={'phishing': 'failed', 'reason': 'user reported suspicious email'} + success=False, + state_deltas=[], + observation_data={ + 'phishing': 'failed', + 'reason': 'user reported suspicious email', + }, ) # Build OOP Delta List granting User-level reverse shell from the clicked attachment deltas = [ - UpdateHostPrivilegeCommand(self.target_ip, 'User', compromised_by=self.agent_id), - EstablishSessionCommand(self.agent_id, self.target_ip, port=443) # Emulate C2 over HTTPS + UpdateHostPrivilegeCommand( + self.target_ip, 'User', compromised_by=self.agent_id + ), + EstablishSessionCommand( + self.agent_id, self.target_ip, port=443 + ), # Emulate C2 over HTTPS ] return ActionEffect( - success=True, - state_deltas=deltas, - observation_data={'phishing': 'success', 'status': 'C2 Session Established via user execution'}, - eta=self.duration + success=True, + state_deltas=deltas, + observation_data={ + 'phishing': 'success', + 'status': 'C2 Session Established via user execution', + }, + eta=self.duration, ) diff --git a/netforge_rl/agents/green_agent.py b/netforge_rl/agents/green_agent.py index 0d20f29..ccae9fb 100644 --- a/netforge_rl/agents/green_agent.py +++ b/netforge_rl/agents/green_agent.py @@ -9,7 +9,7 @@ class GreenAgent: It operates on a Day/Night cycle across the simulated business hours. """ - def __init__(self, agent_id: str = "green_agent_0"): + def __init__(self, agent_id: str = 'green_agent_0'): self.agent_id = agent_id def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any]: @@ -41,24 +41,34 @@ def generate_noise(self, current_tick: int, global_state: Any) -> Dict[str, Any] source = random.choice(hosts) target = random.choice(hosts) if source.ip != target.ip: - noise_logs.append({ - 'type': 'benign_traffic', - 'source': source.ip, - 'target': target.ip, - 'protocol': random.choice(['TCP', 'UDP', 'HTTP', 'DNS']), - 'severity': 0 - }) + noise_logs.append( + { + 'type': 'benign_traffic', + 'source': source.ip, + 'target': target.ip, + 'protocol': random.choice(['TCP', 'UDP', 'HTTP', 'DNS']), + 'severity': 0, + } + ) if random.random() < probability_of_false_positive: # Generate a false positive anomaly that could trip Blue's SIEM target = random.choice(hosts) - noise_logs.append({ - 'type': 'anomaly', - 'source': 'unknown_external', - 'target': target.ip, - 'signature': random.choice(['Failed_Login_Spike', 'Malformed_Packet', 'Suspicious_User_Agent']), - 'severity': random.randint(1, 4), - 'false_positive': True - }) + noise_logs.append( + { + 'type': 'anomaly', + 'source': 'unknown_external', + 'target': target.ip, + 'signature': random.choice( + [ + 'Failed_Login_Spike', + 'Malformed_Packet', + 'Suspicious_User_Agent', + ] + ), + 'severity': random.randint(1, 4), + 'false_positive': True, + } + ) return {'alerts': noise_logs} diff --git a/netforge_rl/core/action.py b/netforge_rl/core/action.py index ed0604a..eb60ac7 100644 --- a/netforge_rl/core/action.py +++ b/netforge_rl/core/action.py @@ -61,7 +61,7 @@ def validate(self, global_state: 'GlobalNetworkState') -> bool: if self.required_prior_state: # Check Action History state logic agent_history = global_state.action_history.get(self.agent_id, set()) - expected_record = f"{self.required_prior_state}:{self.target_ip}" + expected_record = f'{self.required_prior_state}:{self.target_ip}' if expected_record not in agent_history: return False @@ -70,8 +70,16 @@ def validate(self, global_state: 'GlobalNetworkState') -> bool: # Simple declarative Zone constraints example if 'red' in self.agent_id.lower() and host.subnet_cidr == '10.0.1.0/24': # Secure Data targets cannot be touched without pivoting via DMZ or Internal User privileges first - has_dmz = any(h.privilege in ['User', 'Root'] for h in global_state.all_hosts.values() if h.subnet_cidr == '192.168.1.0/24') - has_internal = any(h.privilege in ['User', 'Root'] for h in global_state.all_hosts.values() if h.subnet_cidr == '10.0.0.0/24') + has_dmz = any( + h.privilege in ['User', 'Root'] + for h in global_state.all_hosts.values() + if h.subnet_cidr == '192.168.1.0/24' + ) + has_internal = any( + h.privilege in ['User', 'Root'] + for h in global_state.all_hosts.values() + if h.subnet_cidr == '10.0.0.0/24' + ) if not (has_dmz or has_internal): return False diff --git a/netforge_rl/core/commands.py b/netforge_rl/core/commands.py index b4924a2..82f45f4 100644 --- a/netforge_rl/core/commands.py +++ b/netforge_rl/core/commands.py @@ -6,6 +6,7 @@ class IStateDeltaCommand(ABC): """Abstract Command interface for Object-Oriented state mutation. Allows for decoupled physics application dynamically processed by the Resolve engine. """ + @abstractmethod def execute(self, global_state: Any): pass @@ -95,6 +96,7 @@ def execute(self, global_state: Any): if 'global' not in global_state.firewalls: # We import here to avoid circular dependencies if needed depending on global state structure from netforge_rl.core.state import Firewall + global_state.firewalls['global'] = Firewall('global') global_state.firewalls['global'].block_port(self.subnet, self.port) @@ -141,7 +143,9 @@ def target_ip(self) -> Optional[str]: def execute(self, global_state: Any): if self.agent_id not in global_state.active_sessions: global_state.active_sessions[self.agent_id] = [] - global_state.active_sessions[self.agent_id].append({'ip': self._target_ip, 'port': self.port}) + global_state.active_sessions[self.agent_id].append( + {'ip': self._target_ip, 'port': self.port} + ) class DropSessionCommand(IStateDeltaCommand): @@ -154,7 +158,9 @@ def target_ip(self) -> Optional[str]: def execute(self, global_state: Any): for agent_id, sessions in global_state.active_sessions.items(): - global_state.active_sessions[agent_id] = [s for s in sessions if s['ip'] != self._target_ip] + global_state.active_sessions[agent_id] = [ + s for s in sessions if s['ip'] != self._target_ip + ] class ConsumeBandwidthCommand(IStateDeltaCommand): @@ -169,12 +175,16 @@ def target_ip(self) -> Optional[str]: def execute(self, global_state: Any): if self.subnet not in global_state.subnet_bandwidth: global_state.subnet_bandwidth[self.subnet] = 0 - + global_state.subnet_bandwidth[self.subnet] += self.amount - + # Volumetric SIEM Trigger Rule # If any subnet spikes above 1000 units in a single tick, generate a SIEM log. if global_state.subnet_bandwidth[self.subnet] > 1000: - volumetric_alert = {'type': 'volumetric_anomaly', 'subnet': self.subnet, 'severity': 'High'} + volumetric_alert = { + 'type': 'volumetric_anomaly', + 'subnet': self.subnet, + 'severity': 'High', + } if volumetric_alert not in global_state.siem_log_buffer: global_state.siem_log_buffer.append(volumetric_alert) diff --git a/netforge_rl/core/observation.py b/netforge_rl/core/observation.py index fdc2e8c..728422a 100644 --- a/netforge_rl/core/observation.py +++ b/netforge_rl/core/observation.py @@ -22,7 +22,7 @@ def __init__(self, agent_id: str): # Tracks anomalies like 802.11 Deauths, Fragmented IP packets, etc. self.network_telemetry = {} - + # SIEM Logs self.siem_alerts = [] @@ -43,9 +43,9 @@ def update_from_state(self, global_state: Any, action_effects: List[Any]): for ip in known_ips: if ip in global_state.all_hosts: host = global_state.all_hosts[ip] - + if 'blue' in self.agent_id.lower(): - # Strict POMDP: Blue cannot see physical truth vectors. + # Strict POMDP: Blue cannot see physical truth vectors. # They must rely on SIEM telemetry alone for detection. self.visible_hosts[ip] = { 'state': 'unknown', @@ -55,7 +55,9 @@ def update_from_state(self, global_state: Any, action_effects: List[Any]): else: # Red Team directly monitors nodes they root. self.visible_hosts[ip] = { - 'state': 'compromised' if host.privilege in ['User', 'Root'] else 'clean', + 'state': 'compromised' + if host.privilege in ['User', 'Root'] + else 'clean', 'status': host.status, 'decoy': 'unknown', } @@ -64,7 +66,9 @@ def update_from_state(self, global_state: Any, action_effects: List[Any]): # Pull SIEM logs that have arrived (arrival_tick <= current_tick) if hasattr(global_state, 'siem_log_buffer'): for log in global_state.siem_log_buffer: - if log.get('arrival_tick', 0) <= getattr(global_state, 'current_tick', 0): + if log.get('arrival_tick', 0) <= getattr( + global_state, 'current_tick', 0 + ): self.siem_alerts.append(log) self.network_telemetry['global_alert_level'] = np.random.uniform(0, 1) @@ -96,7 +100,9 @@ def to_numpy(self, max_size: int = 256) -> np.ndarray: idx += 1 if 'active_alerts' in self.network_telemetry and idx < max_size: - vector[idx] = float(min(self.network_telemetry['active_alerts'] / 20.0, 1.0)) + vector[idx] = float( + min(self.network_telemetry['active_alerts'] / 20.0, 1.0) + ) idx += 1 for val in self.objective_vector: diff --git a/netforge_rl/core/physics.py b/netforge_rl/core/physics.py index 29c9b4b..9ba2bb8 100644 --- a/netforge_rl/core/physics.py +++ b/netforge_rl/core/physics.py @@ -6,7 +6,7 @@ class ConflictResolutionEngine: """Strategy pattern engine defining the physical constraints of action collisions. Mathematically resolves simultaneous temporal collisions. """ - + @staticmethod def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]: """Core physics engine. @@ -39,7 +39,7 @@ def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]: continue collision_detected = False - + # Check dictionary deltas if isinstance(red_eff.state_deltas, dict): for delta_key in list(red_eff.state_deltas.keys()): @@ -58,7 +58,11 @@ def resolve(effects: Dict[str, ActionEffect]) -> Dict[str, ActionEffect]: if collision_detected: # Nullify Red's attack effect entirely and alert the network telemetry effects[red_id].success = False - effects[red_id].state_deltas = [] if isinstance(red_eff.state_deltas, list) else {} - effects[red_id].observation_data['alert'] = 'TEMPORAL_COLLISION_DEFENSE_SUPREMACY' + effects[red_id].state_deltas = ( + [] if isinstance(red_eff.state_deltas, list) else {} + ) + effects[red_id].observation_data['alert'] = ( + 'TEMPORAL_COLLISION_DEFENSE_SUPREMACY' + ) return effects diff --git a/netforge_rl/core/registry.py b/netforge_rl/core/registry.py index 56dfa06..ded6f4a 100644 --- a/netforge_rl/core/registry.py +++ b/netforge_rl/core/registry.py @@ -3,28 +3,30 @@ class ActionRegistry: - """A Factory Registry for dynamically tracking and instantiating + """A Factory Registry for dynamically tracking and instantiating BaseAction subclasses without monolithic if/else blocks. - + Adheres strictly to the Open-Closed Principle. """ - + def __init__(self): # Maps (team, action_group_id) -> ActionClass self._actions: Dict[str, Dict[int, Type]] = { 'red': {}, 'red_commander': {}, 'blue': {}, - 'blue_commander': {} + 'blue_commander': {}, } - + def register(self, team: str, group_id: int) -> Callable: """Class decorator for registering an Action.""" + def decorator(cls): if team not in self._actions: self._actions[team] = {} self._actions[team][group_id] = cls return cls + return decorator def get_action_class(self, agent_id: str, group_id: int) -> Optional[Type]: @@ -33,19 +35,24 @@ def get_action_class(self, agent_id: str, group_id: int) -> Optional[Type]: team = 'red_commander' if 'commander' in agent_id.lower() else 'red' else: team = 'blue_commander' if 'commander' in agent_id.lower() else 'blue' - + return self._actions.get(team, {}).get(group_id) - def instantiate_action(self, agent_id: str, action_data: object, target_ips: list) -> Optional[object]: + def instantiate_action( + self, agent_id: str, action_data: object, target_ips: list + ) -> Optional[object]: """Factory method to resolve the generic action payload to an instance. - - Supports legacy integer decoding or advanced Hierarchical MultiDiscrete + + Supports legacy integer decoding or advanced Hierarchical MultiDiscrete arrays: [action_type_id, target_ip_index]. """ if not target_ips: target_ips = ['127.0.0.1'] - if isinstance(action_data, (list, tuple)) or type(action_data).__name__ == 'ndarray': + if ( + isinstance(action_data, (list, tuple)) + or type(action_data).__name__ == 'ndarray' + ): # Hierarchical MultiDiscrete format action_type_id = int(action_data[0]) target_index = int(action_data[1]) @@ -55,34 +62,37 @@ def instantiate_action(self, agent_id: str, action_data: object, target_ips: lis action_int = int(action_data) target_ip = target_ips[action_int % len(target_ips)] action_group = action_int // len(target_ips) - + if 'red' in agent_id.lower(): mod = 4 if 'commander' in agent_id.lower() else 11 else: mod = 5 if 'commander' in agent_id.lower() else 7 - + action_type_id = action_group % mod - + ActionCls = self.get_action_class(agent_id, action_type_id) if not ActionCls: return None - + # Pass required kwargs dynamically based on the action archetype # Determine accepted arguments dynamically sig = inspect.signature(ActionCls.__init__) params = sig.parameters - - kwargs = {"agent_id": agent_id} - if "target_ip" in params: - kwargs["target_ip"] = target_ip - elif "target_subnet" in params: + + kwargs = {'agent_id': agent_id} + if 'target_ip' in params: + kwargs['target_ip'] = target_ip + elif 'target_subnet' in params: # Approximate subnet from target_ip for actions requiring Subnets parts = target_ip.split('.') - kwargs["target_subnet"] = f"{parts[0]}.{parts[1]}.{parts[2]}.0/24" - elif "target_agent_id" in params: + kwargs['target_subnet'] = f'{parts[0]}.{parts[1]}.{parts[2]}.0/24' + elif 'target_agent_id' in params: # Map target_agent_id randomly or conventionally for Coordination actions - kwargs["target_agent_id"] = "red_operator" if agent_id == "red_commander" else "red_commander" + kwargs['target_agent_id'] = ( + 'red_operator' if agent_id == 'red_commander' else 'red_commander' + ) return ActionCls(**kwargs) + action_registry = ActionRegistry() diff --git a/netforge_rl/core/state.py b/netforge_rl/core/state.py index 71f61e7..0458575 100644 --- a/netforge_rl/core/state.py +++ b/netforge_rl/core/state.py @@ -15,8 +15,12 @@ def __init__(self, ip: str, hostname: str, subnet_cidr: str): self.services: list = [] # Running services (SSH, SMB, etc.) self.vulnerabilities: list = [] # CVEs present on this host self.is_domain_controller: bool = False # Allows Pass-the-Hash if Rooted - self.human_vulnerability_score: float = 0.5 # Phishability indicator (0.0 to 1.0) - self.contains_honeytokens: bool = False # Triggers 100% confidence active deception traps + self.human_vulnerability_score: float = ( + 0.5 # Phishability indicator (0.0 to 1.0) + ) + self.contains_honeytokens: bool = ( + False # Triggers 100% confidence active deception traps + ) def __repr__(self): return ( @@ -65,7 +69,7 @@ def __init__(self): self.agent_funds: Dict[str, int] = {} self.agent_compute: Dict[str, int] = {} self.business_downtime_score: float = 0.0 - + # Tracks asynchronous execution locks (ETA system) self.agent_locked_until: Dict[str, int] = {} self.action_history: Dict[str, set] = {} @@ -95,8 +99,8 @@ def register_host(self, host: Host): def apply_delta(self, delta_key: Any, delta_value: Any = None): """Dynamically mutates the network graph. - - Now supports standard OOP `IStateDeltaCommand` objects executing their + + Now supports standard OOP `IStateDeltaCommand` objects executing their own state mutations, while retaining legacy string-path parsing for compatibility. """ # Command Pattern Standard Execution @@ -107,6 +111,7 @@ def apply_delta(self, delta_key: Any, delta_value: Any = None): # Legacy String parsing (Deprecation Path) if not isinstance(delta_key, str): from netforge_rl.core.commands import IStateDeltaCommand + if isinstance(delta_key, IStateDeltaCommand): delta_key.execute(self) return diff --git a/netforge_rl/environment/base_env.py b/netforge_rl/environment/base_env.py index 6873e6c..07ca0fc 100644 --- a/netforge_rl/environment/base_env.py +++ b/netforge_rl/environment/base_env.py @@ -43,4 +43,3 @@ def step( Must strictly return: (observations, rewards, terminations, truncations, infos) """ pass - diff --git a/netforge_rl/environment/parallel_env.py b/netforge_rl/environment/parallel_env.py index 8d936cf..d44eb96 100644 --- a/netforge_rl/environment/parallel_env.py +++ b/netforge_rl/environment/parallel_env.py @@ -32,7 +32,9 @@ def __init__(self, scenario_config: dict): if scenario_config else 'ransomware' ) - self.log_latency = scenario_config.get('log_latency', 2) if scenario_config else 2 + self.log_latency = ( + scenario_config.get('log_latency', 2) if scenario_config else 2 + ) self.green_agent = GreenAgent() self.possible_agents = [ 'red_commander', @@ -56,14 +58,22 @@ def __init__(self, scenario_config: dict): # Native Gymnasium Spaces for PettingZoo API + RLlib Mapping self.observation_spaces = { - agent: gym.spaces.Dict({ - "obs": gym.spaces.Box(low=-1.0, high=1.0, shape=(256,), dtype=np.float32), - "action_mask": gym.spaces.Box(low=0, high=1, shape=(62,), dtype=np.int8) # 12 action types + 50 IPs - }) + agent: gym.spaces.Dict( + { + 'obs': gym.spaces.Box( + low=-1.0, high=1.0, shape=(256,), dtype=np.float32 + ), + 'action_mask': gym.spaces.Box( + low=0, high=1, shape=(62,), dtype=np.int8 + ), # 12 action types + 50 IPs + } + ) for agent in self.possible_agents } self.action_spaces = { - agent: gym.spaces.MultiDiscrete([12, 50]) # [Action Type (max 12), Target IP Index (max 50 padded)] + agent: gym.spaces.MultiDiscrete( + [12, 50] + ) # [Action Type (max 12), Target IP Index (max 50 padded)] for agent in self.possible_agents } self.max_ticks = 1000 @@ -80,7 +90,9 @@ def reset( self.global_state = self.network_generator.generate(seed=seed) self.agents = self.possible_agents[:] self.global_state.agent_energy = {agent: 50 for agent in self.agents} - self.global_state.agent_funds = {agent: 10000 if 'blue' in agent else 5000 for agent in self.agents} + self.global_state.agent_funds = { + agent: 10000 if 'blue' in agent else 5000 for agent in self.agents + } self.global_state.agent_compute = {agent: 1000 for agent in self.agents} self.global_state.business_downtime_score = 0.0 observations = {} @@ -88,8 +100,8 @@ def reset( obs = BaseObservation(agent_id) obs.update_from_state(self.global_state, []) observations[agent_id] = { - "obs": obs.to_numpy(max_size=256), - "action_mask": self.action_mask(agent_id) + 'obs': obs.to_numpy(max_size=256), + 'action_mask': self.action_mask(agent_id), } self.current_tick = 0 self.event_queue = [] @@ -116,12 +128,12 @@ def action_mask(self, agent: str) -> np.ndarray: else: valid_action_types = 5 if 'commander' in agent.lower() else 7 mask[:valid_action_types] = 1 - + # 2. Target IP Dimension (12-61) target_ips = sorted(list(self.global_state.all_hosts.keys())) num_targets = min(len(target_ips), 50) - mask[12:12 + num_targets] = 1 - + mask[12 : 12 + num_targets] = 1 + return mask def step( @@ -158,7 +170,9 @@ def step( action = action_int else: target_ips = sorted(list(self.global_state.all_hosts.keys())) - action = action_registry.instantiate_action(agent, action_int, target_ips) + action = action_registry.instantiate_action( + agent, action_int, target_ips + ) if action is None: continue # Invalid action/unmapped action bounds @@ -181,27 +195,37 @@ def step( # Generate intended effect (though state might shift by completion time) effect = action.execute(self.global_state) - + self.global_state.agent_locked_until[agent] = completion_tick - self.event_queue.append({ - 'completion_tick': completion_tick, - 'agent': agent, - 'action': action, - 'effect': effect, - 'target_ip': getattr(action, 'target_ip', None) - }) + self.event_queue.append( + { + 'completion_tick': completion_tick, + 'agent': agent, + 'action': action, + 'effect': effect, + 'target_ip': getattr(action, 'target_ip', None), + } + ) # 2. INTERRUPTION LOGIC (e.g., IsolateHost Immediately Cancels Ongoing Attacks) for event in list(self.event_queue): - if type(event['action']).__name__ == "IsolateHost" and event['completion_tick'] > self.current_tick: + if ( + type(event['action']).__name__ == 'IsolateHost' + and event['completion_tick'] > self.current_tick + ): # Isolate is queued or starting now; interrupt Red target_to_isolate = event['target_ip'] for red_event in list(self.event_queue): - if 'red' in red_event['agent'].lower() and red_event['target_ip'] == target_to_isolate: + if ( + 'red' in red_event['agent'].lower() + and red_event['target_ip'] == target_to_isolate + ): if red_event in self.event_queue: self.event_queue.remove(red_event) # Unlock Red agent since their attack was disrupted - self.global_state.agent_locked_until[red_event['agent']] = self.current_tick + self.global_state.agent_locked_until[red_event['agent']] = ( + self.current_tick + ) # 3. ADVANCE TIME self.current_tick += 1 @@ -209,7 +233,9 @@ def step( self.global_state.subnet_bandwidth.clear() # GENERATE BACKGROUND NOISE & DELAYED ALERTS - noise_data = self.green_agent.generate_noise(self.current_tick, self.global_state) + noise_data = self.green_agent.generate_noise( + self.current_tick, self.global_state + ) for anomaly in noise_data.get('alerts', []): anomaly['arrival_tick'] = self.current_tick + self.log_latency self.global_state.siem_log_buffer.append(anomaly) @@ -231,24 +257,30 @@ def step( for res_agent, res_effect in resolved_effects.items(): if 'red' in res_agent and res_effect.success: target_ip = res_effect.observation_data.get('exploit', 'unknown') - + # Active Deception intercept host = self.global_state.all_hosts.get(target_ip) is_honeytoken_trap = host and host.contains_honeytokens - signature = 'HONEYTOKEN_TRIGGERED' if is_honeytoken_trap else 'RED_ACTION_DETECTED' + signature = ( + 'HONEYTOKEN_TRIGGERED' + if is_honeytoken_trap + else 'RED_ACTION_DETECTED' + ) severity = 10 if is_honeytoken_trap else 5 log_delay = 0 if is_honeytoken_trap else self.log_latency - self.global_state.siem_log_buffer.append({ - 'type': 'anomaly', - 'source': res_agent, - 'target': target_ip, - 'signature': signature, - 'severity': severity, - 'false_positive': False, - 'arrival_tick': self.current_tick + log_delay - }) + self.global_state.siem_log_buffer.append( + { + 'type': 'anomaly', + 'source': res_agent, + 'target': target_ip, + 'signature': signature, + 'severity': severity, + 'false_positive': False, + 'arrival_tick': self.current_tick + log_delay, + } + ) observations = {} rewards = {} @@ -273,14 +305,15 @@ def step( # Normalize the MultiDiscrete action to a float between 0.0 and 1.0 cmd_val = ( (float(cmd_action[0]) / 12.0) - if getattr(cmd_action, '__iter__', False) and not isinstance(cmd_action, BaseAction) + if getattr(cmd_action, '__iter__', False) + and not isinstance(cmd_action, BaseAction) else 1.0 ) obs_array[0] = cmd_val observations[agent] = { - "obs": obs_array, - "action_mask": self.action_mask(agent) + 'obs': obs_array, + 'action_mask': self.action_mask(agent), } # Reward shaping applied here natively factoring in immediate action outcomes agent_effect = resolved_effects.get(agent) diff --git a/netforge_rl/models/recurrent_mask_model.py b/netforge_rl/models/recurrent_mask_model.py index f8ea0f3..5f1b46d 100644 --- a/netforge_rl/models/recurrent_mask_model.py +++ b/netforge_rl/models/recurrent_mask_model.py @@ -11,7 +11,7 @@ class MaskedLSTMModel(TorchRNN, nn.Module): """ A custom PyTorch model integrating native RLlib LSTM cells with strict Action Masking. - We subclass TorchRNN to allow Ray to handle complex `seq_lens` padding and tensor + We subclass TorchRNN to allow Ray to handle complex `seq_lens` padding and tensor BPTT dimension tracking natively. We extract the mask out of the flattened array manually. """ @@ -26,7 +26,9 @@ def __init__( nn.Module.__init__(self) super().__init__(obs_space, action_space, num_outputs, model_config, name) - self.cell_size = model_config.get("custom_model_config", {}).get("lstm_cell_size", 128) + self.cell_size = model_config.get('custom_model_config', {}).get( + 'lstm_cell_size', 128 + ) # 1. Feature Extractor (Dense Layers) # Input size is 256 sliced from the flattened 318 Dict space @@ -50,22 +52,21 @@ def __init__( def forward_rnn( self, inputs: TensorType, state: List[TensorType], seq_lens: TensorType ) -> Tuple[TensorType, List[TensorType]]: - # Ray flatly concatenates spaces in alphanumeric order. - # action_mask Box(62) + # action_mask Box(62) # obs Box(256) # Therefore: action_mask is [:62], obs is [62:] action_mask = inputs[:, :, :62] obs = inputs[:, :, 62:] - + # 1. Core Embeddings over Observation Sequence x = nn.functional.relu(self.fc1(obs)) x = nn.functional.relu(self.fc2(x)) - + # 2. Evaluate Temporal Memory h_in, c_in = state[0].unsqueeze(0), state[1].unsqueeze(0) x, (h_out, c_out) = self.lstm(x, (h_in, c_in)) - + # 3. Finalize Output Logit Distribution Branches logits = self.action_branch(x) self._cur_value = torch.reshape(self.value_branch(x), [-1]) @@ -74,19 +75,21 @@ def forward_rnn( masked_logits = torch.where( action_mask == 0.0, torch.tensor(-1e10, device=logits.device, dtype=logits.dtype), - logits + logits, ) - + return masked_logits, [h_out.squeeze(0), c_out.squeeze(0)] @override(TorchRNN) def value_function(self) -> TensorType: - assert self._cur_value is not None, "Evaluate forward_rnn() before value_function() call." + assert self._cur_value is not None, ( + 'Evaluate forward_rnn() before value_function() call.' + ) return self._cur_value @override(TorchRNN) def get_initial_state(self) -> List[TensorType]: return [ - torch.zeros(self.cell_size, dtype=torch.float32), - torch.zeros(self.cell_size, dtype=torch.float32) + torch.zeros(self.cell_size, dtype=torch.float32), + torch.zeros(self.cell_size, dtype=torch.float32), ] diff --git a/netforge_rl/scenarios/ransomware.py b/netforge_rl/scenarios/ransomware.py index 0a783d1..a16f436 100644 --- a/netforge_rl/scenarios/ransomware.py +++ b/netforge_rl/scenarios/ransomware.py @@ -73,7 +73,10 @@ def _red_reward( reward += 2.0 # OT / SCADA Catastrophic Win - elif 'system_integrity' in delta_key and delta_val == 'kinetic_destruction': + elif ( + 'system_integrity' in delta_key + and delta_val == 'kinetic_destruction' + ): reward += 10000.0 # ── Observation-based rewards ───────────────────────── @@ -140,7 +143,10 @@ def _blue_reward( reward += 1.0 # Proactive defense # Failed to prevent Kinetic Destruction of SCADA framework - elif 'system_integrity' in delta_key and delta_val == 'kinetic_destruction': + elif ( + 'system_integrity' in delta_key + and delta_val == 'kinetic_destruction' + ): reward -= 10000.0 # ── Per-step network health (small, bounded) ────────── @@ -165,7 +171,8 @@ def _blue_reward( # ── Business Downtime Penalty (Economics) ───────────── total_isolated = sum( - 1 for h in global_state.all_hosts.values() + 1 + for h in global_state.all_hosts.values() if h.status == 'isolated' and h.subnet_cidr != '169.254.0.0/16' ) if total_isolated > 0: @@ -196,7 +203,10 @@ def check_termination(self, global_state: 'GlobalNetworkState') -> Dict[str, boo return {agent: True for agent in self.agents} # Terminal Fail-State for Blue: A PLC has been physically destroyed - if any(getattr(h, 'system_integrity', 'clean') == 'kinetic_destruction' for h in global_state.all_hosts.values()): + if any( + getattr(h, 'system_integrity', 'clean') == 'kinetic_destruction' + for h in global_state.all_hosts.values() + ): return {agent: True for agent in self.agents} return terminate diff --git a/netforge_rl/topologies/network_generator.py b/netforge_rl/topologies/network_generator.py index 34e8300..03f3010 100644 --- a/netforge_rl/topologies/network_generator.py +++ b/netforge_rl/topologies/network_generator.py @@ -32,12 +32,12 @@ def generate(self, seed: Optional[int] = None) -> GlobalNetworkState: def _generate_procedural(self) -> GlobalNetworkState: """Creates a randomized network using NetworkX hierarchical patterns. - + Enforces a constant size of 50 hosts for Neural Network dimension consistency. Active topology spans 15-30 nodes; the rest are instantiated as inactive padding. """ import networkx as nx - + state = GlobalNetworkState() G = nx.DiGraph() @@ -61,17 +61,21 @@ def _generate_procedural(self) -> GlobalNetworkState: state.add_subnet(subnet) # Weight more hosts into Corp and Secure zones - num_hosts = random.randint(3, 8) if name in ['Corporate', 'Secure'] else random.randint(2, 5) - + num_hosts = ( + random.randint(3, 8) + if name in ['Corporate', 'Secure'] + else random.randint(2, 5) + ) + for j in range(1, num_hosts + 1): host_ip = f'{base_ips[i]}.{j * random.randint(1, 3)}' - + # Check for duplicates due to random gap intervals while host_ip in [h.ip for h in active_hosts]: host_ip = f'{base_ips[i]}.{j * random.randint(1, 10)}' host = Host(ip=host_ip, hostname=f'{name}_Node_{j}', subnet_cidr=cidr) - + # Assign Decoys vs Real Systems if random.random() < 0.15 and name != 'OT_Subnet': host.decoy = random.choice(['Apache', 'SSHD', 'Tomcat', 'active']) @@ -84,24 +88,42 @@ def _generate_procedural(self) -> GlobalNetworkState: setattr(host, 'pressure', float(random.randint(90, 110))) else: profiles = [ - ('Windows_Server_2016', ['SMB', 'IIS'], ['MS17-010', 'CVE-2021-44228']), - ('Windows_10', ['RDP', 'SMB'], ['CVE-2019-0708', 'MS17-010']), - ('Linux_Ubuntu', ['SSH', 'Apache'], ['CVE-2021-44228', 'V4L2']), + ( + 'Windows_Server_2016', + ['SMB', 'IIS'], + ['MS17-010', 'CVE-2021-44228'], + ), + ( + 'Windows_10', + ['RDP', 'SMB'], + ['CVE-2019-0708', 'MS17-010'], + ), + ( + 'Linux_Ubuntu', + ['SSH', 'Apache'], + ['CVE-2021-44228', 'V4L2'], + ), ('Linux_CentOS', ['SSH', 'Tomcat'], ['CVE-2021-44228']), ] - chosen_os, chosen_services, potential_cves = random.choice(profiles) - + chosen_os, chosen_services, potential_cves = random.choice( + profiles + ) + host.os = chosen_os host.services = chosen_services host.cvss_score = round(random.uniform(3.5, 9.8), 1) - + # Human error dynamics: Linux admins fall for phishing less often than generalized Windows Corporate users - base_phish = random.uniform(0.1, 0.4) if 'Linux' in chosen_os else random.uniform(0.3, 0.9) + base_phish = ( + random.uniform(0.1, 0.4) + if 'Linux' in chosen_os + else random.uniform(0.3, 0.9) + ) host.human_vulnerability_score = round(base_phish, 2) - + num_vulns = random.randint(0, min(2, len(potential_cves))) host.vulnerabilities = random.sample(potential_cves, num_vulns) - + # Designate Domain Controllers only in Corp or Secure Windows servers if 'Windows' in chosen_os and name in ['Corporate', 'Secure']: if random.random() < 0.3: @@ -110,7 +132,7 @@ def _generate_procedural(self) -> GlobalNetworkState: active_hosts.append(host) state.register_host(host) G.add_node(host.ip, type=name) - + # Assure at least 1 Domain Controller exists if domain_controllers: random.choice(domain_controllers).is_domain_controller = True @@ -119,13 +141,15 @@ def _generate_procedural(self) -> GlobalNetworkState: win_hosts = [h for h in active_hosts if 'Windows' in h.os] if win_hosts: random.choice(win_hosts).is_domain_controller = True - + # Fill strictly to 50 nodes for Neural Network shape constant padding_needed = 50 - len(state.all_hosts) for p in range(padding_needed): - pad_ip = f'169.254.0.{p+1}' - pad_host = Host(ip=pad_ip, hostname=f'Pad_Node_{p}', subnet_cidr='169.254.0.0/16') - pad_host.status = 'isolated' # Native Action Masking bounds + pad_ip = f'169.254.0.{p + 1}' + pad_host = Host( + ip=pad_ip, hostname=f'Pad_Node_{p}', subnet_cidr='169.254.0.0/16' + ) + pad_host.status = 'isolated' # Native Action Masking bounds state.register_host(pad_host) self._configure_procedural_vision(state) @@ -139,7 +163,7 @@ def _configure_procedural_vision(self, state: GlobalNetworkState): state.update_knowledge('red_commander', host.ip) state.update_knowledge('red_operator', host.ip) break - + # Blue knows all active topology natively but is blind to zero-padded isolated objects for host in state.all_hosts.values(): if host.status != 'isolated':