diff --git a/components/site-workflows/sensors/sensor-ironic-node-update.yaml b/components/site-workflows/sensors/sensor-ironic-node-update.yaml index 5dd9a5b0a..94caf15d8 100644 --- a/components/site-workflows/sensors/sensor-ironic-node-update.yaml +++ b/components/site-workflows/sensors/sensor-ironic-node-update.yaml @@ -8,7 +8,6 @@ metadata: workflows.argoproj.io/description: |+ Triggers on the following Ironic Events: - - baremetal.node.create.end which happens when a baremetal node is created - baremetal.node.update.end which happens when node fields are updated - baremetal.node.delete.end which happens when a node is deleted @@ -37,7 +36,6 @@ spec: - path: "body.event_type" type: "string" value: - - "baremetal.node.create.end" - "baremetal.node.update.end" - "baremetal.node.delete.end" template: diff --git a/python/understack-workflows/tests/test_nautobot_device_sync.py b/python/understack-workflows/tests/test_nautobot_device_sync.py index a210547e2..deaaf7109 100644 --- a/python/understack-workflows/tests/test_nautobot_device_sync.py +++ b/python/understack-workflows/tests/test_nautobot_device_sync.py @@ -267,6 +267,25 @@ def test_set_location_no_switch_info(self, device_info, mock_nautobot): assert device_info.location_id is None assert device_info.rack_id is None + def test_set_location_switch_info_is_string_none(self, device_info, mock_nautobot): + """Test that literal string 'None' in switch_info is skipped.""" + ports = [ + MagicMock( + local_link_connection={ + "switch_info": "None", + "switch_id": "00:00:00:00:00:00", + "port_id": "None", + } + ) + ] + + _set_location_from_switches(device_info, ports, mock_nautobot) + + # Should not make any API calls + mock_nautobot.dcim.devices.get.assert_not_called() + assert device_info.location_id is None + assert device_info.rack_id is None + def test_set_location_switch_not_found(self, device_info, mock_nautobot): ports = [ MagicMock( @@ -405,6 +424,54 @@ def test_no_changes(self, mock_nautobot_device): assert result is False mock_nautobot_device.save.assert_not_called() + def test_update_position_and_face(self, mock_nautobot_device): + """Test that position and face are updated when preserved from old device.""" + mock_nautobot_device.position = None + mock_nautobot_device.face = None + device_info = DeviceInfo( + uuid="test-uuid", + position=42, + face="front", + ) + + result = _update_nautobot_device(device_info, mock_nautobot_device) + + assert result is True + assert mock_nautobot_device.position == 42 + assert mock_nautobot_device.face == "front" + mock_nautobot_device.save.assert_called_once() + + def test_update_position_defaults_face_to_front(self, mock_nautobot_device): + """Test that face defaults to 'front' when position is set but face is not.""" + mock_nautobot_device.position = None + mock_nautobot_device.face = None + device_info = DeviceInfo( + uuid="test-uuid", + position=10, + # face is None + ) + + result = _update_nautobot_device(device_info, mock_nautobot_device) + + assert result is True + assert mock_nautobot_device.position == 10 + assert mock_nautobot_device.face == "front" + + def test_update_position_no_change_when_same(self, mock_nautobot_device): + """Test that no update occurs when position/face are already set correctly.""" + mock_nautobot_device.position = 42 + mock_nautobot_device.face = MagicMock(value="front") + device_info = DeviceInfo( + uuid="test-uuid", + position=42, + face="front", + ) + + result = _update_nautobot_device(device_info, mock_nautobot_device) + + assert result is False + mock_nautobot_device.save.assert_not_called() + class TestExtractNodeUuidFromEvent: """Test cases for _extract_node_uuid_from_event function.""" @@ -443,7 +510,7 @@ def mock_nautobot(self): return MagicMock() @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") @patch( "understack_workflows.oslo_event.nautobot_device_sync.sync_interfaces_from_data" ) @@ -470,7 +537,7 @@ def test_sync_creates_new_device( mock_nautobot.dcim.devices.create.assert_called_once() @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") @patch( "understack_workflows.oslo_event.nautobot_device_sync.sync_interfaces_from_data" ) @@ -507,10 +574,11 @@ def test_sync_with_empty_uuid_returns_error(self, mock_nautobot): assert result == EXIT_STATUS_FAILURE @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") - def test_sync_without_location_returns_error( + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") + def test_sync_without_location_skips_for_uninspected_node( self, mock_fetch, mock_ironic_class, mock_nautobot ): + """Test that sync skips gracefully for uninspected nodes without location.""" node_uuid = str(uuid.uuid4()) device_info = DeviceInfo(uuid=node_uuid) # No location mock_fetch.return_value = (device_info, {}, []) @@ -518,18 +586,22 @@ def test_sync_without_location_returns_error( result = sync_device_to_nautobot(node_uuid, mock_nautobot) + # Should fail since no location available assert result == EXIT_STATUS_FAILURE + # Should not attempt to create device + mock_nautobot.dcim.devices.create.assert_not_called() @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") @patch( "understack_workflows.oslo_event.nautobot_device_sync.sync_interfaces_from_data" ) - def test_sync_finds_device_by_name_with_matching_uuid( + def test_sync_recreates_device_with_mismatched_uuid( self, mock_sync_interfaces, mock_fetch, mock_ironic_class, mock_nautobot ): - """Test that device found by name with matching UUID is updated.""" + """Test device with mismatched UUID is deleted and recreated.""" node_uuid = str(uuid.uuid4()) + old_uuid = str(uuid.uuid4()) # Different UUID device_info = DeviceInfo( uuid=node_uuid, name="Dell-ABC123", @@ -541,39 +613,34 @@ def test_sync_finds_device_by_name_with_matching_uuid( mock_fetch.return_value = (device_info, {}, []) # First get by ID returns None - # Second get by name returns device with same UUID + # Second get by name returns device with different UUID existing_device = MagicMock() - existing_device.id = node_uuid # Same UUID + existing_device.id = old_uuid # Different UUID existing_device.status = MagicMock(name="Planned") existing_device.name = "Dell-ABC123" - existing_device.serial = None - existing_device.location = None - existing_device.rack = None - existing_device.tenant = None - existing_device.custom_fields = {} mock_nautobot.dcim.devices.get.side_effect = [None, existing_device] + mock_nautobot.dcim.devices.create.return_value = MagicMock() mock_sync_interfaces.return_value = EXIT_STATUS_SUCCESS result = sync_device_to_nautobot(node_uuid, mock_nautobot) assert result == EXIT_STATUS_SUCCESS - # Should NOT delete since UUIDs match - existing_device.delete.assert_not_called() - # Should NOT create new device - mock_nautobot.dcim.devices.create.assert_not_called() + # Should delete old device + existing_device.delete.assert_called_once() + # Should create new device with correct UUID + mock_nautobot.dcim.devices.create.assert_called_once() @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") @patch( "understack_workflows.oslo_event.nautobot_device_sync.sync_interfaces_from_data" ) - def test_sync_recreates_device_with_mismatched_uuid( + def test_sync_device_not_found_by_name_creates_new( self, mock_sync_interfaces, mock_fetch, mock_ironic_class, mock_nautobot ): - """Test device with mismatched UUID is deleted and recreated.""" + """Test that device not found by UUID or name is created.""" node_uuid = str(uuid.uuid4()) - old_uuid = str(uuid.uuid4()) # Different UUID device_info = DeviceInfo( uuid=node_uuid, name="Dell-ABC123", @@ -584,53 +651,60 @@ def test_sync_recreates_device_with_mismatched_uuid( ) mock_fetch.return_value = (device_info, {}, []) - # First get by ID returns None - # Second get by name returns device with different UUID - existing_device = MagicMock() - existing_device.id = old_uuid # Different UUID - existing_device.status = MagicMock(name="Planned") - existing_device.name = "Dell-ABC123" - - mock_nautobot.dcim.devices.get.side_effect = [None, existing_device] + # Both lookups return None + mock_nautobot.dcim.devices.get.side_effect = [None, None] mock_nautobot.dcim.devices.create.return_value = MagicMock() mock_sync_interfaces.return_value = EXIT_STATUS_SUCCESS result = sync_device_to_nautobot(node_uuid, mock_nautobot) assert result == EXIT_STATUS_SUCCESS - # Should delete old device - existing_device.delete.assert_called_once() - # Should create new device with correct UUID mock_nautobot.dcim.devices.create.assert_called_once() @patch("understack_workflows.oslo_event.nautobot_device_sync.IronicClient") - @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_device_info") + @patch("understack_workflows.oslo_event.nautobot_device_sync.fetch_node_details") @patch( "understack_workflows.oslo_event.nautobot_device_sync.sync_interfaces_from_data" ) - def test_sync_device_not_found_by_name_creates_new( + def test_sync_uuid_mismatch_uses_old_device_location( self, mock_sync_interfaces, mock_fetch, mock_ironic_class, mock_nautobot ): - """Test that device not found by UUID or name is created.""" + """Test that location is preserved from old device when new node has none. + + When re-enrolling a node that hasn't been inspected yet, we should use + the location from the old Nautobot device to create the new one. + """ node_uuid = str(uuid.uuid4()) + old_uuid = str(uuid.uuid4()) device_info = DeviceInfo( uuid=node_uuid, name="Dell-ABC123", manufacturer="Dell", model="PowerEdge R640", - location_id="location-uuid", + # No location_id from switch lookup status="Active", ) mock_fetch.return_value = (device_info, {}, []) - # Both lookups return None - mock_nautobot.dcim.devices.get.side_effect = [None, None] + # Old device has location + existing_device = MagicMock() + existing_device.id = old_uuid + existing_device.name = "Dell-ABC123" + existing_device.location = MagicMock(id="old-location-uuid") + existing_device.rack = MagicMock(id="old-rack-uuid") + existing_device.position = 42 + existing_device.face = MagicMock(value="front") + + mock_nautobot.dcim.devices.get.side_effect = [None, existing_device] mock_nautobot.dcim.devices.create.return_value = MagicMock() mock_sync_interfaces.return_value = EXIT_STATUS_SUCCESS result = sync_device_to_nautobot(node_uuid, mock_nautobot) assert result == EXIT_STATUS_SUCCESS + # Should delete old device after preserving location + existing_device.delete.assert_called_once() + # Should create new device mock_nautobot.dcim.devices.create.assert_called_once() diff --git a/python/understack-workflows/understack_workflows/main/openstack_oslo_event.py b/python/understack-workflows/understack_workflows/main/openstack_oslo_event.py index ef7dd90e6..2ef7946ea 100644 --- a/python/understack-workflows/understack_workflows/main/openstack_oslo_event.py +++ b/python/understack-workflows/understack_workflows/main/openstack_oslo_event.py @@ -72,7 +72,6 @@ class NoEventHandlerError(Exception): "baremetal.portgroup.create.end": ironic_portgroup.handle_portgroup_create_update, "baremetal.portgroup.update.end": ironic_portgroup.handle_portgroup_create_update, "baremetal.portgroup.delete.end": ironic_portgroup.handle_portgroup_delete, - "baremetal.node.create.end": nautobot_device_sync.handle_node_event, "baremetal.node.update.end": nautobot_device_sync.handle_node_event, "baremetal.node.delete.end": nautobot_device_sync.handle_node_delete_event, "baremetal.node.provision_set.end": [ diff --git a/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_interface_sync.py b/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_interface_sync.py index 6f2738d54..92b641b45 100644 --- a/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_interface_sync.py +++ b/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_interface_sync.py @@ -483,7 +483,13 @@ def _handle_cable_management( nautobot_client: Nautobot, ) -> None: """Handle cable creation/update for interface with switch connection info.""" - if not interface.switch_info or not interface.switch_port_id: + # Skip if switch info is missing or placeholder "None" string + if ( + not interface.switch_info + or not interface.switch_port_id + or interface.switch_info == "None" + or interface.switch_port_id == "None" + ): return logger.debug( diff --git a/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_sync.py b/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_sync.py index 5864e4b3d..87041c3f2 100644 --- a/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_sync.py +++ b/python/understack-workflows/understack_workflows/oslo_event/nautobot_device_sync.py @@ -14,6 +14,7 @@ from typing import Any from uuid import UUID +from ironicclient.common.apiclient import exceptions as ironic_exceptions from openstack.connection import Connection from pynautobot.core.api import Api as Nautobot @@ -60,6 +61,8 @@ class DeviceInfo: # Location location_id: str | None = None rack_id: str | None = None + position: int | None = None + face: str | None = None # Status status: str | None = None @@ -73,12 +76,6 @@ class DeviceInfo: custom_fields: dict[str, str] = field(default_factory=dict) -class RackLocationError(Exception): - """Raised when node rack location cannot be determined.""" - - pass - - def _normalise_manufacturer(name: str) -> str: """Return a standard name for Manufacturer.""" if "DELL" in name.upper(): @@ -176,7 +173,8 @@ def _set_location_from_switches( llc = port.local_link_connection or {} switch_info = llc.get("switch_info") - if not switch_info: + # Skip if switch_info is missing, empty, or placeholder "None" string + if not switch_info or switch_info == "None": continue # Find switch in Nautobot by name @@ -212,7 +210,7 @@ def _set_location_from_switches( ) -def fetch_device_info( +def fetch_node_details( node_uuid: str, ironic_client: IronicClient, nautobot_client: Nautobot, @@ -230,7 +228,14 @@ def fetch_device_info( device_info = DeviceInfo(uuid=node_uuid) node = ironic_client.get_node(node_uuid) - inventory = ironic_client.get_node_inventory(node_ident=node_uuid) + + # Inventory may not exist yet for newly created nodes (pre-inspection) + try: + inventory = ironic_client.get_node_inventory(node_ident=node_uuid) + except ironic_exceptions.NotFound: + logger.info("No inventory yet for node %s (not inspected)", node_uuid) + inventory = {} + ports = ironic_client.list_ports(node_id=node_uuid) # Populate in order @@ -246,6 +251,7 @@ def _create_nautobot_device(device_info: DeviceInfo, nautobot_client: Nautobot): """Create a new device in Nautobot with minimal required fields. Returns the created device object for subsequent updates. + Rack, position, and face are handled by _update_nautobot_device. """ if not device_info.location_id: raise ValueError(f"Cannot create device {device_info.uuid} without location") @@ -342,6 +348,20 @@ def _update_nautobot_device( updated = True logger.debug("Updating rack: %s", device_info.rack_id) + # Position (preserved from old device on recreate) + if device_info.position is not None: + if nautobot_device.position != device_info.position: + nautobot_device.position = device_info.position + updated = True + logger.debug("Updating position: %s", device_info.position) + # Face is required when position is set + target_face = device_info.face or "front" + current_face = _get_record_value(nautobot_device.face, "value") + if current_face != target_face: + nautobot_device.face = target_face + updated = True + logger.debug("Updating face: %s", target_face) + # Tenant (Record with .id attribute, from Ironic lessee) if device_info.tenant_id: current_tenant = _get_record_value(nautobot_device.tenant, "id") @@ -377,6 +397,93 @@ def _update_nautobot_device( return updated +def _preserve_location_from_device(device_info: DeviceInfo, nautobot_device) -> None: + """Preserve location, rack, position and face from existing Nautobot device.""" + if nautobot_device.location: + old_location_id = _get_record_value(nautobot_device.location, "id") + if old_location_id and not device_info.location_id: + device_info.location_id = old_location_id + logger.info("Preserving location %s from old device", old_location_id) + + if nautobot_device.rack: + old_rack_id = _get_record_value(nautobot_device.rack, "id") + if old_rack_id and not device_info.rack_id: + device_info.rack_id = old_rack_id + logger.info("Preserving rack %s from old device", old_rack_id) + + if nautobot_device.position is not None: + device_info.position = nautobot_device.position + logger.info("Preserving position %s from old device", nautobot_device.position) + + if nautobot_device.face: + device_info.face = _get_record_value(nautobot_device.face, "value") + logger.info("Preserving face %s from old device", device_info.face) + + +class DeviceNotReadyError(Exception): + """Raised when device cannot be synced yet (e.g., awaiting inspection).""" + + pass + + +def _delete_old_device_by_name(ironic_node_info: DeviceInfo, nautobot_client: Nautobot): + """Handle re-enrollment scenario where device exists with different UUID. + + When a device is found by name but has a different UUID (re-enrollment), + preserves location data and deletes the old device. + """ + if not ironic_node_info.name: + return + + nautobot_device = nautobot_client.dcim.devices.get(name=ironic_node_info.name) + if not nautobot_device or isinstance(nautobot_device, list): + return + + # UUID mismatch - need to recreate + # Preserve location, rack, position, face from old device + _preserve_location_from_device(ironic_node_info, nautobot_device) + + # Now safe to delete old device (location preserved from old device) + logger.warning( + "Device %s has mismatched UUID (Nautobot: %s, Ironic: %s), deleting old device", + ironic_node_info.name, + nautobot_device.id, + ironic_node_info.uuid, + ) + nautobot_device.delete() + + +def _find_or_create_nautobot_device( + ironic_node_info: DeviceInfo, nautobot_client: Nautobot +): + """Find existing device in Nautobot or create a new one. + + Handles UUID mismatches during re-enrollment by preserving location + data from the old device before deleting it. + + Returns the existing or newly created device. + + Raises: + DeviceNotReadyError: If device doesn't exist and can't be created yet + """ + # First try by UUID + nautobot_device = nautobot_client.dcim.devices.get(id=ironic_node_info.uuid) + if nautobot_device: + return nautobot_device + + # Handle re-enrollment: delete old device by name if exists with different UUID + _delete_old_device_by_name(ironic_node_info, nautobot_client) + + # No existing device - check if we have enough data to create one + if not ironic_node_info.location_id: + raise DeviceNotReadyError( + f"No location yet for node {ironic_node_info.uuid} (awaiting inspection)" + ) + + # Create new device + return _create_nautobot_device(ironic_node_info, nautobot_client) + + def sync_device_to_nautobot( node_uuid: str, nautobot_client: Nautobot, @@ -406,50 +513,16 @@ def sync_device_to_nautobot( try: ironic_client = IronicClient() - # Fetch all device info from Ironic (returns inventory and ports too) - device_info, inventory, ports = fetch_device_info( + ironic_node_info, inventory, ports = fetch_node_details( node_uuid, ironic_client, nautobot_client ) - # Check if device exists in Nautobot - nautobot_device = nautobot_client.dcim.devices.get(id=device_info.uuid) + nautobot_device = _find_or_create_nautobot_device( + ironic_node_info, nautobot_client + ) + + _update_nautobot_device(ironic_node_info, nautobot_device) - if not nautobot_device: - # Try finding by name (handles re-enrollment scenarios) - if device_info.name: - nautobot_device = nautobot_client.dcim.devices.get( - name=device_info.name - ) - if nautobot_device and not isinstance(nautobot_device, list): - logger.info( - "Found existing device by name %s with ID %s, " - "will recreate with UUID %s", - device_info.name, - nautobot_device.id, - device_info.uuid, - ) - if str(nautobot_device.id) != device_info.uuid: - logger.warning( - "Device %s has mismatched UUID (Nautobot: %s, Ironic: %s), " - "recreating", - device_info.name, - nautobot_device.id, - device_info.uuid, - ) - nautobot_device.delete() - nautobot_device = None # Will trigger creation below - - if not nautobot_device: - # Create new device with minimal fields - if not device_info.location_id: - logger.error("Cannot create device %s: no location found", node_uuid) - return EXIT_STATUS_FAILURE - nautobot_device = _create_nautobot_device(device_info, nautobot_client) - - # Update device with all fields (works for both new and existing) - _update_nautobot_device(device_info, nautobot_device) - - # Sync interfaces using already-fetched inventory and ports if sync_interfaces: interface_result = sync_interfaces_from_data( node_uuid, inventory, ports, nautobot_client @@ -459,11 +532,13 @@ def sync_device_to_nautobot( "Interface sync failed for node %s, device sync succeeded", node_uuid, ) - # Don't fail the whole operation if interface sync fails - # Device is already synced successfully return EXIT_STATUS_SUCCESS + except DeviceNotReadyError as e: + logger.info(str(e)) + return EXIT_STATUS_FAILURE + except Exception: logger.exception("Failed to sync device %s to Nautobot", node_uuid) return EXIT_STATUS_FAILURE @@ -503,7 +578,6 @@ def handle_node_event( This is a generic handler that works with: - baremetal.node.provision_set.end - - baremetal.node.create.end - baremetal.node.update.end - baremetal.node.power_set.end - baremetal.node.power_state_corrected.success