diff --git a/pkg/apis/types.go b/pkg/apis/types.go
index 5f172d32..b2e50466 100644
--- a/pkg/apis/types.go
+++ b/pkg/apis/types.go
@@ -26,6 +26,9 @@ type NetworkConfig struct {
 	// Routes defines static routes to be configured for this interface.
 	Routes []RouteConfig `json:"routes,omitempty"`
 
+	// Rules defines routing rules to be configured for this interface.
+	Rules []RuleConfig `json:"rules,omitempty"`
+
 	// Neighbors defines permanent neighbor (ARP/NDP) entries to be added for this interface.
 	Neighbors []NeighborConfig `json:"neighbors,omitempty"`
 
@@ -86,6 +89,20 @@ type RouteConfig struct {
 	// Scope is the scope of the route (e.g., link, host, global).
 	// Refers to Linux route scopes (e.g., 0 for RT_SCOPE_UNIVERSE, 253 for RT_SCOPE_LINK).
 	Scope uint8 `json:"scope,omitempty"`
+	// Table is the ID of the routing table the route is added to; validation rejects negative values.
+	Table int `json:"table,omitempty"`
+}
+
+// RuleConfig represents a routing policy rule that selects a routing table for matching traffic.
+type RuleConfig struct {
+	// Priority is the priority of the rule; validation accepts values from 0 to 32767.
+	Priority int `json:"priority,omitempty"`
+	// Source is the source prefix the rule matches, in CIDR notation (e.g. "10.0.0.0/8").
+	Source string `json:"source,omitempty"`
+	// Destination is the destination prefix the rule matches, in CIDR notation (e.g. "10.0.0.0/8").
+	Destination string `json:"destination,omitempty"`
+	// Table is the ID of the routing table the rule points to; validation rejects negative values.
+	Table int `json:"table,omitempty"`
 }
 
 // NeighborConfig represents a neighbor (ARP/NDP) entry.
diff --git a/pkg/apis/validation.go b/pkg/apis/validation.go
index 3d5865bc..55f8a9f4 100644
--- a/pkg/apis/validation.go
+++ b/pkg/apis/validation.go
@@ -67,6 +67,11 @@ func ValidateConfig(raw *runtime.RawExtension) (*NetworkConfig, []error) {
 		allErrors = append(allErrors, validateRoutes(config.Routes, "routes")...)
 	}
 
+	// Validate Rules
+	if len(config.Rules) > 0 {
+		allErrors = append(allErrors, validateRules(config.Rules, "rules")...)
+	}
+
 	// Validate EthtoolConfig if present
 	if config.Ethtool != nil {
 		allErrors = append(allErrors, validateEthtoolConfig(config.Ethtool, "ethtool")...)
@@ -204,6 +209,52 @@ func validateRoutes(routes []RouteConfig, fieldPath string) (allErrors []error)
 				allErrors = append(allErrors, fmt.Errorf("%s.source: invalid IP address format '%s'", currentFieldPath, route.Source))
 			}
 		}
+
+		if route.Table < 0 {
+			allErrors = append(allErrors, fmt.Errorf("%s.table: must be a non-negative integer, got %d", currentFieldPath, route.Table))
+		}
+	}
+	return allErrors
+}
+
+// validateRules validates a slice of RuleConfig. For every rule it checks that
+// the priority lies within [0, 32767], that the table is non-negative, that
+// source and destination (when set) are valid CIDRs, and that both belong to
+// the same IP family. Each error is prefixed with "<fieldPath>[<index>]".
+func validateRules(rules []RuleConfig, fieldPath string) (allErrors []error) {
+	for i, rule := range rules {
+		currentFieldPath := fmt.Sprintf("%s[%d]", fieldPath, i)
+
+		if rule.Priority < 0 || rule.Priority > 32767 {
+			allErrors = append(allErrors, fmt.Errorf("%s.priority: must be an integer between 0 and 32767, got %d", currentFieldPath, rule.Priority))
+		}
+
+		if rule.Table < 0 {
+			allErrors = append(allErrors, fmt.Errorf("%s.table: must be a non-negative integer, got %d", currentFieldPath, rule.Table))
+		}
+
+		var srcIP, dstIP net.IP
+		if rule.Source != "" {
+			ip, _, err := net.ParseCIDR(rule.Source)
+			if err != nil {
+				allErrors = append(allErrors, fmt.Errorf("%s.source: invalid CIDR format '%s'", currentFieldPath, rule.Source))
+			}
+			srcIP = ip
+		}
+
+		if rule.Destination != "" {
+			ip, _, err := net.ParseCIDR(rule.Destination)
+			if err != nil {
+				allErrors = append(allErrors, fmt.Errorf("%s.destination: invalid CIDR format '%s'", currentFieldPath, rule.Destination))
+			}
+			dstIP = ip
+		}
+
+		// A single rule cannot mix IPv4 and IPv6 selectors; report it here
+		// instead of letting it fail when the rule is installed in the pod.
+		if srcIP != nil && dstIP != nil && (srcIP.To4() == nil) != (dstIP.To4() == nil) {
+			allErrors = append(allErrors, fmt.Errorf("%s: source '%s' and destination '%s' must belong to the same IP family", currentFieldPath, rule.Source, rule.Destination))
+		}
 	}
 	return allErrors
 }
diff --git a/pkg/apis/validation_test.go b/pkg/apis/validation_test.go
index 327ded8d..dd4db8c1 100644
--- a/pkg/apis/validation_test.go
+++ b/pkg/apis/validation_test.go
@@ -48,10 +48,14 @@ func TestValidateConfig(t *testing.T) {
 		Routes: []RouteConfig{
 			{Destination: "0.0.0.0/0", Gateway: "192.168.1.254", Scope: unix.RT_SCOPE_UNIVERSE},
 		},
+		Rules: []RuleConfig{
+			{Source: "10.0.0.0/8", Table: 100},
+		},
 		Ethtool: &EthtoolConfig{Features: map[string]bool{"tso": true}},
 	}
 	invalidInterfaceConf := NetworkConfig{Interface: InterfaceConfig{Name: "eth/0"}}
 	invalidRouteConf := NetworkConfig{Interface: InterfaceConfig{Name: "eth0"}, Routes: []RouteConfig{{Destination: "invalid-cidr"}}}
+	invalidRuleConf := NetworkConfig{Interface: InterfaceConfig{Name: "eth0"}, Rules: []RuleConfig{{Source: "invalid-cidr"}}}
 
 	tests := []struct {
 		name        string
@@ -106,6 +110,13 @@ func TestValidateConfig(t *testing.T) {
 			expectedCfg: &invalidRouteConf,
 			errContains: []string{"routes[0].destination: invalid IP or CIDR format 'invalid-cidr'"},
 		},
+		{
+			name:        "config with rule validation error",
+			raw:         newRawExtension(t, invalidRuleConf),
+			expectErr:   true,
+			expectedCfg: &invalidRuleConf,
+			errContains: []string{"rules[0].source: invalid CIDR format 'invalid-cidr'"},
+		},
 	}
 
 	for _, tt := range tests {
@@ -323,6 +334,19 @@ func TestValidateRoutes(t *testing.T) {
 			fieldPath: "routes",
 			expectErr: false,
 		},
+		{
+			name:      "valid route with table",
+			routes:    []RouteConfig{{Destination: "10.10.10.0/24", Gateway: "192.168.1.1", Table: 100}},
+			fieldPath: "routes",
+			expectErr: false,
+		},
+		{
+			name:      "invalid route with negative table",
+			routes:    []RouteConfig{{Destination: "10.10.10.0/24", Gateway: "192.168.1.1", Table: -1}},
+			fieldPath: "routes",
+			expectErr: true,
+			errCount:  1,
+		},
 		{
 			name:      "empty destination",
 			routes:    []RouteConfig{{Gateway: "192.168.1.1"}},
@@ -387,6 +411,92 @@ func TestValidateRoutes(t *testing.T) {
 	}
 }
 
+// TestValidateRules exercises validateRules with table-driven cases covering
+// the priority bounds, negative table IDs, malformed source and destination
+// CIDRs, and a single rule that accumulates several errors at once.
+func TestValidateRules(t *testing.T) {
+	tests := []struct {
+		name      string
+		rules     []RuleConfig
+		fieldPath string
+		expectErr bool
+		errCount  int
+	}{
+		{
+			name:      "valid rule",
+			rules:     []RuleConfig{{Source: "10.0.0.0/8", Table: 100, Priority: 10}},
+			fieldPath: "rules",
+			expectErr: false,
+		},
+		{
+			name:      "valid rule - priority at min",
+			rules:     []RuleConfig{{Priority: 0, Table: 100}},
+			fieldPath: "rules",
+			expectErr: false,
+		},
+		{
+			name:      "valid rule - priority at max",
+			rules:     []RuleConfig{{Priority: 32767, Table: 100}},
+			fieldPath: "rules",
+			expectErr: false,
+		},
+		{
+			name:      "invalid priority - too high",
+			rules:     []RuleConfig{{Priority: 32768}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  1,
+		},
+		{
+			name:      "invalid priority - negative",
+			rules:     []RuleConfig{{Priority: -1}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  1,
+		},
+		{
+			name:      "invalid table",
+			rules:     []RuleConfig{{Table: -1}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  1,
+		},
+		{
+			name:      "invalid source CIDR",
+			rules:     []RuleConfig{{Source: "invalid-cidr"}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  1,
+		},
+		{
+			name:      "invalid destination CIDR",
+			rules:     []RuleConfig{{Destination: "invalid-cidr"}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  1,
+		},
+		{
+			name:      "multiple errors",
+			rules:     []RuleConfig{{Priority: -1, Table: -1, Source: "invalid", Destination: "invalid"}},
+			fieldPath: "rules",
+			expectErr: true,
+			errCount:  4,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			errs := validateRules(tt.rules, tt.fieldPath)
+			if (len(errs) > 0) != tt.expectErr {
+				t.Errorf("validateRules() expectErr %v, got errors: %v", tt.expectErr, errs)
+			}
+			if tt.expectErr && len(errs) != tt.errCount {
+				t.Errorf("validateRules() expected %d errors, got %d: %v", tt.errCount, len(errs), errs)
+			}
+		})
+	}
+}
+
 func TestValidateNeighborConfig(t *testing.T) {
 	tests := []struct {
 		name string
diff --git a/pkg/driver/dra_hooks.go b/pkg/driver/dra_hooks.go
index 295aca81..2aad0e85 100644
--- a/pkg/driver/dra_hooks.go
+++ b/pkg/driver/dra_hooks.go
@@ -134,6 +134,8 @@ func (np *NetworkDriver) prepareResourceClaims(ctx context.Context, claims []*re
 // prepareResourceClaim gets all the configuration required to be applied at runtime and passes it downs to the handlers.
// This happens in the kubelet so it can be a "slow" operation, so we can execute fast in RunPodsandbox, that happens in the // container runtime and has strong expectactions to be executed fast (default hook timeout is 2 seconds). +// +// TODO(#290): This function has grown too large and needs to be split apart. func (np *NetworkDriver) prepareResourceClaim(ctx context.Context, claim *resourceapi.ResourceClaim) kubeletplugin.PrepareResult { klog.V(2).Infof("PrepareResourceClaim Claim %s/%s", claim.Namespace, claim.Name) start := time.Now() @@ -161,6 +163,13 @@ func (np *NetworkDriver) prepareResourceClaim(ctx context.Context, claim *resour } } + rulesByTable, err := getRuleInfo(nlHandle) + if err != nil { + return kubeletplugin.PrepareResult{ + Err: fmt.Errorf("error getting rule info: %v", err), + } + } + var errorList []error charDevices := sets.New[string]() for _, result := range claim.Status.Allocation.Devices.Results { @@ -284,35 +293,21 @@ func (np *NetworkDriver) prepareResourceClaim(ctx context.Context, claim *resour podCfg.NetworkInterfaceConfigInPod.Ethtool.Features = ethtoolFeatures } - // Obtain the routes associated to the interface - // TODO: only considers outgoing traffic - filter := &netlink.Route{ - LinkIndex: link.Attrs().Index, - } - routes, err := nlHandle.RouteListFiltered(netlink.FAMILY_ALL, filter, netlink.RT_FILTER_OIF) + // Obtain the routes and rules associated with the interface. + routes, tables, err := getRouteInfo(nlHandle, ifName, link) if err != nil { - klog.Infof("fail to get ip routes for interface %s : %v", ifName, err) + errorList = append(errorList, err) + continue } - for _, route := range routes { - routeCfg := apis.RouteConfig{} - // routes need a destination - if route.Dst == nil { - continue - } - // Discard IPv6 link-local routes, but allow IPv4 link-local. 
-			if route.Dst.IP.To4() == nil && route.Dst.IP.IsLinkLocalUnicast() {
-				continue
+		podCfg.NetworkInterfaceConfigInPod.Routes = append(podCfg.NetworkInterfaceConfigInPod.Routes, routes...)
+
+		for _, table := range tables.UnsortedList() {
+			if rules, ok := rulesByTable[table]; ok {
+				klog.V(5).Infof("Adding %d rules for table %d associated with interface %s", len(rules), table, ifName)
+				podCfg.NetworkInterfaceConfigInPod.Rules = append(podCfg.NetworkInterfaceConfigInPod.Rules, rules...)
+				// Avoid adding the same rule twice
+				delete(rulesByTable, table)
 			}
-			routeCfg.Destination = route.Dst.String()
-
-			if route.Gw != nil {
-				routeCfg.Gateway = route.Gw.String()
-			}
-			if route.Src != nil {
-				routeCfg.Source = route.Src.String()
-			}
-			routeCfg.Scope = uint8(route.Scope)
-			podCfg.NetworkInterfaceConfigInPod.Routes = append(podCfg.NetworkInterfaceConfigInPod.Routes, routeCfg)
 		}
 
 		// Obtain the neighbors associated to the interface
@@ -446,3 +441,95 @@ func formatDeviceNames(devices []resourceapi.Device, max int) string {
 
 	return fmt.Sprintf("%s, and %d more", strings.Join(deviceNames[:max], ", "), len(deviceNames)-max)
 }
+
+// getRuleInfo lists all IP rules in the host network namespace and groups them
+// by the route table they are associated with. It returns a map where keys are
+// table IDs and values are slices of RuleConfig. Rules associated with the
+// main or local tables are ignored, as are rules whose selectors (inversion,
+// input or output interface) cannot be expressed in a RuleConfig.
+func getRuleInfo(nlHandle nlwrap.Handle) (map[int][]apis.RuleConfig, error) {
+	rules, err := nlHandle.RuleList(netlink.FAMILY_ALL)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get ip rules: %w", err)
+	}
+	rulesByTable := make(map[int][]apis.RuleConfig)
+	for _, rule := range rules {
+		// Only care about rules with route tables associated, and exclude main and local tables.
+		if rule.Table <= 0 || rule.Table == unix.RT_TABLE_MAIN || rule.Table == unix.RT_TABLE_LOCAL {
+			continue
+		}
+		// RuleConfig only carries priority, source, destination and table.
+		// Copying an inverted rule, or one bound to a host interface, without
+		// those selectors would install a rule that matches different traffic
+		// than the original, so such rules are skipped.
+		if rule.Invert || rule.IifName != "" || rule.OifName != "" {
+			klog.V(5).Infof("Skipping rule %s for table %d because it uses selectors that cannot be represented", rule.String(), rule.Table)
+			continue
+		}
+		klog.V(5).Infof("Found rule %s for table %d", rule.String(), rule.Table)
+		ruleCfg := apis.RuleConfig{
+			Priority: rule.Priority,
+			Table:    rule.Table,
+		}
+		if rule.Src != nil {
+			ruleCfg.Source = rule.Src.String()
+		}
+		if rule.Dst != nil {
+			ruleCfg.Destination = rule.Dst.String()
+		}
+		rulesByTable[rule.Table] = append(rulesByTable[rule.Table], ruleCfg)
+	}
+	return rulesByTable, nil
+}
+
+// getRouteInfo retrieves all routes associated with a given network interface.
+// It filters out routes that are not suitable for pod namespaces, such as
+// routes in the local table. It returns the list of suitable routes and a set
+// of the route table IDs to which they belong.
+func getRouteInfo(nlHandle nlwrap.Handle, ifName string, link netlink.Link) ([]apis.RouteConfig, sets.Set[int], error) {
+	routes := []apis.RouteConfig{}
+	tables := sets.Set[int]{}
+	filter := &netlink.Route{
+		LinkIndex: link.Attrs().Index,
+	}
+	rl, err := nlHandle.RouteListFiltered(netlink.FAMILY_ALL, filter, netlink.RT_FILTER_OIF|netlink.RT_FILTER_TABLE)
+	if err != nil {
+		return nil, nil, fmt.Errorf("fail to get ip routes for interface %s : %w", ifName, err)
+	}
+	for _, route := range rl {
+		routeCfg := apis.RouteConfig{}
+		// routes need a destination
+		if route.Dst == nil {
+			klog.V(5).Infof("Skipping route %s for interface %s because it has no destination", route.String(), ifName)
+			continue
+		}
+		// Do not copy routes from the local table because they are specific
+		// to the host and the kernel will manage the local routing
+		// table within the pod's network namespace.
+		if route.Table == unix.RT_TABLE_LOCAL {
+			klog.V(5).Infof("Skipping route %s for interface %s because it is in the local table", route.String(), ifName)
+			continue
+		}
+		// Discard IPv6 link-local routes, but allow IPv4 link-local.
+		if route.Dst.IP.To4() == nil && route.Dst.IP.IsLinkLocalUnicast() {
+			klog.V(5).Infof("Skipping IPv6 link-local route %s for interface %s", route.String(), ifName)
+			continue
+		}
+		routeCfg.Destination = route.Dst.String()
+		if route.Gw != nil {
+			routeCfg.Gateway = route.Gw.String()
+		}
+		if route.Src != nil {
+			routeCfg.Source = route.Src.String()
+		}
+		routeCfg.Scope = uint8(route.Scope)
+		routeCfg.Table = route.Table
+		routes = append(routes, routeCfg)
+		// Collect table IDs for rules lookup later.
+		if route.Table > 0 {
+			klog.V(5).Infof("Found route table %d for interface %s", route.Table, ifName)
+			tables.Insert(route.Table)
+		}
+	}
+	return routes, tables, nil
+}
diff --git a/pkg/driver/netnamespace.go b/pkg/driver/netnamespace.go
index f4fa4360..1d50b2c5 100644
--- a/pkg/driver/netnamespace.go
+++ b/pkg/driver/netnamespace.go
@@ -69,6 +69,7 @@ func applyRoutingConfig(containerNsPAth string, ifName string, routeConfig []api
 		r := netlink.Route{
 			LinkIndex: nsLink.Attrs().Index,
 			Scope:     netlink.Scope(route.Scope),
+			Table:     route.Table,
 		}
 
 		_, dst, err := net.ParseCIDR(route.Destination)
@@ -131,3 +132,61 @@ func applyNeighborConfig(containerNsPAth string, ifName string, neighConfig []ap
 	}
 	return errors.Join(errorList...)
 }
+
+// applyRulesConfig adds the given routing rules inside the network namespace
+// located at containerNsPath. A zero Priority leaves the priority for the
+// kernel to assign and a zero Table selects the main table. Rules that
+// already exist are not treated as errors. All failures are collected and
+// returned as one joined error; nil is returned when every rule was applied.
+func applyRulesConfig(containerNsPath string, rulesConfig []apis.RuleConfig) error {
+	containerNs, err := netns.GetFromPath(containerNsPath)
+	if err != nil {
+		return fmt.Errorf("could not get network namespace from path %s: %w", containerNsPath, err)
+	}
+	defer containerNs.Close()
+
+	nsHandle, err := nlwrap.NewHandleAt(containerNs)
+	if err != nil {
+		return fmt.Errorf("could not get netlink handle: %w", err)
+	}
+	defer nsHandle.Close()
+
+	var errorList []error
+	for _, ruleCfg := range rulesConfig {
+		// netlink.NewRule leaves Priority unset (-1); only override it when the
+		// config carries one, so that an omitted priority is not installed as
+		// an explicit priority 0 (the highest precedence).
+		rule := netlink.NewRule()
+		if ruleCfg.Priority > 0 {
+			rule.Priority = ruleCfg.Priority
+		}
+		// An omitted table means the main table.
+		rule.Table = syscall.RT_TABLE_MAIN
+		if ruleCfg.Table > 0 {
+			rule.Table = ruleCfg.Table
+		}
+
+		if ruleCfg.Source != "" {
+			_, src, err := net.ParseCIDR(ruleCfg.Source)
+			if err != nil {
+				errorList = append(errorList, fmt.Errorf("invalid rule source %q: %w", ruleCfg.Source, err))
+				continue
+			}
+			rule.Src = src
+		}
+		if ruleCfg.Destination != "" {
+			_, dst, err := net.ParseCIDR(ruleCfg.Destination)
+			if err != nil {
+				errorList = append(errorList, fmt.Errorf("invalid rule destination %q: %w", ruleCfg.Destination, err))
+				continue
+			}
+			rule.Dst = dst
+		}
+
+		if err := nsHandle.RuleAdd(rule); err != nil && !errors.Is(err, syscall.EEXIST) {
+			errorList = append(errorList, fmt.Errorf("failed to add rule %s on namespace %s: %w", rule.String(), containerNsPath, err))
+		}
+	}
+	return errors.Join(errorList...)
+}
diff --git a/pkg/driver/nri_hooks.go b/pkg/driver/nri_hooks.go
index 0e67f008..77ca62bf 100644
--- a/pkg/driver/nri_hooks.go
+++ b/pkg/driver/nri_hooks.go
@@ -205,6 +205,13 @@ func (np *NetworkDriver) runPodSandbox(_ context.Context, pod *api.PodSandbox, p
 			return fmt.Errorf("error configuring device %s routes on namespace %s: %v", deviceName, ns, err)
 		}
 
+		// Configure rules
+		err = applyRulesConfig(ns, config.NetworkInterfaceConfigInPod.Rules)
+		if err != nil {
+			klog.Infof("RunPodSandbox error configuring device %s namespace %s rules: %v", deviceName, ns, err)
+			return fmt.Errorf("error configuring device %s rules on namespace %s: %v", deviceName, ns, err)
+		}
+
 		// Configure neighbors
 		err = applyNeighborConfig(ns, ifNameInNs, config.NetworkInterfaceConfigInPod.Neighbors)
 		if err != nil {
diff --git a/site/content/docs/user/interface-configuration.md b/site/content/docs/user/interface-configuration.md
index ec510d76..a251d2b8 100644
--- a/site/content/docs/user/interface-configuration.md
+++ b/site/content/docs/user/interface-configuration.md
@@ -7,7 +7,7 @@ To configure network interfaces in DRANET, users can provide custom configuratio
 
 ### Network Configuration Overview
 
-The primary structure for custom network configuration is NetworkConfig. It encompasses settings for the network interface itself and any specific routes to be applied within the Pod's network namespace.
+The primary structure for custom network configuration is NetworkConfig. It encompasses settings for the network interface itself and any specific routes and rules to be applied within the Pod's network namespace.
 
 ```go
 type NetworkConfig struct {
@@ -18,6 +18,9 @@ type NetworkConfig struct {
 	// Routes defines static routes to be configured for this interface.
 	Routes []RouteConfig `json:"routes,omitempty"`
 
+	// Rules defines routing rules to be configured for this interface.
+ Rules []RuleConfig `json:"rules,omitempty"` + // Neighbors defines permanent neighbor (ARP/NDP) entries to be added for this interface. Neighbors []NeighborConfig `json:"neighbors,omitempty"` @@ -83,6 +86,7 @@ type RouteConfig struct { Gateway string `json:"gateway,omitempty"` Source string `json:"source,omitempty"` Scope uint8 `json:"scope,omitempty"` + Table int `json:"table,omitempty"` } ``` @@ -92,7 +96,30 @@ type RouteConfig struct { * **scope** (uint8, optional): The scope of the route. Only Link (253) or Universe (0) are allowed. * Link (253): Routes directly to a device without a gateway (e.g., for directly connected subnets). * Universe (0): Routes to a network via a gateway. - +* **table** (int, optional): The routing table to use for the route. Defaults to the main table (254) if not specified. + +#### Rule Configuration (RuleConfig) + +The RuleConfig structure defines individual routing rules to be added to the Pod's network namespace. + +```go +type RuleConfig struct { + // Priority is the priority of the rule. + Priority int `json:"priority,omitempty"` + // Source is the source prefix (CIDR) for the rule. + Source string `json:"source,omitempty"` + // Destination is the destination prefix (CIDR) for the rule. + Destination string `json:"destination,omitempty"` + // Table is the routing table to use for the rule. + Table int `json:"table,omitempty"` +} +``` + +* **priority** (int, optional): The priority of the rule, between 0 and 32767. Lower values mean higher priority. Defaults to a kernel-assigned value if not specified. +* **source** (string, optional): The source prefix for the rule in CIDR notation (e.g., "192.168.1.0/24"). A bare IP address without a prefix length is rejected by validation. +* **destination** (string, optional): The destination prefix for the rule in CIDR notation (e.g., "10.0.0.0/8"). A bare IP address without a prefix length is rejected by validation. +* **table** (int, optional): The routing table to use for the rule. Defaults to the main table (254) if not specified.
+
 #### Neighbor Configuration (NeighborConfig)
 
 The NeighborConfig structure defines permanent neighbor entries (ARP for IPv4, NDP for IPv6) to be added to the Pod's network namespace.
diff --git a/tests/e2e.bats b/tests/e2e.bats
index bb006d0a..00e3089f 100644
--- a/tests/e2e.bats
+++ b/tests/e2e.bats
@@ -440,3 +440,47 @@ EOF
   assert_output --partial "$NEIGH_IPV6 dev eth99 lladdr $NEIGH_MAC_IPV6 PERM"
 }
 
+# Creates a dummy interface on the worker node with a route in a non-default
+# table plus a rule pointing at that table, then checks both appear in the pod.
+@test "route rules and routes with non-default table are copied to pod namespace" {
+  local NODE_NAME="$CLUSTER_NAME"-worker
+  local DUMMY_IFACE="dummy-rules"
+  local ROUTE_DST="10.10.10.0/24"
+  local ROUTE_GW="169.254.169.1"
+  local TABLE_ID="100"
+  local RULE_PRIORITY="500"
+  local RULE_SRC="10.20.30.0/24"
+
+  # Create a dummy interface on the worker node
+  docker exec "$NODE_NAME" bash -c "ip link add $DUMMY_IFACE type dummy"
+  docker exec "$NODE_NAME" bash -c "ip link set up dev $DUMMY_IFACE"
+  docker exec "$NODE_NAME" bash -c "ip addr add 169.254.169.13/24 dev $DUMMY_IFACE"
+
+  # Add a route with a non-default table
+  docker exec "$NODE_NAME" bash -c "ip route add $ROUTE_DST via $ROUTE_GW dev $DUMMY_IFACE table $TABLE_ID"
+
+  # Add a rule
+  docker exec "$NODE_NAME" bash -c "ip rule add from $RULE_SRC table $TABLE_ID priority $RULE_PRIORITY"
+
+  kubectl apply -f "$BATS_TEST_DIRNAME"/../tests/manifests/deviceclass.yaml
+  kubectl apply -f "$BATS_TEST_DIRNAME"/../tests/manifests/resourceclaim.yaml
+  kubectl wait --timeout=30s --for=condition=ready pods -l app=pod
+
+  # Get the pod name
+  POD_NAME=$(kubectl get pods -l app=pod -o name)
+
+  # Verify the route entry inside the pod's network namespace
+  run kubectl exec "$POD_NAME" -- ip route show table "$TABLE_ID"
+  assert_success
+  assert_output --partial "$ROUTE_DST via $ROUTE_GW dev eth99"
+
+  # Verify the rule entry inside the pod's network namespace
+  run kubectl exec "$POD_NAME" -- ip rule show
+  assert_success
+  assert_output --regexp "$RULE_PRIORITY:[[:space:]]+from $RULE_SRC lookup $TABLE_ID"
+
+  # Remove the host rule: it is not bound to the dummy interface, so it would
+  # otherwise stay on the node and be picked up by later tests.
+  docker exec "$NODE_NAME" bash -c "ip rule del from $RULE_SRC table $TABLE_ID priority $RULE_PRIORITY"
+}
+