Skip to content

Commit 3ce6ba1

Browse files
committed
Add support for NF-based accelerated mode
Plumb bridgeID across CNI and network function APIs. Add per-VF accelerated mode and orphaned VF representor recovery on shutdown. Signed-off-by: Alkama Hasan <alkamah@marvell.com>
1 parent 485e697 commit 3ce6ba1

13 files changed

Lines changed: 398 additions & 71 deletions

File tree

dpu-cni/pkgs/cnitypes/cnitypes.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ type NetConf struct {
130130
RuntimeConfig struct {
131131
Mac string `json:"mac,omitempty"`
132132
} `json:"runtimeConfig,omitempty"`
133-
LogLevel string `json:"logLevel,omitempty"`
134-
LogFile string `json:"logFile,omitempty"`
133+
LogLevel string `json:"logLevel,omitempty"`
134+
LogFile string `json:"logFile,omitempty"`
135+
BridgeID string `json:"bridgeID,omitempty"`
136+
IsAccelerated bool `json:"isAccelerated,omitempty"`
135137
}

dpu-cni/pkgs/networkfn/networkfn.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,18 @@ func CmdDel(req *cnitypes.PodRequest) error {
322322
conf := req.CNIConf
323323

324324
if req.Netns == "" {
325+
klog.Warning("CmdDel: netns is empty, device may be orphaned")
325326
return nil
326327
}
327328

328329
containerNs, err := ns.GetNS(req.Netns)
329330
if err != nil {
331+
if _, ok := err.(ns.NSPathNotExistErr); ok {
332+
klog.Warningf("CmdDel: netns %q no longer exists for device %s — attempting recovery",
333+
req.Netns, conf.DeviceID)
334+
recoverOrphanedDevice(conf.DeviceID)
335+
return nil
336+
}
330337
return fmt.Errorf("failed to open netns %q: %v", req.Netns, err)
331338
}
332339
defer containerNs.Close()
@@ -347,3 +354,41 @@ func CmdDel(req *cnitypes.PodRequest) error {
347354

348355
return nil
349356
}
357+
358+
// recoverOrphanedDevice attempts to find a device that was left behind in a
359+
// now-destroyed namespace and restore it to the init namespace.
360+
// Hardware-backed VF representors survive namespace destruction (unlike veths
361+
// which are auto-cleaned by the kernel), so they may be found under the
362+
// temporary name assigned by moveLinkInNetNamespace with the original name
363+
// stored in the device's alias.
364+
// For veth pairs this is a safe no-op: the kernel already destroyed both ends.
365+
func recoverOrphanedDevice(deviceName string) {
366+
if _, err := netlink.LinkByName(deviceName); err == nil {
367+
klog.Infof("recoverOrphanedDevice: %s already in init namespace", deviceName)
368+
return
369+
}
370+
371+
links, err := netlink.LinkList()
372+
if err != nil {
373+
klog.Errorf("recoverOrphanedDevice: failed to list links: %v", err)
374+
return
375+
}
376+
for _, link := range links {
377+
if link.Attrs().Alias == deviceName {
378+
klog.Infof("recoverOrphanedDevice: found %s under temp name %s, renaming back",
379+
deviceName, link.Attrs().Name)
380+
if err := netlink.LinkSetName(link, deviceName); err != nil {
381+
klog.Errorf("recoverOrphanedDevice: failed to rename %s to %s: %v",
382+
link.Attrs().Name, deviceName, err)
383+
return
384+
}
385+
if err := netlink.LinkSetUp(link); err != nil {
386+
klog.Warningf("recoverOrphanedDevice: failed to bring up %s: %v", deviceName, err)
387+
}
388+
return
389+
}
390+
}
391+
392+
klog.Warningf("recoverOrphanedDevice: %s not found in init namespace — "+
393+
"device was likely a veth pair already cleaned up by the kernel", deviceName)
394+
}

examples/host-pod.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
name: host-pod
5+
namespace: default
6+
annotations:
7+
dpu.config.openshift.io/dpu-network: net1
8+
spec:
9+
nodeSelector:
10+
node-role.kubernetes.io/worker: ""
11+
containers:
12+
- name: app-container
13+
image: ghcr.io/ovn-kubernetes/kubernetes-traffic-flow-tests:latest
14+
resources:
15+
requests:
16+
openshift.io/dpunetwork-net1: "1"
17+
limits:
18+
openshift.io/dpunetwork-net1: "1"

examples/nf-pod.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
name: nf-pod
5+
namespace: default
6+
annotations:
7+
dpu.config.openshift.io/dpu-network: net1
8+
spec:
9+
nodeSelector:
10+
dpu.config.openshift.io/dpuside: "dpu"
11+
containers:
12+
- name: nf-container
13+
image: ghcr.io/ovn-kubernetes/kubernetes-traffic-flow-tests:latest
14+
securityContext:
15+
capabilities:
16+
add: ["NET_ADMIN", "NET_RAW"]
17+
resources:
18+
requests:
19+
openshift.io/dpunetwork-net1: "4"
20+
openshift.io/dpu-accelerated: "1"
21+
limits:
22+
openshift.io/dpunetwork-net1: "4"
23+
openshift.io/dpu-accelerated: "1"

internal/daemon/device-plugin/deviceplugin.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
)
3232

3333
const (
34-
DefaultDpuResourceName = "openshift.io/dpu"
34+
DefaultDpuResourceName = "openshift.io/dpu"
3535
AcceleratedResourceName = "openshift.io/dpu-accelerated"
3636

3737
acceleratedDevicePrefix = "accelerated:"
@@ -924,4 +924,3 @@ func vfRangesEqual(a, b []string) bool {
924924
}
925925
return true
926926
}
927-

internal/daemon/dpusidemanager.go

Lines changed: 81 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ import (
2121
"github.com/openshift/dpu-operator/pkgs/vars"
2222
pb "github.com/opiproject/opi-api/network/evpn-gw/v1alpha1/gen/go"
2323
lifecycleapi "github.com/opiproject/opi-api/v1/gen/go/lifecycle/v1alpha1"
24+
"github.com/vishvananda/netlink"
2425
"google.golang.org/grpc"
2526
emptypb "google.golang.org/protobuf/types/known/emptypb"
2627
"k8s.io/client-go/rest"
2728
ctrl "sigs.k8s.io/controller-runtime"
2829
"sigs.k8s.io/controller-runtime/pkg/cache"
30+
"sigs.k8s.io/controller-runtime/pkg/client"
2931
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
3032
"sigs.k8s.io/controller-runtime/pkg/metrics/server"
3133
)
@@ -113,7 +115,12 @@ func NewDpuSideManager(vsp plugin.VendorPlugin, config *rest.Config, opts ...fun
113115
opt(d)
114116
}
115117

116-
d.dp = deviceplugin.NewDevicePlugin(vsp, true, d.pathManager)
118+
if k8sClient, err := client.New(d.config, client.Options{Scheme: scheme.Scheme}); err != nil {
119+
d.log.Error(err, "Failed to create Kubernetes client for device plugin; falling back to default-only registration")
120+
d.dp = deviceplugin.NewDevicePluginManager(vsp, true, d.pathManager, nil)
121+
} else {
122+
d.dp = deviceplugin.NewDevicePluginManager(vsp, true, d.pathManager, k8sClient)
123+
}
117124

118125
return d, nil
119126
}
@@ -149,12 +156,21 @@ func (d *DpuSideManager) cniCmdNfAddHandler(req *cnitypes.PodRequest) (*cni100.R
149156
return nil, fmt.Errorf("SRIOV manager failed in add handler: %v", err)
150157
}
151158

152-
d.macStore[req.Netns] = append(d.macStore[req.Netns], req.CNIConf.MAC)
153-
if len(d.macStore[req.Netns]) == 2 {
154-
d.log.Info("cniCmdNfAddHandler", "req.Netns", req.Netns)
155-
macs := d.macStore[req.Netns]
156-
d.vsp.CreateNetworkFunction(macs[0], macs[1])
159+
bridgeID := req.CNIConf.BridgeID
160+
161+
if req.CNIConf.IsAccelerated {
162+
d.log.Info("cniCmdNfAddHandler accelerated mode: calling CNF per VF", "mac", req.CNIConf.MAC, "bridgeID", bridgeID)
163+
d.macStore[req.Netns] = append(d.macStore[req.Netns], req.CNIConf.MAC)
164+
d.vsp.CreateNetworkFunction(req.CNIConf.MAC, "", bridgeID)
165+
} else {
166+
d.macStore[req.Netns] = append(d.macStore[req.Netns], req.CNIConf.MAC)
167+
if len(d.macStore[req.Netns]) == 2 {
168+
d.log.Info("cniCmdNfAddHandler", "req.Netns", req.Netns, "bridgeID", bridgeID)
169+
macs := d.macStore[req.Netns]
170+
d.vsp.CreateNetworkFunction(macs[0], macs[1], bridgeID)
171+
}
157172
}
173+
158174
d.log.Info("cniCmdNfAddHandler CmdAdd succeeded")
159175
return res, nil
160176
}
@@ -166,19 +182,71 @@ func (d *DpuSideManager) cniCmdNfDelHandler(req *cnitypes.PodRequest) (*cni100.R
166182
return nil, errors.New("SRIOV manager failed in del handler")
167183
}
168184

169-
macs := d.macStore[req.Netns]
185+
bridgeID := req.CNIConf.BridgeID
170186

171-
if len(macs) == 2 {
172-
d.log.Info("cniCmdNfDelHandler", "req.Netns", req.Netns)
173-
d.vsp.DeleteNetworkFunction(macs[0], macs[1])
187+
if req.CNIConf.IsAccelerated {
188+
macs := d.macStore[req.Netns]
189+
mac := ""
190+
if len(macs) > 0 {
191+
mac = macs[len(macs)-1]
192+
d.macStore[req.Netns] = macs[:len(macs)-1]
193+
}
194+
d.log.Info("cniCmdNfDelHandler accelerated mode: calling DNF per VF", "mac", mac, "bridgeID", bridgeID)
195+
d.vsp.DeleteNetworkFunction(mac, "", bridgeID)
196+
} else {
197+
macs := d.macStore[req.Netns]
198+
if len(macs) == 2 {
199+
d.log.Info("cniCmdNfDelHandler", "req.Netns", req.Netns, "bridgeID", bridgeID)
200+
d.vsp.DeleteNetworkFunction(macs[0], macs[1], bridgeID)
201+
}
202+
if len(macs) > 0 {
203+
d.macStore[req.Netns] = macs[:len(macs)-1]
204+
}
174205
}
175206

176-
d.macStore[req.Netns] = macs[:len(macs)-1]
177-
178207
d.log.Info("cniCmdNfDelHandler CmdDel succeeded")
179208
return nil, nil
180209
}
181210

211+
// releaseNfDevices recovers any devices (VF representors or veths) that are
212+
// still inside NF pod namespaces during graceful shutdown. This must run
213+
// before the CNI server and VSP are stopped so the host can safely reset
214+
// sriov_numvfs without hitting a kernel D-state hang.
215+
func (d *DpuSideManager) releaseNfDevices() {
216+
d.log.Info("releaseNfDevices: checking for devices still in pod namespaces")
217+
218+
devices, err := d.vsp.GetDevices()
219+
if err != nil {
220+
d.log.Error(err, "releaseNfDevices: failed to get device list from VSP")
221+
return
222+
}
223+
224+
for _, dev := range devices.Devices {
225+
devName := dev.ID
226+
if _, err := netlink.LinkByName(devName); err == nil {
227+
continue
228+
}
229+
230+
links, listErr := netlink.LinkList()
231+
if listErr != nil {
232+
d.log.Error(listErr, "releaseNfDevices: failed to list links")
233+
return
234+
}
235+
for _, link := range links {
236+
if link.Attrs().Alias == devName {
237+
d.log.Info("releaseNfDevices: found device under temp name, restoring",
238+
"tempName", link.Attrs().Name, "originalName", devName)
239+
if err := netlink.LinkSetName(link, devName); err != nil {
240+
d.log.Error(err, "releaseNfDevices: failed to rename", "device", devName)
241+
} else if err := netlink.LinkSetUp(link); err != nil {
242+
d.log.Error(err, "releaseNfDevices: failed to bring up", "device", devName)
243+
}
244+
break
245+
}
246+
}
247+
}
248+
}
249+
182250
func (d *DpuSideManager) Listen() (net.Listener, error) {
183251
d.startedWg.Add(1)
184252
d.log.Info("Starting DpuDaemon")
@@ -225,6 +293,7 @@ func (d *DpuSideManager) Serve(ctx context.Context, listener net.Listener) error
225293
go func() {
226294
<-ctx.Done()
227295
d.log.Info("Context cancelled, shutting down servers")
296+
d.releaseNfDevices()
228297
d.server.Stop()
229298
d.dp.Stop()
230299
d.vsp.Close()

internal/daemon/dpusidemanager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ func waitAllNodesDpuAllocatable(client client.Client) {
3636
}
3737
readyNodes := 0
3838
for _, node := range latestNodes.Items {
39-
allocatableQuantity, ok := node.Status.Allocatable[deviceplugin.DpuResourceName]
39+
allocatableQuantity, ok := node.Status.Allocatable[deviceplugin.DefaultDpuResourceName]
4040
if ok {
4141
allocatable, _ := allocatableQuantity.AsInt64()
4242
if allocatable > 0 {

internal/daemon/hostsidemanager.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"k8s.io/client-go/rest"
2727
ctrl "sigs.k8s.io/controller-runtime"
2828
"sigs.k8s.io/controller-runtime/pkg/cache"
29+
"sigs.k8s.io/controller-runtime/pkg/client"
2930
)
3031

3132
type HostSideManager struct {
@@ -51,7 +52,7 @@ type HostSideManager struct {
5152
dpListener net.Listener
5253
}
5354

54-
func (d *HostSideManager) CreateBridgePort(pf int, vf int, vlan int, mac string) (*pb.BridgePort, error) {
55+
func (d *HostSideManager) CreateBridgePort(pf int, vf int, vlan int, mac string, bridgeID string) (*pb.BridgePort, error) {
5556
err := d.connectWithRetry()
5657
if err != nil {
5758
return nil, fmt.Errorf("Failed to connect with retry: %v", err)
@@ -69,8 +70,7 @@ func (d *HostSideManager) CreateBridgePort(pf int, vf int, vlan int, mac string)
6970
Ptype: 1,
7071
MacAddress: m,
7172
LogicalBridges: []string{
72-
// TODO: Remove +2
73-
fmt.Sprintf("%d", vf+2),
73+
bridgeID,
7474
},
7575
},
7676
},
@@ -100,10 +100,16 @@ func NewHostSideManager(vsp plugin.VendorPlugin, opts ...func(*HostSideManager))
100100
opt(h)
101101
}
102102

103-
h.dp = deviceplugin.NewDevicePlugin(vsp, false, h.pathManager)
104103
if h.config == nil {
105104
h.config = ctrl.GetConfigOrDie()
106105
}
106+
107+
if k8sClient, err := client.New(h.config, client.Options{Scheme: scheme.Scheme}); err != nil {
108+
h.log.Error(err, "Failed to create Kubernetes client for device plugin; falling back to default-only registration")
109+
h.dp = deviceplugin.NewDevicePluginManager(vsp, false, h.pathManager, nil)
110+
} else {
111+
h.dp = deviceplugin.NewDevicePluginManager(vsp, false, h.pathManager, k8sClient)
112+
}
107113
return h, nil
108114
}
109115

@@ -193,11 +199,12 @@ func (d *HostSideManager) cniCmdAddHandler(req *cnitypes.PodRequest) (*cni100.Re
193199
pf := 0
194200
vf := req.CNIConf.VFID
195201
mac := req.CNIConf.OrigVfState.EffectiveMAC
202+
bridgeID := req.CNIConf.BridgeID
196203
d.log.Info("addHandler", "CNIConf", req.CNIConf)
197204
// TODO: fix setting Vlan based on network definition in CR
198205
vlan := 2 // *req.CNIConf.Vlan
199-
d.log.Info("addHandler", "pf", pf, "vf", vf, "mac", mac, "vlan", vlan)
200-
_, err = d.CreateBridgePort(pf, vf, vlan, mac)
206+
d.log.Info("addHandler", "pf", pf, "vf", vf, "mac", mac, "vlan", vlan, "bridgeID", bridgeID)
207+
_, err = d.CreateBridgePort(pf, vf, vlan, mac, bridgeID)
201208
if err != nil {
202209
return nil, fmt.Errorf("Failed to call CreateBridgePort: %v", err)
203210
}

internal/daemon/hostsidemanager_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,15 @@ func (v *DummyPlugin) DeleteBridgePort(deleteRequest *opi.DeleteBridgePortReques
6363
return nil
6464
}
6565

66-
func (g *DummyPlugin) CreateNetworkFunction(input string, output string) error {
66+
func (g *DummyPlugin) CreateNetworkFunction(input string, output string, bridgeID string) error {
6767
return nil
6868
}
6969

70-
func (g *DummyPlugin) DeleteNetworkFunction(input string, output string) error {
70+
func (g *DummyPlugin) DeleteNetworkFunction(input string, output string, bridgeID string) error {
71+
return nil
72+
}
73+
74+
func (g *DummyPlugin) SetDpuNetworkConfig(isAccelerated bool) error {
7175
return nil
7276
}
7377

0 commit comments

Comments
 (0)