From 30e590fdcb53fca06fe7b2e1c89d750ac755cc12 Mon Sep 17 00:00:00 2001
From: goyalpalak18
Date: Tue, 10 Mar 2026 00:13:45 +0530
Subject: [PATCH] fix: handle dead VMM and scan all urunc TAP devices during
 cleanup

Treat ESRCH from killProcess as success since the process is already gone,
allowing network cleanup to proceed instead of failing silently.

Refactored network.Cleanup to scan for all tap.*_urunc interfaces rather
than relying on a hardcoded name. Continue processing remaining devices on
per-device errors and return a joined error at the end.

Fixes #408
Integrates #407

Signed-off-by: goyalpalak18
---
 .github/linters/urunc-dict.txt         |  1 +
 pkg/network/network.go                 | 71 +++++++++++++++++---------
 pkg/unikontainers/hypervisors/utils.go |  4 ++
 pkg/unikontainers/unikontainers.go     |  5 +-
 4 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/.github/linters/urunc-dict.txt b/.github/linters/urunc-dict.txt
index 31072a2a..c6a462dc 100644
--- a/.github/linters/urunc-dict.txt
+++ b/.github/linters/urunc-dict.txt
@@ -403,3 +403,4 @@ gocyclo
 gomega
 Logr
 onsi
+ESRCH
diff --git a/pkg/network/network.go b/pkg/network/network.go
index eee2f834..591fc4d4 100644
--- a/pkg/network/network.go
+++ b/pkg/network/network.go
@@ -18,6 +18,7 @@ import (
 	"errors"
 	"fmt"
 	"net"
+	"regexp"
 	"strings"
 
 	"github.com/jackpal/gateway"
@@ -262,35 +263,59 @@ func networkSetup(tapName string, ipAddress string, redirectLink netlink.Link, a
 	return newTapDevice, nil
 }
 
-func Cleanup(tapDevice string) error {
+// CleanupAllUruncTaps deletes every network link whose name matches
+// tap<N>_urunc, removing its TC filters and qdiscs first. A failure on one
+// step or device is logged and does not stop the remaining work; all
+// collected errors are joined and returned (nil when everything succeeded).
+func CleanupAllUruncTaps() error {
 	netlog.Debug("net cleanup called")
-	ifaces, err := net.Interfaces()
-	if err != nil {
-		return err
-	}
-	for _, iface := range ifaces {
-		netlog.Debugf("Discovered device %s", iface.Name)
-	}
-	tapLink, err := netlink.LinkByName(tapDevice)
-	if err != nil {
-		netlog.Errorf("Failed to get link %s by name: %v", tapDevice, err)
-		return nil
-	}
-	err = deleteAllTCFilters(tapLink)
+
+	handle, err := netlink.NewHandle()
 	if err != nil {
-		netlog.Errorf("Failed to delete all TC filters: %v", err)
-		return err
+		return fmt.Errorf("failed to get netlink handle: %w", err)
 	}
-	err = deleteAllQDiscs(tapLink)
+	defer handle.Close()
+
+	links, err := handle.LinkList()
 	if err != nil {
-		netlog.Errorf("Failed to delete all qdiscs: %v", err)
-		return err
+		return fmt.Errorf("failed to list links: %w", err)
 	}
-	err = deleteTapDevice(tapLink)
-	if err != nil {
-		netlog.Errorf("Failed to delete link %s: %v", tapDevice, err)
+
+	var retErr error
+	// Tap devices are named tap<N>_urunc (e.g. tap0_urunc), with no
+	// separator between "tap" and the index.
+	tapRe := regexp.MustCompile(`^tap\d+_urunc$`)
+	for _, link := range links {
+		attrs := link.Attrs()
+		if attrs == nil {
+			continue
+		}
+		name := attrs.Name
+		if !tapRe.MatchString(name) {
+			continue
+		}
+
+		netlog.Debugf("cleaning up tap device %s", name)
+		var devErr error
+		if err := deleteAllTCFilters(link); err != nil {
+			netlog.Errorf("failed to delete TC filters for %s: %v", name, err)
+			devErr = errors.Join(devErr, err)
+		}
+		if err := deleteAllQDiscs(link); err != nil {
+			netlog.Errorf("failed to delete qdiscs for %s: %v", name, err)
+			devErr = errors.Join(devErr, err)
+		}
+		if err := deleteTapDevice(link); err != nil {
+			netlog.Errorf("failed to delete tap %s: %v", name, err)
+			devErr = errors.Join(devErr, err)
+		}
+		if devErr == nil {
+			netlog.Debugf("deleted tap device %s", name)
+		}
+		retErr = errors.Join(retErr, devErr)
 	}
-	return nil
+
+	return retErr
 }
 
 func deleteIngressQdisc(link netlink.Link) error {
diff --git a/pkg/unikontainers/hypervisors/utils.go b/pkg/unikontainers/hypervisors/utils.go
index 6694c6eb..c3de7c8b 100644
--- a/pkg/unikontainers/hypervisors/utils.go
+++ b/pkg/unikontainers/hypervisors/utils.go
@@ -71,6 +71,10 @@ func killProcess(pid int) error {
 	const timeout = 2 * time.Second
 	err := syscall.Kill(pid, unix.SIGKILL)
 	if err != nil {
+		if errors.Is(err, syscall.ESRCH) {
+			// Process already dead, nothing to do
+			return nil
+		}
 		return err
 	}
 	deadline := time.Now().Add(timeout)
diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go
index 72006710..c09508ce 100644
--- a/pkg/unikontainers/unikontainers.go
+++ b/pkg/unikontainers/unikontainers.go
@@ -585,10 +585,9 @@ func (u *Unikontainer) Kill() error {
 		return err
 	}
 
-	// TODO: tap0_urunc should not be hardcoded
-	err = network.Cleanup("tap0_urunc")
+	err = network.CleanupAllUruncTaps()
 	if err != nil {
-		uniklog.Errorf("failed to delete tap0_urunc: %v", err)
+		uniklog.Errorf("failed to cleanup tap devices: %v", err)
 	}
 
 	return nil