From 9ace918767af39e90d42d00e6ce22a2d770bfbd0 Mon Sep 17 00:00:00 2001 From: Nathaniel McCallum Date: Sat, 20 Jun 2026 11:49:28 -0400 Subject: [PATCH] patches-7.0: replace cdnsp-sky1 shutdown SError mitigations with host teardown Remove 0061-DPTSW-24991 and 0062-DPTSW-25423 and replace them with 0061-usb-cdns3-cdnsp-sky1-tear-down-host-on-shutdown.patch. cdnsp_sky1_shutdown() gates the USB clocks while the child xHCI host and its devices are still live above the controller. If anything touches the controller's registers after the clocks are gated, the access raises a fatal asynchronous SError that hangs shutdown/reboot. Three distinct paths can do this: 1. the gadget (cdnsp_gadget_pullup) - addressed by 0061 (release the gadget driver); 2. the xHCI IRQ handler (xhci_irq reading USBSTS) - addressed by 0062 (disable_irq + clear HCD_FLAG_HW_ACCESSIBLE); 3. a host device's async error recovery - not covered by either. The panic we captured (via netconsole) came from usb-storage -> xhci_urb_dequeue: an attached mass-storage device's SCSI error-recovery thread resetting its port and dequeuing URBs touches xHCI MMIO after the clocks are gated. 0061 (gadget) and 0062 (IRQ) do not stop this thread, so the hang still reproduces with both. Instead of a third targeted mitigation, route ->shutdown() through the existing ->remove() path, which calls of_platform_depopulate() to remove the xHCI host first - disconnecting its devices and stopping all of their work (gadget, IRQ, and async recovery) - before the resets are asserted and the clocks gated. One patch supersedes both mitigations and covers the host async-recovery path they miss. Tested: 13/13 consecutive `systemctl reboot` with an attached composite USB device (HID + mass-storage), zero SErrors on every teardown (captured via netconsole), versus a deterministic hang before; an 8-reboot stress run was also clean. 
--- ...fix-SError-during-poweroff-by-releas.patch | 100 ------------------ ...dnsp-sky1-tear-down-host-on-shutdown.patch | 72 +++++++++++++ ...cdns3-sky1-disabled-IRQ-before-disab.patch | 44 -------- 3 files changed, 72 insertions(+), 144 deletions(-) delete mode 100644 patches-7.0/0061-DPTSW-24991-usb-fix-SError-during-poweroff-by-releas.patch create mode 100644 patches-7.0/0061-usb-cdns3-cdnsp-sky1-tear-down-host-on-shutdown.patch delete mode 100644 patches-7.0/0062-DPTSW-25423-usb-cdns3-sky1-disabled-IRQ-before-disab.patch diff --git a/patches-7.0/0061-DPTSW-24991-usb-fix-SError-during-poweroff-by-releas.patch b/patches-7.0/0061-DPTSW-24991-usb-fix-SError-during-poweroff-by-releas.patch deleted file mode 100644 index 1f3f795..0000000 --- a/patches-7.0/0061-DPTSW-24991-usb-fix-SError-during-poweroff-by-releas.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "hongliang.yang" -Date: Fri, 8 May 2026 16:21:27 +0800 -Subject: DPTSW-24991: usb: fix SError during poweroff by releasing gadget - before shutdown - -During poweroff -f, the IDM blocks USB3CH0 register access causing an -SError interrupt that panics the kernel. This happens because -device_shutdown() disables USB clocks before the gadget driver is fully -released, leading to register access in cdnsp_gadget_pullup(). - -Fix this in cdnsp_sky1_shutdown() by: -1. Finding the cdns3 child device -2. Finding the gadget device (named "gadget.%d" on the gadget bus) -3. 
Releasing the gadget driver before asserting reset and disabling clocks - -Signed-off-by: hongliang.yang -Change-Id: I34903d03c7152c01d45114b478a3bf5fd499d38b ---- - drivers/usb/cdns3/cdnsp-sky1.c | 51 ++++++++++ - 1 file changed, 51 insertions(+) - -diff --git a/drivers/usb/cdns3/cdnsp-sky1.c b/drivers/usb/cdns3/cdnsp-sky1.c -index 111111111111..222222222222 100644 ---- a/drivers/usb/cdns3/cdnsp-sky1.c -+++ b/drivers/usb/cdns3/cdnsp-sky1.c -@@ -28,6 +28,8 @@ - #include "core.h" - #include "../host/xhci.h" - #include "../host/xhci-plat.h" -+ -+#define GADGET_DEV_NAME_PREFIX "gadget." - #include "cdnsp-sky1.h" - - /* -@@ -880,12 +882,61 @@ static const struct acpi_device_id cdnsp_sky1_acpi_match[] = { - }; - MODULE_DEVICE_TABLE(acpi, cdnsp_sky1_acpi_match); - -+static int cdnsp_sky1_find_cdns(struct device *dev, void *data) -+{ -+ struct cdns **cdns_ptr = data; -+ -+ if (dev->of_node && of_device_is_compatible(dev->of_node, "cdns,usbssp")) { -+ *cdns_ptr = dev_get_drvdata(dev); -+ return 1; -+ } -+ -+ if (acpi_match_device(cdns_sky1_sub_match, dev)) { -+ *cdns_ptr = dev_get_drvdata(dev); -+ return 1; -+ } -+ return 0; -+} -+ -+static int cdnsp_sky1_find_gadget_match(struct device *dev, void *data) -+{ -+ struct device **gadget_dev = data; -+ const char *name = dev_name(dev); -+ -+ /* -+ * The gadget device is registered on the gadget bus with name -+ * "gadget.%d" (see usb_add_gadget_udc -> dev_set_name). -+ * It sits on the gadget bus and has the function driver bound to it. 
-+ */ -+ if (name && !strncmp(name, GADGET_DEV_NAME_PREFIX, -+ strlen(GADGET_DEV_NAME_PREFIX))) { -+ *gadget_dev = dev; -+ return 1; -+ } -+ return 0; -+} -+ - static void cdnsp_sky1_shutdown(struct platform_device *pdev) - { - struct device *dev = &pdev->dev; - struct cdnsp_sky1 *data = dev_get_drvdata(dev); -+ struct cdns *cdns = NULL; -+ struct device *gadget_dev = NULL; - - if (!device_may_wakeup(dev)) { -+ /* -+ * Find the cdns3 child device, then find its gadget device -+ * and release the function driver before disabling clocks. -+ * This ensures all register accesses in gadget_unbind_driver -+ * complete before clocks are turned off. -+ */ -+ device_for_each_child(dev, &cdns, cdnsp_sky1_find_cdns); -+ if (cdns) -+ device_for_each_child(cdns->dev, &gadget_dev, -+ cdnsp_sky1_find_gadget_match); -+ if (gadget_dev) -+ device_release_driver(gadget_dev); -+ - dev_dbg(dev, "at %s, reset controller\n", __func__); - reset_control_assert(data->reset); - reset_control_assert(data->preset); --- -Cixtech - diff --git a/patches-7.0/0061-usb-cdns3-cdnsp-sky1-tear-down-host-on-shutdown.patch b/patches-7.0/0061-usb-cdns3-cdnsp-sky1-tear-down-host-on-shutdown.patch new file mode 100644 index 0000000..10dcb66 --- /dev/null +++ b/patches-7.0/0061-usb-cdns3-cdnsp-sky1-tear-down-host-on-shutdown.patch @@ -0,0 +1,72 @@ +From e88470a88e8f1487224b8c25300b6d379ca69d6d Mon Sep 17 00:00:00 2001 +From: Nathaniel McCallum +Date: Sat, 20 Jun 2026 12:17:24 -0400 +Subject: [PATCH] usb: cdns3: cdnsp-sky1: quiesce host before gating clocks on + shutdown + +cdnsp_sky1_shutdown() asserted the controller/PHY resets and gated the +USB clocks directly, without first tearing down the child xHCI host. That +leaves the xHCI host and any attached USB devices live above a now +clock-gated controller. 
If a device is still running asynchronous error +recovery when the clocks are gated (for example a mass-storage device +whose port is being reset), the recovery path touches xHCI registers on +the gated controller and raises a fatal asynchronous SError, hanging the +shutdown/reboot. + +Drop the bespoke ->shutdown() and point it at the existing ->remove() +callback, which calls of_platform_depopulate() to remove the xHCI host +(and so disconnect its devices and stop their async work) before +asserting the resets and gating the clocks. This is the same ordering +the ->remove() and PM ->suspend() paths already rely on. + +Reusing ->remove() for ->shutdown() is an established pattern for USB +dual-role / glue drivers in mainline, for example: + + - dwc3-qcom: .shutdown = dwc3_qcom_remove + (drivers/usb/dwc3/dwc3-qcom.c) + - dwc3-xilinx: .shutdown = dwc3_xlnx_remove + (drivers/usb/dwc3/dwc3-xilinx.c) + - chipidea/imx: ci_hdrc_imx_shutdown() -> ci_hdrc_imx_remove() + (drivers/usb/chipidea/ci_hdrc_imx.c) + +Signed-off-by: Nathaniel McCallum +--- + drivers/usb/cdns3/cdnsp-sky1.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/drivers/usb/cdns3/cdnsp-sky1.c b/drivers/usb/cdns3/cdnsp-sky1.c +index 9c95f1f5f..17cb85a49 100644 +--- a/drivers/usb/cdns3/cdnsp-sky1.c ++++ b/drivers/usb/cdns3/cdnsp-sky1.c +@@ -869,19 +869,6 @@ static const struct acpi_device_id cdnsp_sky1_acpi_match[] = { + }; + MODULE_DEVICE_TABLE(acpi, cdnsp_sky1_acpi_match); + +-static void cdnsp_sky1_shutdown(struct platform_device *pdev) +-{ +- struct device *dev = &pdev->dev; +- struct cdnsp_sky1 *data = dev_get_drvdata(dev); +- +- if (!device_may_wakeup(dev)) { +- dev_dbg(dev, "at %s, reset controller\n", __func__); +- reset_control_assert(data->reset); +- reset_control_assert(data->preset); +- sky1_usb_clk_disable_all(dev); +- } +-} +- + static struct platform_driver cdnsp_sky1_driver = { + .probe = cdnsp_sky1_probe, + .remove = cdnsp_sky1_remove, +@@ -891,7 +878,7 
@@ static struct platform_driver cdnsp_sky1_driver = { + .acpi_match_table = ACPI_PTR(cdnsp_sky1_acpi_match), + .pm = &cdnsp_sky1_pm_ops, + }, +- .shutdown = cdnsp_sky1_shutdown, ++ .shutdown = cdnsp_sky1_remove, + }; + + module_platform_driver(cdnsp_sky1_driver); +-- +2.54.0 + diff --git a/patches-7.0/0062-DPTSW-25423-usb-cdns3-sky1-disabled-IRQ-before-disab.patch b/patches-7.0/0062-DPTSW-25423-usb-cdns3-sky1-disabled-IRQ-before-disab.patch deleted file mode 100644 index 66bf644..0000000 --- a/patches-7.0/0062-DPTSW-25423-usb-cdns3-sky1-disabled-IRQ-before-disab.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: "hongliang.yang" -Date: Fri, 29 May 2026 15:45:29 +0800 -Subject: DPTSW-25423: usb: cdns3: sky1: disabled IRQ before disabling clocks - -After clocks are turned off, any pending or shared IRQ that fires -will cause the xhci_irq handler to access op_regs->status with -clocks gated, leading to a bus fault. - -Signed-off-by: hongliang.yang -Change-Id: Ic720f469937def6c59ebf64eb301804c1585617b ---- - drivers/usb/cdns3/cdnsp-sky1.c | 15 ++++++++++ - 1 file changed, 15 insertions(+) - -diff --git a/drivers/usb/cdns3/cdnsp-sky1.c b/drivers/usb/cdns3/cdnsp-sky1.c -index 111111111111..222222222222 100644 ---- a/drivers/usb/cdns3/cdnsp-sky1.c -+++ b/drivers/usb/cdns3/cdnsp-sky1.c -@@ -937,6 +937,21 @@ static void cdnsp_sky1_shutdown(struct platform_device *pdev) - if (gadget_dev) - device_release_driver(gadget_dev); - -+ if (cdns && cdns->host_dev) { -+ struct usb_hcd *hcd = platform_get_drvdata(cdns->host_dev); -+ -+ if (hcd && hcd->irq > 0) { -+ disable_irq(hcd->irq); -+ synchronize_irq(hcd->irq); -+ /* -+ * Clear HCD_FLAG_HW_ACCESSIBLE before disable_irq. -+ * This prevents usb_hcd_irq from calling xhci_irq -+ * (which reads USBSTS) after clocks are disabled. 
-+ */ -+ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); -+ } -+ } -+ - dev_dbg(dev, "at %s, reset controller\n", __func__); - reset_control_assert(data->reset); - reset_control_assert(data->preset); --- -Cixtech -