From e3a3d1017b331cfafc2bffbb42e4de5bba865737 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Wed, 15 Apr 2026 23:48:57 +0200 Subject: [PATCH 1/4] smp: Factor preparation for secondary CPU start out It will be used to switch to CPU 0 on M3 and later to avoid confusion when Linux is not started with CPU 0. Signed-off-by: Janne Grunau --- src/smp.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/smp.c b/src/smp.c index c726ee75f..6f2a5f1a4 100644 --- a/src/smp.c +++ b/src/smp.c @@ -108,21 +108,9 @@ void smp_secondary_prep_el3(void) return; } -static void smp_start_cpu(int index, int die, int cluster, int core, u64 impl, u64 cpu_start_base) -{ - int i; - - if (index >= MAX_CPUS) - return; - - if (has_el3() && index >= MAX_EL3_CPUS) - return; - - if (spin_table[index].flag) - return; - - printf("Starting CPU %d (%d:%d:%d)... ", index, die, cluster, core); +static void smp_prepare_cpu(int index) +{ memset(&spin_table[index], 0, sizeof(struct spin_table)); target_cpu = index; @@ -139,6 +127,24 @@ static void smp_start_cpu(int index, int die, int cluster, int core, u64 impl, u dc_civac_range(&_reset_stack, sizeof(void *)); sysop("dsb sy"); +} + +static void smp_start_cpu(int index, int die, int cluster, int core, u64 impl, u64 cpu_start_base) +{ + int i; + + if (index >= MAX_CPUS) + return; + + if (has_el3() && index >= MAX_EL3_CPUS) + return; + + if (spin_table[index].flag) + return; + + printf("Starting CPU %d (%d:%d:%d)... ", index, die, cluster, core); + + smp_prepare_cpu(index); write64(impl, (u64)_vectors_start); From f671f625d81758325ecd7d5bdff9393a513ead4b Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Wed, 15 Apr 2026 23:57:49 +0200 Subject: [PATCH 2/4] smp: Allow switching the boot (primary) CPU core M3 and later Apple silicon SoCs start m1n1 on the first performance core. The cluster layout remained unchanged with an efficiency core cluster first. 
This causes an annoying but mostly cosmetic issue in Linux. Linux assigns CPU index 0 to the boot CPU. This results in a mismatch of logical and physical CPU core order. As seen with the Radxa Orion O6 this will at least generate support questions but is also annoying for CPU pinning. It's likely to result in pinning to a mixture of performance and efficiency cores. While functionally it should not matter to which secondary CPU core control is passed, the only expected use case is switching to CPU core 0. Signed-off-by: Janne Grunau --- proxyclient/m1n1/proxy.py | 3 ++ src/payload.c | 7 +++ src/proxy.c | 3 ++ src/proxy.h | 1 + src/smp.c | 90 ++++++++++++++++++++++++++++++++++++++- src/smp.h | 2 + src/utils_asm.S | 33 ++++++++++++++ 7 files changed, 138 insertions(+), 1 deletion(-) diff --git a/proxyclient/m1n1/proxy.py b/proxyclient/m1n1/proxy.py index 16ed89467..5d3f967e5 100644 --- a/proxyclient/m1n1/proxy.py +++ b/proxyclient/m1n1/proxy.py @@ -572,6 +572,7 @@ class M1N1Proxy(Reloadable): P_SMP_CALL_SYNC_EL1 = 0x508 P_SMP_CALL_EL0 = 0x509 P_SMP_CALL_SYNC_EL0 = 0x50a + P_SMP_SWITCH_BOOT_CPU = 0x50b P_HEAPBLOCK_ALLOC = 0x600 P_MALLOC = 0x601 @@ -1004,6 +1005,8 @@ def smp_call_sync_el0(self, cpu, addr, *args): if len(args) > 3: raise ValueError("Too many arguments") return self.request(self.P_SMP_CALL_SYNC_EL0, cpu, addr, *args) + def smp_switch_boot_cpu(self, cpu): + return self.request(self.P_SMP_SWITCH_BOOT_CPU, cpu) def heapblock_alloc(self, size): return self.request(self.P_HEAPBLOCK_ALLOC, size) diff --git a/src/payload.c b/src/payload.c index 5b1547fde..3c397908d 100644 --- a/src/payload.c +++ b/src/payload.c @@ -342,6 +342,13 @@ int payload_run(void) printf("Failed to kboot set %s='%s'\n", chosen[i], val); } + if (boot_cpu_idx != 0) { + int boot_cpu = boot_cpu_idx; + int index = smp_switch_boot_cpu(0); + if (boot_cpu != index) + printf("Switched boot CPU from %d to %d (%d)\n", boot_cpu, index, boot_cpu_idx); + } + if (kboot_prepare_dt(fdt)) { printf("Failed to 
prepare FDT!\n"); return -1; diff --git a/src/proxy.c b/src/proxy.c index 3f020c045..49f573ef4 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -374,6 +374,9 @@ int proxy_process(ProxyRequest *request, ProxyReply *reply) request->args[3], request->args[4]); reply->retval = smp_wait(request->args[0]); break; + case P_SMP_SWITCH_BOOT_CPU: + reply->retval = smp_switch_boot_cpu(request->args[0]); + break; case P_HEAPBLOCK_ALLOC: reply->retval = (u64)heapblock_alloc(request->args[0]); diff --git a/src/proxy.h b/src/proxy.h index 1e8acdf06..01104e909 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -92,6 +92,7 @@ typedef enum { P_SMP_CALL_EL1_SYNC, P_SMP_CALL_EL0, P_SMP_CALL_EL0_SYNC, + P_SMP_SWITCH_BOOT_CPU, P_HEAPBLOCK_ALLOC = 0x600, // Heap and memory management ops P_MALLOC, diff --git a/src/smp.c b/src/smp.c index 6f2a5f1a4..05ab9b274 100644 --- a/src/smp.c +++ b/src/smp.c @@ -5,6 +5,8 @@ #include "aic.h" #include "aic_regs.h" #include "cpu_regs.h" +#include "exception.h" +#include "fb.h" #include "malloc.h" #include "memory.h" #include "pmgr.h" @@ -108,7 +110,6 @@ void smp_secondary_prep_el3(void) return; } - static void smp_prepare_cpu(int index) { memset(&spin_table[index], 0, sizeof(struct spin_table)); @@ -422,6 +423,93 @@ void smp_stop_secondaries(bool deep_sleep) } } +extern void cpu_reset(void) __attribute__((noreturn)); +extern void *smp_switch_boot_cpu_entry(int cpu_index); +extern int smp_switch_boot_cpu_exit(void *prev_stack, u64 saved_sp); + +static u64 switch_boot_cpu_init_new(int cpu_index, int old_index, u64 saved_sp) +{ + int i; + void *prev_stack; + + // wait for the previous boot CPU to be available as secondary + for (i = 0; i < 100; i++) { + sysop("dmb ld"); + if (spin_table[old_index].flag) + break; + udelay(1000); + } + + if (i >= 100) + printf("Previous boot CPU %d failed to start as secondary!\n", old_index); + else + printf(" Started.\n"); + + // restore _reset_stacks from secondary init of previous boot cpu + _reset_stack = dummy_stack 
+ DUMMY_STACK_SIZE; + _reset_stack_el1 = dummy_stack_el1 + DUMMY_STACK_SIZE; + + prev_stack = secondary_stacks[cpu_index]; + secondary_stacks[cpu_index] = dummy_stack; + + // setup current CPU as boot CPU + if (in_el2()) + msr(TPIDR_EL2, boot_cpu_idx); + else + msr(TPIDR_EL1, boot_cpu_idx); + + // clear spin table of new boot CPU + memset(&spin_table[boot_cpu_idx], 0, sizeof(struct spin_table)); + spin_table[boot_cpu_idx].mpidr = mrs(MPIDR_EL1) & 0xFFFFFF; + + exception_initialize(); + + mmu_init(); + fb_set_active(true); + + smp_switch_boot_cpu_exit(prev_stack, saved_sp); + __builtin_unreachable(); + return -1; +} + +void smp_do_switch_boot_cpu(int cpu_index, u64 saved_sp) +{ + int old_index = boot_cpu_idx; + + printf("Switching boot CPU from %d to %d\n", old_index, cpu_index); + + // disable frame buffer until the new CPU has called mmu_init() + fb_set_active(false); + + smp_call3(cpu_index, switch_boot_cpu_init_new, cpu_index, old_index, saved_sp); + + // switch the boot CPU so the old boot CPU resets as secondary + boot_cpu_idx = cpu_index; + boot_cpu_mpidr = spin_table[cpu_index].mpidr; + smp_prepare_cpu(old_index); + + cpu_reset(); + __builtin_unreachable(); +} + +int smp_switch_boot_cpu(int cpu_index) +{ + if (cpu_index == boot_cpu_idx) + return cpu_index; + + if (!smp_is_alive(cpu_index)) { + printf("Trying to switch to offline CPU %d\n", cpu_index); + return -1; + } + + // Call asm helper function to capture callee saved state and pass `sp` to the new boot CPU. + // This function will return on the new boot CPU. 
+ void *old_stack = smp_switch_boot_cpu_entry(cpu_index); + free(old_stack); + + return cpu_index; +} + void smp_send_ipi(int cpu) { if (cpu >= MAX_CPUS) diff --git a/src/smp.h b/src/smp.h index 029697864..cf191a8bb 100644 --- a/src/smp.h +++ b/src/smp.h @@ -19,6 +19,8 @@ void smp_secondary_prep_el3(void); void smp_start_secondaries(void); void smp_stop_secondaries(bool deep_sleep); +int smp_switch_boot_cpu(int cpu_index); + #define smp_call0(i, f) smp_call4(i, f, 0, 0, 0, 0) #define smp_call1(i, f, a) smp_call4(i, f, a, 0, 0, 0) #define smp_call2(i, f, a, b) smp_call4(i, f, a, b, 0, 0) diff --git a/src/utils_asm.S b/src/utils_asm.S index 13af28d83..fa5c47cc2 100644 --- a/src/utils_asm.S +++ b/src/utils_asm.S @@ -172,3 +172,36 @@ _deep_wfi_helper: ldp x30, x0, [sp], #16 ret + +.extern smp_do_switch_boot_cpu + +.globl smp_switch_boot_cpu_entry +.type smp_switch_boot_cpu_entry, @function +smp_switch_boot_cpu_entry: + stp x29, x30, [sp, #-16]! + mov x29, sp + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x17, x18, [sp, #-16]! + + mov x1, sp + bl smp_do_switch_boot_cpu + // unreachable + b . 
+ +.globl smp_switch_boot_cpu_exit +.type smp_switch_boot_cpu_exit, @function +smp_switch_boot_cpu_exit: + mov sp, x1 + ldp x17, x18, [sp], #16 + ldp x19, x20, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x25, x26, [sp], #16 + ldp x27, x28, [sp], #16 + ldp x29, x30, [sp], #16 + // passes argument in x0 as return value + ret // looks like a return from smp_switch_boot_cpu_entry on the new boot cpu From 9cc21345755734fe45d35fbc86fbbd49ee7ebdf1 Mon Sep 17 00:00:00 2001 From: Yureka Date: Wed, 6 May 2026 20:44:40 +0200 Subject: [PATCH 3/4] smp: don't write rvbar when !cyc_ovrd Signed-off-by: Yureka --- src/smp.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/smp.c b/src/smp.c index 05ab9b274..7d941e579 100644 --- a/src/smp.c +++ b/src/smp.c @@ -26,6 +26,9 @@ #define CPU_REG_CLUSTER GENMASK(10, 8) #define CPU_REG_DIE GENMASK(14, 11) +#define RVBAR_LOCK BIT(0) +#define RVBAR_ADDR GENMASK(47, 12) + struct spin_table { u64 mpidr; u64 flag; @@ -143,11 +146,19 @@ static void smp_start_cpu(int index, int die, int cluster, int core, u64 impl, u if (spin_table[index].flag) return; + if (!cpu_features->cyc_ovrd && (read64(impl) & RVBAR_ADDR) != (u64)_vectors_start) { + printf("Failed! \n RVBAR (=0x%lx) is locked and differs from entry point (=0x%lx)\n", + read64(impl) & RVBAR_ADDR, (u64)_vectors_start); + } + printf("Starting CPU %d (%d:%d:%d)... 
", index, die, cluster, core); smp_prepare_cpu(index); - write64(impl, (u64)_vectors_start); + if (cpu_features->cyc_ovrd) { + // This also clears RVBAR_LOCK, so that HV can set RVBAR later when the core is running + write64(impl, (u64)_vectors_start); + } cpu_start_base += die * PMGR_DIE_OFFSET; @@ -366,7 +377,7 @@ void smp_start_secondaries(void) if (i == boot_cpu_idx) { // Check if already locked - if (read64(cpu_impl_reg[0]) & 1) + if (FIELD_GET(RVBAR_LOCK, read64(cpu_impl_reg[0]))) continue; // Unlocked, write _vectors_start into boot CPU's rvbar From d54b8af2b24578c47175688ba0aff93a6d8b67dd Mon Sep 17 00:00:00 2001 From: Yureka Date: Wed, 6 May 2026 20:52:00 +0200 Subject: [PATCH 4/4] chainload: only write rvbar if needed Signed-off-by: Yureka --- proxyclient/tools/chainload.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/proxyclient/tools/chainload.py b/proxyclient/tools/chainload.py index 38e936851..92caad879 100755 --- a/proxyclient/tools/chainload.py +++ b/proxyclient/tools/chainload.py @@ -99,6 +99,8 @@ def remove_oslog(node): if cpu.state == "running": continue addr, size = cpu.cpu_impl_reg + if (p.read64(addr) & 0xffff_ffff_f000) == rvbar: + continue print(f" {cpu.name}: [0x{addr:x}] = 0x{rvbar:x}") p.write64(addr, rvbar)