Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 41 additions & 6 deletions kernel/src/interrupts/timer_entry.asm
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,25 @@ timer_interrupt_entry:
cmp rax, 3 ; Ring 3?
jne .skip_swapgs_entry ; If not from userspace, skip swapgs

; We came from userspace, swap to kernel GS FIRST
; We need kernel GS to read kernel_cr3 from per-CPU data
swapgs

; CRITICAL: Save the process CR3 BEFORE switching to kernel CR3
; This allows us to restore it on exit if no context switch happens
; Save process CR3 to per-CPU data at gs:[80] (SAVED_PROCESS_CR3_OFFSET)
mov rax, cr3 ; Read current (process) CR3
mov qword [gs:80], rax ; Save to per-CPU saved_process_cr3

; CRITICAL: Switch CR3 back to kernel page table
; When interrupt fires from userspace, CR3 is still the process PT
; We MUST switch to kernel PT before running any kernel code
; TODO: Make this dynamic by storing kernel CR3 in per-CPU data
mov rax, 0x101000 ; Kernel CR3 (hardcoded for now)
; Read kernel_cr3 from per-CPU data at gs:[72] (KERNEL_CR3_OFFSET)
mov rax, qword [gs:72] ; Read kernel CR3 from per-CPU data
test rax, rax ; Check if kernel_cr3 is set
jz .skip_cr3_switch_entry ; If not set, skip (early boot fallback)
mov cr3, rax ; Switch to kernel page table

; We came from userspace, swap to kernel GS
swapgs
.skip_cr3_switch_entry:

; Log full frame details for first few userspace interrupts
; Pass frame pointer to logging function
Expand Down Expand Up @@ -304,7 +314,32 @@ timer_interrupt_entry:
jmp .after_cr3_check

.no_cr3_switch_back_to_user:
; No CR3 switch needed, swap back to user GS
; No context switch, but we still need to restore the ORIGINAL process CR3!
; We saved it on entry at gs:[80] (SAVED_PROCESS_CR3_OFFSET)
mov rax, qword [gs:80] ; Read saved process CR3
test rax, rax ; Check if it was saved (non-zero)
jz .no_saved_cr3 ; If 0, skip (shouldn't happen from userspace)

; Debug: Output marker for saved CR3 restore
push rdx
mov dx, 0x3F8
push rax
mov al, '!' ; '!' for saved CR3 restore
out dx, al
mov al, 'C'
out dx, al
mov al, 'R'
out dx, al
mov al, '3'
out dx, al
pop rax
pop rdx

; Switch back to original process CR3
mov cr3, rax

.no_saved_cr3:
; Swap back to user GS for IRETQ
swapgs

.after_cr3_check:
Expand Down
11 changes: 11 additions & 0 deletions kernel/src/memory/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ pub fn init(physical_memory_offset: VirtAddr, memory_regions: &'static MemoryReg
// PHASE 2: Build master kernel PML4 with upper-half mappings
kernel_page_table::build_master_kernel_pml4();

// CRITICAL: Update kernel_cr3 in per-CPU data to the new master PML4
// per_cpu::init() already ran and set kernel_cr3 to the bootloader's CR3
// Now that we've switched to the master PML4, we must update it
{
use x86_64::registers::control::Cr3;
let (current_frame, _) = Cr3::read();
let master_cr3 = current_frame.start_address().as_u64();
log::info!("CRITICAL: Updating kernel_cr3 to master PML4: {:#x}", master_cr3);
crate::per_cpu::set_kernel_cr3(master_cr3);
}

// Migrate any existing processes (though there shouldn't be any yet)
kernel_page_table::migrate_existing_processes();

Expand Down
75 changes: 74 additions & 1 deletion kernel/src/per_cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ pub struct PerCpuData {
/// Target CR3 for next IRETQ (offset 64) - set before context switch
/// 0 means no CR3 switch needed
pub next_cr3: u64,

/// Kernel CR3 (offset 72) - the master kernel page table
/// Used by interrupt/syscall entry to switch to kernel page tables
pub kernel_cr3: u64,

/// Saved process CR3 (offset 80) - saved on interrupt entry from userspace
/// Used to restore process page tables on interrupt exit if no context switch
pub saved_process_cr3: u64,
}

// Linux-style preempt_count bit layout constants
Expand Down Expand Up @@ -123,6 +131,10 @@ const TSS_OFFSET: usize = 48; // offset 48: *mut TSS (8 bytes)
const SOFTIRQ_PENDING_OFFSET: usize = 56; // offset 56: u32 (4 bytes)
#[allow(dead_code)]
const NEXT_CR3_OFFSET: usize = 64; // offset 64: u64 (8 bytes) - ALIGNED
#[allow(dead_code)]
const KERNEL_CR3_OFFSET: usize = 72; // offset 72: u64 (8 bytes) - ALIGNED
#[allow(dead_code)]
const SAVED_PROCESS_CR3_OFFSET: usize = 80; // offset 80: u64 (8 bytes) - ALIGNED

// Compile-time assertions to ensure offsets are correct
// These will fail to compile if the offsets don't match expected values
Expand All @@ -132,7 +144,7 @@ const _: () = assert!(USER_RSP_SCRATCH_OFFSET % 8 == 0, "user_rsp_scratch must b
const _: () = assert!(core::mem::size_of::<usize>() == 8, "This code assumes 64-bit pointers");

// Verify struct size is 128 bytes due to align(64) attribute
// The actual data is 72 bytes, but align(64) rounds up to 128
// The actual data is 88 bytes (saved_process_cr3 at offset 80), but align(64) rounds up to 128
const _: () = assert!(core::mem::size_of::<PerCpuData>() == 128, "PerCpuData must be 128 bytes (aligned to 64)");

// Verify bit layout matches Linux kernel
Expand All @@ -158,6 +170,8 @@ impl PerCpuData {
softirq_pending: 0,
_pad2: 0,
next_cr3: 0,
kernel_cr3: 0,
saved_process_cr3: 0,
}
}
}
Expand Down Expand Up @@ -195,6 +209,25 @@ pub fn init() {
// Mark per-CPU data as initialized and safe to use
PER_CPU_INITIALIZED.store(true, Ordering::Release);
log::info!("Per-CPU data marked as initialized - preempt_count functions now use per-CPU storage");

// Store the current CR3 as the initial kernel CR3
// NOTE: At this point, we're still using the bootloader's page tables.
// After memory::init() calls build_master_kernel_pml4(), the kernel switches
// to the master PML4 and calls set_kernel_cr3() to update this value.
// This initial value provides a fallback during early boot.
let (current_frame, _) = x86_64::registers::control::Cr3::read();
let kernel_cr3_val = current_frame.start_address().as_u64();
log::info!("Storing initial kernel_cr3 = {:#x} in per-CPU data (bootloader PT)", kernel_cr3_val);

unsafe {
core::arch::asm!(
"mov gs:[{offset}], {}",
in(reg) kernel_cr3_val,
offset = const KERNEL_CR3_OFFSET,
options(nostack, preserves_flags)
);
}
log::info!("kernel_cr3 stored successfully - interrupt handlers can now switch to kernel page tables");
}

/// Get the current thread from per-CPU data
Expand Down Expand Up @@ -941,6 +974,46 @@ pub fn set_next_cr3(cr3: u64) {
}
}

/// Read the `kernel_cr3` slot from the current CPU's per-CPU data.
///
/// The slot lives at `KERNEL_CR3_OFFSET` from the GS base and holds the
/// physical address of the kernel page-table root (the master kernel PML4
/// once `set_kernel_cr3()` has been called; `init()` seeds it with the CR3
/// that was active at init time).
///
/// Returns 0 while `PER_CPU_INITIALIZED` is still false; in that case the
/// GS-relative load is not attempted at all.
#[allow(dead_code)]
pub fn get_kernel_cr3() -> u64 {
    if PER_CPU_INITIALIZED.load(Ordering::Acquire) {
        let stored: u64;
        // SAFETY: a single GS-relative load that touches no stack and leaves
        // flags alone. Assumes the GS base points at this CPU's PerCpuData
        // whenever PER_CPU_INITIALIZED is set — TODO confirm against init().
        unsafe {
            core::arch::asm!(
                "mov {}, gs:[{offset}]",
                out(reg) stored,
                offset = const KERNEL_CR3_OFFSET,
                options(nostack, readonly, preserves_flags)
            );
        }
        stored
    } else {
        // Per-CPU storage is not usable yet; report "not set".
        0
    }
}

/// Store `cr3` in the `kernel_cr3` slot of the current CPU's per-CPU data.
///
/// Intended to be called once after `build_master_kernel_pml4()`, so that
/// the interrupt and syscall entry paths, which read this slot at
/// `KERNEL_CR3_OFFSET`, switch to the master kernel page table.
///
/// If per-CPU data has not been initialized yet, the value is NOT stored:
/// a warning is logged and the call returns without touching GS. In that
/// case `init()` will later record whatever CR3 is active when it runs,
/// and the caller must call this function again afterwards if a different
/// value is required.
pub fn set_kernel_cr3(cr3: u64) {
    if !PER_CPU_INITIALIZED.load(Ordering::Acquire) {
        // Nothing is buffered here: the argument is dropped. The message
        // says so explicitly so the log does not suggest a deferred store.
        log::warn!(
            "set_kernel_cr3 called before per-CPU init; ignoring {:#x} (init() records the active CR3)",
            cr3
        );
        return;
    }

    log::info!("Setting kernel_cr3 in per-CPU data to {:#x}", cr3);
    // SAFETY: a single GS-relative 8-byte store that touches no stack and
    // leaves flags alone. Assumes the GS base points at this CPU's
    // PerCpuData whenever PER_CPU_INITIALIZED is set — TODO confirm against
    // init().
    unsafe {
        core::arch::asm!(
            "mov gs:[{offset}], {}",
            in(reg) cr3,
            offset = const KERNEL_CR3_OFFSET,
            options(nostack, preserves_flags)
        );
    }
}

/// Check if we can schedule (preempt_count == 0 and returning to userspace)
pub fn can_schedule(saved_cs: u64) -> bool {
let current_preempt = preempt_count();
Expand Down
99 changes: 85 additions & 14 deletions kernel/src/syscall/entry.asm
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,26 @@ syscall_entry:
; Clear direction flag for string operations
cld

; Always switch to kernel GS FIRST for INT 0x80 entry
; We need kernel GS to read kernel_cr3 from per-CPU data
; INT 0x80 is only used from userspace, so we always need swapgs
swapgs

; CRITICAL: Save the process CR3 BEFORE switching to kernel CR3
; This allows us to restore it on exit if no context switch happens
; Save process CR3 to per-CPU data at gs:[80] (SAVED_PROCESS_CR3_OFFSET)
mov rax, cr3 ; Read current (process) CR3
mov qword [gs:80], rax ; Save to per-CPU saved_process_cr3

; CRITICAL: Switch CR3 back to kernel page table
; Syscalls only come from userspace, so CR3 is always a process PT
; We MUST switch to kernel PT before running kernel code
; TODO: Make this dynamic by storing kernel CR3 in per-CPU data
mov rax, 0x101000 ; Kernel CR3 (hardcoded for now)
; Read kernel_cr3 from per-CPU data at gs:[72] (KERNEL_CR3_OFFSET)
mov rax, qword [gs:72] ; Read kernel CR3 from per-CPU data
test rax, rax ; Check if kernel_cr3 is set
jz .skip_cr3_switch ; If not set, skip (early boot fallback)
mov cr3, rax ; Switch to kernel page table

; Always switch to kernel GS for INT 0x80 entry
; INT 0x80 is only used from userspace, so we always need swapgs
swapgs
.skip_cr3_switch:

; Call the Rust syscall handler
; Pass pointer to saved registers as argument
Expand Down Expand Up @@ -196,7 +206,32 @@ syscall_entry:
jmp .after_cr3_check_syscall

.no_cr3_switch_syscall_back_to_user:
; No CR3 switch needed, swap back to user GS
; No context switch, but we still need to restore the ORIGINAL process CR3!
; We saved it on entry at gs:[80] (SAVED_PROCESS_CR3_OFFSET)
mov rax, qword [gs:80] ; Read saved process CR3
test rax, rax ; Check if it was saved (non-zero)
jz .no_saved_cr3_syscall ; If 0, skip (shouldn't happen from userspace)

; Debug: Output marker for saved CR3 restore
push rdx
mov dx, 0x3F8
push rax
mov al, '!' ; '!' for saved CR3 restore
out dx, al
mov al, 'S'
out dx, al
mov al, 'Y'
out dx, al
mov al, 'S'
out dx, al
pop rax
pop rdx

; Switch back to original process CR3
mov cr3, rax

.no_saved_cr3_syscall:
; Swap back to user GS for IRETQ
swapgs

.after_cr3_check_syscall:
Expand Down Expand Up @@ -317,9 +352,19 @@ syscall_return_to_userspace:
test rax, rax
jz .no_cr3_switch_first_entry_back_to_user

; Interrupts already disabled (CLI at function start line 211)
; Interrupts already disabled (CLI at function start line 260)
; Safe to switch CR3 now

; CRITICAL FIX: Clear next_cr3 BEFORE switching CR3!
; We must do this while kernel page tables are still active,
; because after CR3 switch the process page tables may not
; have the kernel per-CPU region mapped. Accessing [gs:64]
; after CR3 switch would cause a page fault -> triple fault.
push rdx
xor rdx, rdx
mov qword [gs:64], rdx
pop rdx

; Debug: Output marker for CR3 switch
mov dx, 0x3F8
push rax
Expand All @@ -337,20 +382,46 @@ syscall_return_to_userspace:
out dx, al
pop rax

; Switch CR3 to process page table
; NOW safe to switch CR3 to process page table
; Kernel per-CPU data already cleared while kernel PT was active
mov cr3, rax

; Clear next_cr3 flag (set to 0)
xor rdx, rdx
mov qword [gs:64], rdx

; Swap back to user GS for IRETQ
swapgs

jmp .after_cr3_check_first_entry

.no_cr3_switch_first_entry_back_to_user:
; No CR3 switch needed, swap back to user GS
; No context switch, but we still need to restore the ORIGINAL process CR3!
; We saved it on entry at gs:[80] (SAVED_PROCESS_CR3_OFFSET)
mov rax, qword [gs:80] ; Read saved process CR3
test rax, rax ; Check if it was saved (non-zero)
jz .no_saved_cr3_first_entry ; If 0, skip (shouldn't happen from userspace)

; Debug: Output marker for saved CR3 restore
push rdx
mov dx, 0x3F8
push rax
mov al, '!' ; '!' for saved CR3 restore
out dx, al
mov al, 'F'
out dx, al
mov al, 'I'
out dx, al
mov al, 'R'
out dx, al
mov al, 'S'
out dx, al
mov al, 'T'
out dx, al
pop rax
pop rdx

; Switch back to original process CR3
mov cr3, rax

.no_saved_cr3_first_entry:
; Swap back to user GS for IRETQ
swapgs

.after_cr3_check_first_entry:
Expand Down
31 changes: 16 additions & 15 deletions kernel/src/syscall/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@ use super::{SyscallNumber, SyscallResult};
pub struct SyscallFrame {
// General purpose registers (in memory order after all pushes)
// Stack grows down, so last pushed is at lowest address (where RSP points)
// Assembly pushes in reverse order: r15 first, rax last
pub rax: u64, // Syscall number - pushed last, so at RSP+0
pub rcx: u64, // at RSP+8
pub rdx: u64, // at RSP+16
pub rbx: u64, // at RSP+24
pub rbp: u64, // at RSP+32
pub rsi: u64, // at RSP+40
pub rdi: u64, // at RSP+48
// Assembly pushes: rax first, then rcx, rdx, rbx, rbp, rsi, rdi, r8-r15
// So r15 (pushed last) is at RSP+0, and rax (pushed first) is at RSP+112
pub r15: u64, // pushed last, at RSP+0
pub r14: u64, // at RSP+8
pub r13: u64, // at RSP+16
pub r12: u64, // at RSP+24
pub r11: u64, // at RSP+32
pub r10: u64, // at RSP+40
pub r9: u64, // at RSP+48
pub r8: u64, // at RSP+56
pub r9: u64, // at RSP+64
pub r10: u64, // at RSP+72
pub r11: u64, // at RSP+80
pub r12: u64, // at RSP+88
pub r13: u64, // at RSP+96
pub r14: u64, // at RSP+104
pub r15: u64, // pushed first, so at RSP+112
pub rdi: u64, // at RSP+64
pub rsi: u64, // at RSP+72
pub rbp: u64, // at RSP+80
pub rbx: u64, // at RSP+88
pub rdx: u64, // at RSP+96
pub rcx: u64, // at RSP+104
pub rax: u64, // Syscall number - pushed first, at RSP+112

// Interrupt frame (pushed by CPU before our code)
pub rip: u64,
Expand Down
Loading