diff --git a/Bignum/Arm/Machine.lean b/Bignum/Arm/Machine.lean index 9ef093f..7e95c32 100644 --- a/Bignum/Arm/Machine.lean +++ b/Bignum/Arm/Machine.lean @@ -8,3 +8,4 @@ module public import Bignum.Arm.Machine.State public import Bignum.Arm.Machine.Instruction public import Bignum.Arm.Machine.Decode +public import Bignum.Arm.Machine.Loader diff --git a/Bignum/Arm/Machine/Decode.lean b/Bignum/Arm/Machine/Decode.lean index f6c7539..f607766 100644 --- a/Bignum/Arm/Machine/Decode.lean +++ b/Bignum/Arm/Machine/Decode.lean @@ -160,6 +160,44 @@ def decode (w : UInt32) : Option Instruction := some (Instruction.ADD rd rn rm) else none -- Unsupported: 32-bit, or with flags/shift + -- MOVZ (Move wide with zero, 64-bit) + -- Pattern: [sf=1; opc=10; 100101; hw:2; imm16:16; Rd:5] + -- Bits 28-23 = 0b100101 identifies Move wide immediate family + -- opc=10 (bits 30-29) selects MOVZ variant + else if extractBits w 28 23 == 0b100101 then + let sf := getBit w 31 -- 64-bit if 1 + let opc := extractBits w 30 29 -- 10 = MOVZ + let hw := extractBits w 22 21 -- Shift: 00=0, 01=16, 10=32, 11=48 + let imm16 := extractBits w 20 5 -- 16-bit immediate + let Rd := extractBits w 4 0 -- Destination register + if sf && opc == 0b10 then + let rd := decodeReg Rd + let shift := hw.toNat * 16 + some (Instruction.MOVZ rd imm16.toNat shift) + else + none + -- MUL (multiply, encoded as MADD with Ra=XZR) + -- Pattern: [sf=1; op54=00; 11011; op31=000; Rm:5; o0=0; Ra=11111; Rn:5; Rd:5] + -- Bits 28-24 = 0b11011 identifies Data Processing 3 source + -- MADD: sf=1, op54=00, op31=000, o0=0 + -- MUL is MADD with Ra = 11111 (XZR) + else if extractBits w 28 24 == 0b11011 then + let sf := getBit w 31 + let op54 := extractBits w 30 29 + let op31 := extractBits w 23 21 + let Rm := extractBits w 20 16 + let o0 := getBit w 15 + let Ra := extractBits w 14 10 + let Rn := extractBits w 9 5 + let Rd := extractBits w 4 0 + -- Check: 64-bit, MADD (op54=00, op31=000, o0=0), Ra=11111 (MUL alias) + if sf && op54 == 0 && op31 == 0 && !o0 && Ra == 0b11111 then + let rd := decodeReg Rd + let rn := decodeReg Rn + let rm := decodeReg Rm + some (Instruction.MUL rd rn rm) + else + none else -- Other instruction families not yet implemented none @@ -223,67 +261,13 @@ def simple_program_bytes : List UInt8 := 0x42, 0x00, 0x01, 0xcb] -- SUB X2, X2, X1 def simple_program (pc : Nat) : Program := - Program.fromBytes (Word64.ofNat pc) simple_program_bytes + Program.fromBytes (BitVec.ofNat 64 pc) simple_program_bytes ``` -/ def Program.fromBytes (base_addr : Word64) (bytes : List UInt8) : Program := { base_addr := base_addr instructions := decodeBytes bytes } -/-! -## Tests and Validation - -These tests verify the decoder against known instruction encodings from the -Simple.lean tutorial. --/ - --- Test 1: Decode ADD X2, X1, X0 --- Binary: 10001011 00000000 00000000 00100010 = 0x8b000022 --- Expected: ADD X2, X1, X0 --- #eval decode 0x8b000022 --- Note: Commented out due to missing native implementation for Instruction repr - --- Test 2: Decode SUB X2, X2, X1 --- Binary: 11001011 00000001 00000000 01000010 = 0xcb010042 --- Expected: SUB X2, X2, X1 --- #eval decode 0xcb010042 - --- Test 3: Invalid encoding (all zeros) --- Expected: none --- #eval decode 0x00000000 - --- Test 4: Invalid encoding (all ones) --- Expected: none --- #eval decode 0xffffffff - --- Test 5: Endianness verification --- Bytes [0x22, 0x00, 0x00, 0x8b] should form 0x8b000022 -#eval ((0x22 : UInt32) ||| ((0x00 : UInt32) <<< 8) ||| - ((0x00 : UInt32) <<< 16) ||| ((0x8b : UInt32) <<< 24)) --- Expected: 0x8b000022 = 2332033058 - --- Test 6: Byte list decoding (Simple.lean program) --- Expected: Two instructions (ADD and SUB) --- #eval decodeBytes [0x22, 0x00, 0x00, 0x8b, 0x42, 0x00, 0x01, 0xcb] - --- Test 7: Program creation from bytes --- #eval Program.fromBytes (Word64.ofNat 0) --- [0x22, 0x00, 0x00, 0x8b, --- 0x42, 0x00, 0x01, 0xcb] - --- Test 8: Bit extraction helpers -#eval extractBits 0x8b000022 31 31 -- sf bit = 1 -#eval extractBits 0x8b000022 30 30 -- op bit = 0 -#eval extractBits 0x8b000022 29 29 -- S bit = 0 -#eval extractBits 0x8b000022 28 21 -- opcode = 0b01011000 = 88 -#eval extractBits 0x8b000022 4 0 -- Rd = 2 - --- Test 9: Register decoding --- #eval decodeReg 0 -- X0 --- #eval decodeReg 1 -- X1 --- #eval decodeReg 2 -- X2 --- #eval decodeReg 31 -- SP - /-! ## Memory-Based Decoding @@ -346,7 +330,17 @@ theorem arm_decode_of_bytes_loaded (s : ArmState) (pc : Word64) (bytes : List UI (h2 : bytes[2]'(by omega) = b2) (h3 : bytes[3]'(by omega) = b3) : read4Bytes s pc = some (b0, b1, b2, b3) := by - sorry + have hb0 := h_loaded 0 (by omega) + have hb1 := h_loaded 1 (by omega) + have hb2 := h_loaded 2 (by omega) + have hb3 := h_loaded 3 (by omega) + rw [show pc + BitVec.ofNat 64 0 = pc from by bv_omega] at hb0 + rw [show pc + BitVec.ofNat 64 1 = pc + 1 from by bv_omega] at hb1 + rw [show pc + BitVec.ofNat 64 2 = pc + 2 from by bv_omega] at hb2 + rw [show pc + BitVec.ofNat 64 3 = pc + 3 from by bv_omega] at hb3 + subst h0 h1 h2 h3 + simp only [read4Bytes, hb0, hb1, hb2, hb3] + /-! ## Helper Lemmas for Proofs diff --git a/Bignum/Arm/Machine/Instruction.lean b/Bignum/Arm/Machine/Instruction.lean index f8218d1..9587f49 100644 --- a/Bignum/Arm/Machine/Instruction.lean +++ b/Bignum/Arm/Machine/Instruction.lean @@ -48,7 +48,7 @@ inductive Instruction | MOVZ : Reg → ℕ → ℕ → Instruction -- MOVZ Rd, #imm, LSL #pos | MUL : Reg → Reg → Reg → Instruction -- MUL Rd, Rn, Rm (Rd := Rn * Rm) | RET : Instruction - deriving Repr + deriving Repr, DecidableEq /-- Pretty-print an instruction in ARM assembly syntax. @@ -65,8 +65,8 @@ def Instruction.toString : Instruction → String | EOR rd rn rm => s!"EOR {rd}, {rn}, {rm}" | LSL rd rn imm => s!"LSL {rd}, {rn}, #{imm}" | LSR rd rn imm => s!"LSR {rd}, {rn}, #{imm}" - | LDR rd addr => s!"LDR {rd}, [{addr.val}]" - | STR rd addr => s!"STR {rd}, [{addr.val}]" + | LDR rd addr => s!"LDR {rd}, [{addr.toNat}]" + | STR rd addr => s!"STR {rd}, [{addr.toNat}]" | MOV rd rn => s!"MOV {rd}, {rn}" | MOVZ rd imm pos => s!"MOVZ {rd}, #{imm}, LSL #{pos}" | MUL rd rn rm => s!"MUL {rd}, {rn}, {rm}" @@ -88,7 +88,7 @@ def Program.split (p : Program) (i : Nat) : Program × Program := let second_instrs := p.instructions.drop i let first_prog := { base_addr := p.base_addr, instructions := first_instrs } let second_prog := { - base_addr := p.base_addr + Word64.ofNat (i * 4), + base_addr := p.base_addr + BitVec.ofNat 64 (i * 4), instructions := second_instrs } (first_prog, second_prog) @@ -109,11 +109,29 @@ Source: s2n-bignum/arm/proofs/arm.ml (ARM semantics) open Bignum +/-- +Advance the program counter by 4 bytes (one instruction width). + +This is used by all non-branch instructions to move to the next instruction. +Branch instructions (e.g., RET, B, BL) set PC directly and do not use this helper. + +Note: The PC advance reads the *original* PC from `s` (the state before the +instruction's effect), ensuring correct semantics even when the instruction +writes to PC as a side effect. +-/ +@[reducible] +def advance_pc (s : ArmState) (s' : ArmState) : ArmState := + s'.write_reg Reg.PC (s.read_reg Reg.PC + 4) + /-- Execute a single ARM instruction, transforming the state. This corresponds to the symbolic execution performed by HOL Light's ARM_STEPS_TAC in the proof. + +For non-branch instructions, `advance_pc` is applied to advance the program +counter by 4 bytes after computing the instruction's effect. Branch +instructions (e.g., RET) set PC directly. -/ def step (instr : Instruction) (s : ArmState) : ArmState := match instr with @@ -122,130 +140,117 @@ def step (instr : Instruction) (s : ArmState) : ArmState := let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n + val_m -- BitVec addition (wraps at 2^64) - s.write_reg rd result |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.SUB rd rn rm => -- Xd := Xn - Xm (word subtraction, no flags) let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n - val_m -- BitVec subtraction (wraps at 2^64) - s.write_reg rd result |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.ADDS rd rn rm => -- Xd := Xn + Xm, set flags let val_n := s.read_reg rn let val_m := s.read_reg rm let sum_nat := val_n.toNat + val_m.toNat - let result := Word64.ofNat sum_nat + let result := BitVec.ofNat 64 sum_nat let carry := sum_nat >= 2^64 let flags := { s.flags with C := carry Z := result.toNat = 0 N := result.toNat >= 2^63 } - s.write_reg rd result - |>.write_flags flags |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result |>.write_flags flags) | Instruction.SUBS rd rn rm => -- Xd := Xn - Xm, set flags let val_n := s.read_reg rn let val_m := s.read_reg rm let diff_nat := val_n.toNat - val_m.toNat - let result := Word64.ofNat diff_nat + let result := BitVec.ofNat 64 diff_nat let borrow := val_n.toNat < val_m.toNat let flags := { s.flags with C := !borrow -- Carry flag is inverted for subtraction Z := result.toNat = 0 N := result.toNat >= 2^63 } - s.write_reg rd result - |>.write_flags flags |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result |>.write_flags flags) | Instruction.ADCS rd rn rm => -- Xd := Xn + Xm + Carry, set flags let val_n := s.read_reg rn let val_m := s.read_reg rm let carry_in := if s.flags.C then 1 else 0 let sum_nat := val_n.toNat + val_m.toNat + carry_in - let result := Word64.ofNat sum_nat + let result := BitVec.ofNat 64 sum_nat let carry := sum_nat >= 2^64 let flags := { s.flags with C := carry Z := result.toNat = 0 N := result.toNat >= 2^63 } - s.write_reg rd result - |>.write_flags flags |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result |>.write_flags flags) | Instruction.SBCS rd rn rm => -- Xd := Xn - Xm - ~Carry, set flags let val_n := s.read_reg rn let val_m := s.read_reg rm let borrow_in := if s.flags.C then 0 else 1 let diff_nat := val_n.toNat - val_m.toNat - borrow_in - let result := Word64.ofNat diff_nat + let result := BitVec.ofNat 64 diff_nat let borrow := val_n.toNat < val_m.toNat + borrow_in let flags := { s.flags with C := !borrow Z := result.toNat = 0 N := result.toNat >= 2^63 } - s.write_reg rd result - |>.write_flags flags - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result |>.write_flags flags) | Instruction.AND rd rn rm => let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n &&& val_m - s.write_reg rd result |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.ORR rd rn rm => let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n ||| val_m - s.write_reg rd result - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.EOR rd rn rm => let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n ^^^ val_m - s.write_reg rd result - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.LSL rd rn imm => let val_n := s.read_reg rn let result := val_n <<< imm - s.write_reg rd result - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.LSR rd rn imm => let val_n := s.read_reg rn let result := val_n >>> imm - s.write_reg rd result - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.LDR rd addr => match s.read_mem_word64 addr with | some val => - s.write_reg rd val - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd val) | none => s -- Memory fault: state unchanged | Instruction.STR rd addr => let val := s.read_reg rd - s.write_mem_word64 addr val - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_mem_word64 addr val) | Instruction.MOV rd rn => let val := s.read_reg rn - s.write_reg rd val - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd val) | Instruction.MOVZ rd imm pos => -- MOVZ: Move wide with zero -- Rd := imm * 2^pos (16-bit immediate shifted to position pos) -- pos must be 0, 16, 32, or 48 - let val := Word64.ofNat (imm * 2^pos) - s.write_reg rd val - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + let val := BitVec.ofNat 64 (imm * 2^pos) + advance_pc s (s.write_reg rd val) | Instruction.MUL rd rn rm => -- MUL: Multiply -- Rd := Rn * Rm (low 64 bits of product) let val_n := s.read_reg rn let val_m := s.read_reg rm let result := val_n * val_m -- BitVec multiplication (wraps at 2^64) - s.write_reg rd result - |>.write_reg Reg.PC (s.read_reg Reg.PC + 4) + advance_pc s (s.write_reg rd result) | Instruction.RET => -- Return: set PC to X30 (link register) + -- Note: RET is a branch instruction and does NOT use advance_pc let return_addr := s.read_reg Reg.X30 s.write_reg Reg.PC return_addr diff --git a/Bignum/Arm/Machine/Loader.lean b/Bignum/Arm/Machine/Loader.lean new file mode 100644 index 0000000..95185c0 --- /dev/null +++ b/Bignum/Arm/Machine/Loader.lean @@ -0,0 +1,504 @@ +/- +Copyright (c) 2025 Alexandre Rademaker. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Alexandre Rademaker +-/ +module + +public import Bignum.Arm.Machine.Decode + +@[expose] public section + +/-! +# Object File Loader + +Loads ARM machine code bytes directly from object files (`.o`), without +relying on `objcopy` or `llvm-objcopy`. + +## Supported Formats + +- **Mach-O 64-bit** (macOS ARM64): extracts `__TEXT,__text` +- **ELF 64-bit** (Linux AArch64): extracts `.text` + +## Design Rationale + +The byte list in a proof (e.g., `simple_mc`) is the **ground truth** of the +trusted computing base. The loader lives OUTSIDE the TCB — it provides a +convenient way to verify that the assembler produced the expected bytes. + +This corresponds to HOL Light's `define_assert_from_elf` +(s2n-bignum/common/elf.ml), but parses object files directly in Lean. + +## Usage + +```lean +-- Show bytes from an object file at elaboration time +#load_obj "path/to/program.o" + +-- Verify bytes at run time +#eval assertTextSectionFromObj "path/to/program.o" simple_mc + +-- Load a Program directly +def p : IO Program := Program.fromObj 0 "path/to/program.o" +``` +-/ + +namespace Bignum.Arm + +/-! +## Binary Parsing Helpers + +Little-endian field readers used by the Mach-O and ELF parsers. +These are in the public namespace to enable users to build additional parsers. +-/ + +def readLE16 (bytes : ByteArray) (off : Nat) : Option UInt16 := + if off + 2 ≤ bytes.size then + some (bytes[off]!.toUInt16 ||| (bytes[off + 1]!.toUInt16 <<< 8)) + else none + +def readLE32 (bytes : ByteArray) (off : Nat) : Option UInt32 := + if off + 4 ≤ bytes.size then + let b := fun i => bytes[off + i]!.toUInt32 + some (b 0 ||| (b 1 <<< 8) ||| (b 2 <<< 16) ||| (b 3 <<< 24)) + else none + +def readLE64 (bytes : ByteArray) (off : Nat) : Option UInt64 := + if off + 8 ≤ bytes.size then + let b := fun i => bytes[off + i]!.toUInt64 + some (b 0 ||| (b 1 <<< 8) ||| (b 2 <<< 16) ||| (b 3 <<< 24) ||| + (b 4 <<< 32) ||| (b 5 <<< 40) ||| (b 6 <<< 48) ||| (b 7 <<< 56)) + else none + +/-- Read a null-terminated string from a fixed-width field (e.g., 16-byte sectname). -/ +def readFixedStr (bytes : ByteArray) (off len : Nat) : String := + let raw := (List.range len).map fun i => + if off + i < bytes.size then bytes[off + i]! else 0 + String.ofList (raw.takeWhile (· != 0) |>.map (Char.ofNat ·.toNat)) + +/-- Extract a contiguous byte slice from a ByteArray. -/ +def sliceBytes (bytes : ByteArray) (off size : Nat) : List UInt8 := + (List.range size).map fun i => + if off + i < bytes.size then bytes[off + i]! else 0 + +/-! +## Result Type +-/ + +/-- Result of extracting the text section from an object file. -/ +inductive TextSectionResult + | ok (bytes : List UInt8) : TextSectionResult + | unknownFormat : TextSectionResult + | sectionNotFound : TextSectionResult + | parseError (msg : String) : TextSectionResult + deriving Repr + +instance : ToString TextSectionResult where + toString + | .ok bytes => s!"OK: {bytes.length} bytes" + | .unknownFormat => "Unknown format (expected Mach-O 64 or ELF 64)" + | .sectionNotFound => "Text section not found" + | .parseError msg => s!"Parse error: {msg}" + +/-! +## Mach-O 64-bit Parser + +Mach-O (macOS ARM64) binary layout: +``` +Offset Size Field +0 4 magic = 0xFEEDFACF (LE 64-bit Mach-O) +16 4 ncmds +32 ... load commands +``` + +`LC_SEGMENT_64` (cmd = 0x19) — 72-byte header + N × 80-byte `section_64`: +``` +segment_command_64 offsets: + +8 segname (16) +64 nsects (4) + +section_64 offsets (each section, 80 bytes): + +0 sectname (16) +40 size (8) ← byte count + +48 offset (4) ← file offset of section data +``` +-/ +def parseMachO64 (bytes : ByteArray) : Option (List UInt8) := + if readLE32 bytes 0 != some 0xFEEDFACF then none + else + let ncmds := (readLE32 bytes 16).getD 0 + -- Walk load commands; carry (current_offset, found_result) as state. + -- Note: in MH_OBJECT files the LC_SEGMENT_64 segname is "" (empty), not "__TEXT". + -- We therefore check only sectname == "__text" inside every LC_SEGMENT_64. + let (_, found) := (List.range ncmds.toNat).foldl (fun (lc_off, acc) _ => + match acc with + | some _ => (lc_off, acc) -- section found, skip remaining commands + | none => + match readLE32 bytes lc_off, readLE32 bytes (lc_off + 4) with + | some cmd, some cmdsize => + let result := + if cmd == 0x19 then -- LC_SEGMENT_64 (any segment) + let nsects := (readLE32 bytes (lc_off + 64)).getD 0 + -- Search sections for __text (each section_64 is 80 bytes) + (List.range nsects.toNat).foldl (fun r i => + match r with + | some _ => r + | none => + let soff := lc_off + 72 + i * 80 + if readFixedStr bytes soff 16 == "__text" then + match readLE64 bytes (soff + 40), readLE32 bytes (soff + 48) with + | some sz, some fo => some (sliceBytes bytes fo.toNat sz.toNat) + | _, _ => none + else none) none + else none + (lc_off + cmdsize.toNat, result) + | _, _ => (lc_off, none)) (32, none) + found + +/-! +## ELF 64-bit Parser + +ELF 64-bit little-endian layout: +``` +Offset Size Field +0 4 magic = 0x7f 'E' 'L' 'F' +4 1 class = 2 (64-bit) +5 1 data = 1 (LE) +40 8 e_shoff — section header table file offset +60 2 e_shnum — number of section headers +62 2 e_shstrndx — string table section index +``` + +Each section header (64 bytes): +``` ++0 sh_name (4) — index into string table ++24 sh_offset (8) ← file offset of section data ++32 sh_size (8) ← byte count of section data +``` +-/ +def parseELF64 (bytes : ByteArray) : Option (List UInt8) := + if bytes.size < 64 + || bytes[0]! != 0x7f || bytes[1]! != 0x45 -- 'E' + || bytes[2]! != 0x4c || bytes[3]! != 0x46 -- 'L', 'F' + || bytes[4]! != 2 || bytes[5]! != 1 then -- 64-bit, LE + none + else do + let shoff ← readLE64 bytes 40 + let shnum ← readLE16 bytes 60 + let shstrndx ← readLE16 bytes 62 + -- String table: section header at index shstrndx + let strsh := shoff.toNat + shstrndx.toNat * 64 + let strtab ← readLE64 bytes (strsh + 24) -- sh_offset of strtab section + let strsz ← readLE64 bytes (strsh + 32) -- sh_size of strtab section + -- Scan section headers for ".text" + (List.range shnum.toNat).foldl (fun acc i => + match acc with + | some _ => acc + | none => + let sh := shoff.toNat + i * 64 + let ni := (readLE32 bytes sh).getD 0 + if readFixedStr bytes (strtab.toNat + ni.toNat) strsz.toNat == ".text" then + match readLE64 bytes (sh + 24), readLE64 bytes (sh + 32) with + | some fo, some sz => some (sliceBytes bytes fo.toNat sz.toNat) + | _, _ => none + else none) none + +/-! +## Public Extraction API +-/ + +/-- +Extract the text section from a Mach-O or ELF 64-bit object file. +Automatically detects the format. +-/ +def extractTextSection (bytes : ByteArray) : TextSectionResult := + if readLE32 bytes 0 == some 0xFEEDFACF then + match parseMachO64 bytes with + | some bs => .ok bs + | none => .sectionNotFound + else if bytes.size ≥ 4 + && bytes[0]! == 0x7f && bytes[1]! == 0x45 + && bytes[2]! == 0x4c && bytes[3]! == 0x46 then + match parseELF64 bytes with + | some bs => .ok bs + | none => .sectionNotFound + else + .unknownFormat + +/-- Read an object file and extract its text section. -/ +def readTextSectionFromFile (path : System.FilePath) : IO TextSectionResult := + return extractTextSection (← IO.FS.readBinFile path) + +/-! +## Byte Comparison +-/ + +def toHexStr (n : Nat) : String := + let digits := Nat.toDigits 16 n + String.ofList (if digits.length < 2 then '0' :: digits else digits) + +/-- Result of comparing two byte lists. -/ +inductive ByteCompareResult + | ok : ByteCompareResult + | lengthMismatch (expected actual : Nat) : ByteCompareResult + | byteMismatch (index : Nat) (expected actual : UInt8) : ByteCompareResult + deriving Repr + +def ByteCompareResult.toString : ByteCompareResult → String + | .ok => "OK: all bytes match" + | .lengthMismatch exp act => + s!"Length mismatch: expected {exp} bytes, got {act} bytes" + | .byteMismatch idx exp act => + s!"Byte mismatch at offset {idx}: expected 0x{toHexStr exp.toNat}, got 0x{toHexStr act.toNat}" + +instance : ToString ByteCompareResult := ⟨ByteCompareResult.toString⟩ + +/-- Compare two byte lists element by element. -/ +def compareBytes (expected actual : List UInt8) : ByteCompareResult := + if expected.length != actual.length then + .lengthMismatch expected.length actual.length + else + let rec go (i : Nat) : List UInt8 → List UInt8 → ByteCompareResult + | [], [] => .ok + | e :: es, a :: as_ => + if e == a then go (i + 1) es as_ else .byteMismatch i e a + | _, _ => .ok -- unreachable + go 0 expected actual + +/-- +Verify that the text section of an object file matches expected bytes. + +This is the Lean equivalent of `define_assert_from_elf`: reads the `.o`, +extracts the text section, and asserts it matches the proof-level byte list. +-/ +def assertTextSectionFromObj (path : System.FilePath) (expected : List UInt8) : IO Bool := do + match ← readTextSectionFromFile path with + | .ok actual => + match compareBytes expected actual with + | .ok => + IO.println s!"[Loader] {path}: OK ({expected.length} bytes verified)" + return true + | result => + IO.println s!"[Loader] {path}: FAIL - {result}" + return false + | e => + IO.println s!"[Loader] {path}: FAIL - {e}" + return false + +/-- Read a raw binary file (pre-extracted .text section). -/ +def readBinaryFile (path : System.FilePath) : IO (List UInt8) := + return (← IO.FS.readBinFile path).toList + +/-- +Verify that a raw binary file matches expected bytes. +For workflows that use `objcopy -j .text -O binary` to pre-extract sections. +-/ +def assertBytesFromFile (path : System.FilePath) (expected : List UInt8) : IO Bool := do + let actual ← readBinaryFile path + match compareBytes expected actual with + | .ok => + IO.println s!"[Loader] {path}: OK ({expected.length} bytes verified)" + return true + | result => + IO.println s!"[Loader] {path}: FAIL - {result}" + return false + +/-! +## Program Construction +-/ + +/-- Load a Program directly from an object file (Mach-O or ELF). -/ +def Program.fromObj (base_addr : Word64) (path : System.FilePath) : IO Program := do + match ← readTextSectionFromFile path with + | .ok bytes => return Program.fromBytes base_addr bytes + | e => throw (IO.userError s!"Failed to load {path}: {e}") + +/-- Load a Program from an object file and verify against expected bytes. -/ +def Program.fromObjVerified (base_addr : Word64) (path : System.FilePath) + (expected : List UInt8) : IO Program := do + match ← readTextSectionFromFile path with + | .ok actual => + match compareBytes expected actual with + | .ok => return Program.fromBytes base_addr expected + | result => throw (IO.userError s!"Byte mismatch in {path}: {result}") + | e => throw (IO.userError s!"Failed to load {path}: {e}") + +/-- Load a Program from a raw binary file (pre-extracted .text section). -/ +def Program.fromFile (base_addr : Word64) (path : System.FilePath) : IO Program := + return Program.fromBytes base_addr (← readBinaryFile path) + +/-! +## Formatting Utilities +-/ + +/-- Format a byte list as a Lean definition string. -/ +def formatBytesAsLeanDef (name : String) (bytes : List UInt8) : String := + let hexBytes := bytes.map fun b => s!"0x{toHexStr b.toNat}" + let rec group4 : List String → List (List String) + | a :: b :: c :: d :: rest => [a, b, c, d] :: group4 rest + | [] => [] + | remaining => [remaining] + let groups := group4 hexBytes + let firstLine := ", ".intercalate (groups.headD []) + let restLines := groups.tail.map fun g => s!" {", ".intercalate g}" + s!"def {name} : List UInt8 :=\n [{",\n".intercalate (firstLine :: restLines)}]" + +/-- Format a byte list as a hex dump (8 bytes per line). -/ +def hexDump (bytes : List UInt8) : String := + let chunks := chunkBy8 bytes + let lines := chunks.zipIdx.map fun (chunk, i) => + let offset := i * 8 + let hexPart := " ".intercalate (chunk.map fun b => toHexStr b.toNat) + let asciiPart := String.ofList (chunk.map fun b => + let n := b.toNat + if 0x20 ≤ n && n < 0x7f then Char.ofNat n else '.') + let offsetStr := String.ofList + (List.replicate (8 - (Nat.toDigits 16 offset).length) '0' ++ Nat.toDigits 16 offset) + s!"{offsetStr}: {hexPart} {asciiPart}" + "\n".intercalate lines +where + chunkBy8 : List UInt8 → List (List UInt8) + | a :: b :: c :: d :: e :: f :: g :: h :: rest => + [a, b, c, d, e, f, g, h] :: chunkBy8 rest + | [] => [] + | remaining => [remaining] + +/-! +## Elaboration-Time Commands + +`elab` commands run in the Lean meta interpreter and cannot call `@[expose]` +(native) functions directly. Parsing is reimplemented inline using local lambdas. +-/ + +open Lean Elab Command in +/-- +`#load_obj` — extract the `.text` section from an object file at elaboration time. + +```lean +#load_obj "path/to/program.o" +``` + +Supports Mach-O (macOS `.o`) and ELF (Linux `.o`) object files. +Output is a Lean list literal suitable for use in a `def my_mc : List UInt8 := [...]`. +-/ +elab "#load_obj " path:str : command => do + let pathStr := path.getString + let bytes ← IO.FS.readBinFile pathStr + -- All helpers defined as local lambdas (pure, meta-compatible, capture bytes) + let readLE32 : Nat → Option UInt32 := fun off => + if off + 4 ≤ bytes.size then + let b := fun i => bytes[off + i]!.toUInt32 + some (b 0 ||| (b 1 <<< 8) ||| (b 2 <<< 16) ||| (b 3 <<< 24)) + else none + let readLE64 : Nat → Option UInt64 := fun off => + if off + 8 ≤ bytes.size then + let b := fun i => bytes[off + i]!.toUInt64 + some (b 0 ||| (b 1 <<< 8) ||| (b 2 <<< 16) ||| (b 3 <<< 24) ||| + (b 4 <<< 32) ||| (b 5 <<< 40) ||| (b 6 <<< 48) ||| (b 7 <<< 56)) + else none + let readLE16 : Nat → Option UInt16 := fun off => + if off + 2 ≤ bytes.size then + some (bytes[off]!.toUInt16 ||| (bytes[off + 1]!.toUInt16 <<< 8)) + else none + let readStr : Nat → Nat → String := fun off len => + let raw := (List.range len).map fun i => + if off + i < bytes.size then bytes[off + i]! else 0 + String.ofList (raw.takeWhile (· != 0) |>.map (Char.ofNat ·.toNat)) + let slice : Nat → Nat → List UInt8 := fun off size => + (List.range size).map fun i => + if off + i < bytes.size then bytes[off + i]! else 0 + let result : Option (List UInt8) := + if readLE32 0 == some 0xFEEDFACF then + -- Mach-O 64-bit + let ncmds := (readLE32 16).getD 0 + let (_, found) := (List.range ncmds.toNat).foldl (fun (lc_off, acc) _ => + match acc with + | some _ => (lc_off, acc) + | none => + match readLE32 lc_off, readLE32 (lc_off + 4) with + | some cmd, some cmdsize => + let r := + if cmd == 0x19 then -- LC_SEGMENT_64 (segname may be "" in MH_OBJECT) + let nsects := (readLE32 (lc_off + 64)).getD 0 + (List.range nsects.toNat).foldl (fun r i => + match r with + | some _ => r + | none => + let soff := lc_off + 72 + i * 80 + if readStr soff 16 == "__text" then + match readLE64 (soff + 40), readLE32 (soff + 48) with + | some sz, some fo => some (slice fo.toNat sz.toNat) + | _, _ => none + else none) none + else none + (lc_off + cmdsize.toNat, r) + | _, _ => (lc_off, none)) (32, none) + found + else if bytes.size ≥ 4 + && bytes[0]! == 0x7f && bytes[1]! == 0x45 + && bytes[2]! == 0x4c && bytes[3]! == 0x46 + && bytes[4]! == 2 && bytes[5]! == 1 then + -- ELF 64-bit + (do + let shoff ← readLE64 40 + let shnum ← readLE16 60 + let shstrndx ← readLE16 62 + let strsh := shoff.toNat + shstrndx.toNat * 64 + let strtab ← readLE64 (strsh + 24) + let strsz ← readLE64 (strsh + 32) + (List.range shnum.toNat).foldl (fun acc i => + match acc with + | some _ => acc + | none => + let sh := shoff.toNat + i * 64 + let ni := (readLE32 sh).getD 0 + if readStr (strtab.toNat + ni.toNat) strsz.toNat == ".text" then + match readLE64 (sh + 24), readLE64 (sh + 32) with + | some fo, some sz => some (slice fo.toNat sz.toNat) + | _, _ => none + else none) none) + else none + let toHex : Nat → String := fun n => + let d := Nat.toDigits 16 n + String.ofList (if d.length < 2 then '0' :: d else d) + match result with + | none => logError m!"Could not extract text section from: {pathStr}" + | some byteList => + let hexStrs := byteList.map fun b => s!"0x{toHex b.toNat}" + let rec group4 : List String → List (List String) + | a :: b :: c :: d :: rest => [a, b, c, d] :: group4 rest + | [] => [] + | remaining => [remaining] + let groups := group4 hexStrs + let firstLine := ", ".intercalate (groups.headD []) + let restLines := groups.tail.map fun g => s!" {", ".intercalate g}," + let body := "\n".intercalate restLines + logInfo m!"-- {byteList.length} bytes from {pathStr}\n[\n {firstLine},\n{body}\n]" + +open Lean Elab Command in +/-- +`#load_bytes` — display bytes from a raw binary file at elaboration time. + +```lean +#load_bytes "path/to/program.bin" +``` + +For raw `.text` bytes pre-extracted via `objcopy -j .text -O binary`. +Prefer `#load_obj` for object files. +-/ +elab "#load_bytes " path:str : command => do + let pathStr := path.getString + let byteList := (← IO.FS.readBinFile pathStr).toList + let toHex : Nat → String := fun n => + let d := Nat.toDigits 16 n + String.ofList (if d.length < 2 then '0' :: d else d) + let hexStrs := byteList.map fun b => s!"0x{toHex b.toNat}" + let rec group4 : List String → List (List String) + | a :: b :: c :: d :: rest => [a, b, c, d] :: group4 rest + | [] => [] + | remaining => [remaining] + let groups := group4 hexStrs + let firstLine := ", ".intercalate (groups.headD []) + let restLines := groups.tail.map fun g => s!" {", ".intercalate g}," + let body := "\n".intercalate restLines + logInfo m!"-- {byteList.length} bytes from {pathStr}\n[\n {firstLine},\n{body}\n]" + +end Bignum.Arm diff --git a/Bignum/Arm/Tutorial/Sequence.lean b/Bignum/Arm/Tutorial/Sequence.lean index 6e11eef..ea0ea92 100644 --- a/Bignum/Arm/Tutorial/Sequence.lean +++ b/Bignum/Arm/Tutorial/Sequence.lean @@ -88,11 +88,11 @@ ensures arm Precondition: program loaded, PC at start, registers initialized. -/ def sequence_pre (pc a b c : ℕ) (s : ArmState) : Prop := - aligned_bytes_loaded s.mem (Word64.ofNat pc) sequence_mc ∧ - s.read_reg Reg.PC = Word64.ofNat pc ∧ - s.read_reg Reg.X0 = Word64.ofNat a ∧ - s.read_reg Reg.X1 = Word64.ofNat b ∧ - s.read_reg Reg.X2 = Word64.ofNat c + aligned_bytes_loaded s.mem (BitVec.ofNat 64 pc) sequence_mc ∧ + s.read_reg Reg.PC = BitVec.ofNat 64 pc ∧ + s.read_reg Reg.X0 = BitVec.ofNat 64 a ∧ + s.read_reg Reg.X1 = BitVec.ofNat 64 b ∧ + s.read_reg Reg.X2 = BitVec.ofNat 64 c /-- Intermediate assertion at pc+8: X1 = a + b. @@ -103,16 +103,16 @@ This is the `\s. read X1 s = word (a + b)` from HOL Light's Source: s2n-bignum/arm/tutorial/sequence.ml:77 -/ def sequence_mid (pc a b : ℕ) (s : ArmState) : Prop := - aligned_bytes_loaded s.mem (Word64.ofNat pc) sequence_mc ∧ - s.read_reg Reg.PC = Word64.ofNat (pc + 8) ∧ - s.read_reg Reg.X1 = Word64.ofNat (a + b) + aligned_bytes_loaded s.mem (BitVec.ofNat 64 pc) sequence_mc ∧ + s.read_reg Reg.PC = BitVec.ofNat 64 (pc + 8) ∧ + s.read_reg Reg.X1 = BitVec.ofNat 64 (a + b) /-- Postcondition: PC at pc+16, X1 = (a+b)*2. -/ def sequence_post (pc a b : ℕ) (s : ArmState) : Prop := - s.read_reg Reg.PC = Word64.ofNat (pc + 16) ∧ - s.read_reg Reg.X1 = Word64.ofNat ((a + b) * 2) + s.read_reg Reg.PC = BitVec.ofNat 64 (pc + 16) ∧ + s.read_reg Reg.X1 = BitVec.ofNat 64 ((a + b) * 2) /-! ## Symbolic Execution Helpers @@ -124,25 +124,111 @@ Decode lemmas for each instruction in the sequence program. theorem sequence_decode_instr1 (s : ArmState) (pc : Word64) (h : aligned_bytes_loaded s.mem pc sequence_mc) : arm_decode s pc = some (Instruction.ADD Reg.X1 Reg.X1 Reg.X0) := by - sorry + obtain ⟨_, h_bytes⟩ := h + have h0 : s.mem.read_byte pc = some 0x21 := by + have := h_bytes 0 (by decide : 0 < 16) + simp only [sequence_mc] at this + convert this using 2 + exact (BitVec.add_zero pc).symm + have h1 : s.mem.read_byte (pc + 1) = some 0x00 := by + have := h_bytes 1 (by decide : 1 < 16) + simp only [sequence_mc] at this + convert this using 2 + have h2 : s.mem.read_byte (pc + 2) = some 0x00 := by + have := h_bytes 2 (by decide : 2 < 16) + simp only [sequence_mc] at this + convert this using 2 + have h3 : s.mem.read_byte (pc + 3) = some 0x8b := by + have := h_bytes 3 (by decide : 3 < 16) + simp only [sequence_mc] at this + convert this using 2 + unfold arm_decode read4Bytes + simp only [h0, h1, h2, h3] + rfl /-- Decode instruction 2: ADD X2, X2, X0 -/ theorem sequence_decode_instr2 (s : ArmState) (pc : Word64) (h : aligned_bytes_loaded s.mem pc sequence_mc) : arm_decode s (pc + 4) = some (Instruction.ADD Reg.X2 Reg.X2 Reg.X0) := by - sorry + obtain ⟨_, h_bytes⟩ := h + have h4 : s.mem.read_byte (pc + 4) = some 0x42 := by + have := h_bytes 4 (by decide : 4 < 16) + simp only [sequence_mc] at this + convert this using 2 + have h5 : s.mem.read_byte (pc + 4 + 1) = some 0x00 := by + have := h_bytes 5 (by decide : 5 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h6 : s.mem.read_byte (pc + 4 + 2) = some 0x00 := by + have := h_bytes 6 (by decide : 6 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h7 : s.mem.read_byte (pc + 4 + 3) = some 0x8b := by + have := h_bytes 7 (by decide : 7 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + unfold arm_decode read4Bytes + simp only [h4, h5, h6, h7] + rfl /-- Decode instruction 3: MOV X3, #2 (encoded as MOVZ) -/ theorem sequence_decode_instr3 (s : ArmState) (pc : Word64) (h : aligned_bytes_loaded s.mem pc sequence_mc) : arm_decode s (pc + 8) = some (Instruction.MOVZ Reg.X3 2 0) := by - sorry + obtain ⟨_, h_bytes⟩ := h + have h8 : s.mem.read_byte (pc + 8) = some 0x43 := by + have := h_bytes 8 (by decide : 8 < 16) + simp only [sequence_mc] at this + convert this using 2 + have h9 : s.mem.read_byte (pc + 8 + 1) = some 0x00 := by + have := h_bytes 9 (by decide : 9 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h10 : s.mem.read_byte (pc + 8 + 2) = some 0x80 := by + have := h_bytes 10 (by decide : 10 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h11 : s.mem.read_byte (pc + 8 + 3) = some 0xd2 := by + have := h_bytes 11 (by decide : 11 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + unfold arm_decode read4Bytes + simp only [h8, h9, h10, h11] + rfl /-- Decode instruction 4: MUL X1, X1, X3 -/ theorem sequence_decode_instr4 (s : ArmState) (pc : Word64) (h : aligned_bytes_loaded s.mem pc sequence_mc) : arm_decode s (pc + 12) = some (Instruction.MUL Reg.X1 Reg.X1 Reg.X3) := by - sorry + obtain ⟨_, h_bytes⟩ := h + have h12 : s.mem.read_byte (pc + 12) = some 0x21 := by + have := h_bytes 12 (by decide : 12 < 16) + simp only [sequence_mc] at this + convert this using 2 + have h13 : s.mem.read_byte (pc + 12 + 1) = some 0x7c := by + have := h_bytes 13 (by decide : 13 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h14 : s.mem.read_byte (pc + 12 + 2) = some 0x03 := by + have := h_bytes 14 (by decide : 14 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + have h15 : s.mem.read_byte (pc + 12 + 3) = some 0x9b := by + have := h_bytes 15 (by decide : 15 < 16) + simp only [sequence_mc] at this + convert this using 2 + bv_omega + unfold arm_decode read4Bytes + simp only [h12, h13, h14, h15] + rfl /-! ## Compositional Verification @@ -213,7 +299,83 @@ theorem sequence_chunk1_correct (pc a b c : ℕ) : (sequence_pre pc a b c) (sequence_mid pc a b) (maychange_regs [Reg.PC, Reg.X1, Reg.X2, Reg.X3]) := by - sorry + intro s₀ h_pre + obtain ⟨h_loaded, h_pc, h_x0, h_x1, h_x2⟩ := h_pre + -- Step 1: Execute ADD X1, X1, X0 + apply eventually.ind + · use step (Instruction.ADD Reg.X1 Reg.X1 Reg.X0) s₀ + unfold arm + simp only [h_pc, sequence_decode_instr1 s₀ (BitVec.ofNat 64 pc) h_loaded] + · intro s₁ h_step1 + unfold arm at h_step1 + simp only [h_pc, sequence_decode_instr1 s₀ (BitVec.ofNat 64 pc) h_loaded] at h_step1 + -- Step 2: Execute ADD X2, X2, X0 + apply eventually.ind + · have h_pc1 : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 4 := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same] + rw [h_pc] + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) sequence_mc := by + rw [h_step1] ; unfold step ; simp only [ArmState.write_reg] + exact h_loaded + use step (Instruction.ADD Reg.X2 Reg.X2 Reg.X0) s₁ + unfold arm + simp only [h_pc1, sequence_decode_instr2 s₁ (BitVec.ofNat 64 pc) h_loaded1] + · intro s₂ h_step2 + apply eventually.base + -- Extract s₁ properties + have h_s1_pc : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 4 := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same] + rw [h_pc] + have h_s1_x1 : s₁.read_reg Reg.X1 = BitVec.ofNat 64 b + BitVec.ofNat 64 a := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same, + ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X1)] + rw [h_x1, h_x0] + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) sequence_mc := by + rw [h_step1] ; unfold step ; simp only [ArmState.write_reg] + exact h_loaded + -- Extract s₂ from h_step2 + unfold arm at h_step2 + simp only [h_s1_pc, sequence_decode_instr2 s₁ (BitVec.ofNat 64 pc) h_loaded1] at h_step2 + constructor + · -- Postcondition: sequence_mid + unfold sequence_mid + constructor + · -- Memory still loaded + rw [h_step2] ; unfold step ; simp only [ArmState.write_reg] + exact h_loaded1 + constructor + · -- PC = pc + 8 + rw [h_step2] ; unfold step + simp only [ArmState.read_write_same] + rw [h_s1_pc] + simp only [BitVec.add_assoc] + rw [show (4 : BitVec 64) + 4 = 8 by rfl] + rw [← BitVec.ofNat_add_ofNat] + rfl + · -- X1 = a + b + rw [h_step2] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X1), + ArmState.read_write_diff _ _ _ _ (by decide : Reg.X2 ≠ Reg.X1)] + rw [h_s1_x1] + rw [BitVec.add_comm, ← BitVec.ofNat_add_ofNat] + · -- Frame: only PC, X1, X2, X3 may change + unfold maychange_regs unchanged_reg + intro r h_not_in + simp only [List.mem_cons] at h_not_in + push Not at h_not_in + obtain ⟨h_ne_pc, h_ne_x1, h_ne_x2, h_ne_x3, _⟩ := h_not_in + rw [h_step2] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_x2), + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_pc)] + rw [h_step1] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_x1), + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_pc)] /-- Second chunk specification: MOV x3, #2 and MUL x1, x1, x3. @@ -228,7 +390,94 @@ theorem sequence_chunk2_correct (pc a b : ℕ) : (sequence_mid pc a b) (sequence_post pc a b) (maychange_regs [Reg.PC, Reg.X1, Reg.X2, Reg.X3]) := by - sorry + intro s₀ h_mid + obtain ⟨h_loaded, h_pc, h_x1⟩ := h_mid + -- Key: convert BitVec.ofNat 64 (pc + 8) to BitVec.ofNat 64 pc + 8 + have h_pc_eq : BitVec.ofNat 64 (pc + 8) = BitVec.ofNat 64 pc + 8 := by + rw [← BitVec.ofNat_add_ofNat]; rfl + rw [h_pc_eq] at h_pc + -- Decode lemma for instruction 3 at this state + have h_dec3 := sequence_decode_instr3 s₀ (BitVec.ofNat 64 pc) h_loaded + -- Step 1: Execute MOVZ X3, #2 + apply eventually.ind + · use step (Instruction.MOVZ Reg.X3 2 0) s₀ + unfold arm + simp only [h_pc, h_dec3] + · intro s₁ h_step1 + unfold arm at h_step1 + simp only [h_pc, h_dec3] at h_step1 + -- Step 2: Execute MUL X1, X1, X3 + apply eventually.ind + · have h_pc1 : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 12 := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same] + rw [h_pc] + simp only [BitVec.add_assoc] + rw [show (8 : BitVec 64) + 4 = 12 by rfl] + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) sequence_mc := by + rw [h_step1] ; unfold step ; simp only [ArmState.write_reg] + exact h_loaded + use step (Instruction.MUL Reg.X1 Reg.X1 Reg.X3) s₁ + unfold arm + simp only [h_pc1, sequence_decode_instr4 s₁ (BitVec.ofNat 64 pc) h_loaded1] + · intro s₂ h_step2 + apply eventually.base + -- Extract s₁ properties + have h_s1_pc : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 12 := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same] + rw [h_pc] + simp only [BitVec.add_assoc] + rw [show (8 : BitVec 64) + 4 = 12 by rfl] + have h_s1_x1 : s₁.read_reg Reg.X1 = BitVec.ofNat 64 (a + b) := by + rw [h_step1] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X1), + ArmState.read_write_diff _ _ _ _ (by decide : Reg.X3 ≠ Reg.X1)] + exact h_x1 + have h_s1_x3 : s₁.read_reg Reg.X3 = BitVec.ofNat 64 (2 * 2 ^ 0) := by + rw [h_step1] ; unfold step + simp only [ArmState.read_write_same, + ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X3)] + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) sequence_mc := by + rw [h_step1] ; unfold step ; simp only [ArmState.write_reg] + exact h_loaded + -- Extract s₂ from h_step2 + unfold arm at h_step2 + simp only [h_s1_pc, sequence_decode_instr4 s₁ (BitVec.ofNat 64 pc) h_loaded1] at h_step2 + constructor + · -- Postcondition: sequence_post + unfold sequence_post + constructor + · -- PC = pc + 16 + rw [h_step2] ; unfold step + simp only [ArmState.read_write_same] + rw [h_s1_pc] + simp only [BitVec.add_assoc] + rw [show (12 : BitVec 64) + 4 = 16 by rfl] + rw [← BitVec.ofNat_add_ofNat] + rfl + · -- X1 = (a + b) * 2 + rw [h_step2] ; unfold step + simp only [ArmState.read_write_same, + ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X1)] + rw [h_s1_x1, h_s1_x3] + simp only [show 2 * 2 ^ 0 = 2 by norm_num] + rw [← BitVec.ofNat_mul_ofNat] + · -- Frame: only PC, X1, X2, X3 may change + unfold maychange_regs unchanged_reg + intro r h_not_in + simp only [List.mem_cons] at h_not_in + push Not at h_not_in + obtain ⟨h_ne_pc, h_ne_x1, _, h_ne_x3, _⟩ := h_not_in + rw [h_step2] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_x1), + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_pc)] + rw [h_step1] ; unfold step + simp only [ + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_x3), + ArmState.read_write_diff _ _ _ _ (Ne.symm h_ne_pc)] /-! ## Main Correctness Theorem diff --git a/Bignum/Arm/Tutorial/Simple.lean b/Bignum/Arm/Tutorial/Simple.lean index 90ca853..18c0070 100644 --- a/Bignum/Arm/Tutorial/Simple.lean +++ b/Bignum/Arm/Tutorial/Simple.lean @@ -74,22 +74,22 @@ Precondition: program loaded, PC at start, registers initialized. -/ def simple_pre (pc a b : ℕ) (s : ArmState) : Prop := -- Program bytes are loaded at PC (4-byte aligned) - aligned_bytes_loaded s.mem (Word64.ofNat pc) simple_mc ∧ + aligned_bytes_loaded s.mem (BitVec.ofNat 64 pc) simple_mc ∧ -- PC is at program start - s.read_reg Reg.PC = Word64.ofNat pc ∧ + s.read_reg Reg.PC = BitVec.ofNat 64 pc ∧ -- X0 contains a - s.read_reg Reg.X0 = Word64.ofNat a ∧ + s.read_reg Reg.X0 = BitVec.ofNat 64 a ∧ -- X1 contains b - s.read_reg Reg.X1 = Word64.ofNat b + s.read_reg Reg.X1 = BitVec.ofNat 64 b /-- Postcondition: PC advanced, X2 contains a. -/ def simple_post (pc a : ℕ) (s : ArmState) : Prop := -- PC advanced by 8 bytes (2 instructions) - s.read_reg Reg.PC = Word64.ofNat (pc + 8) ∧ + s.read_reg Reg.PC = BitVec.ofNat 64 (pc + 8) ∧ -- X2 contains the original value of X0 (which is a) - s.read_reg Reg.X2 = Word64.ofNat a + s.read_reg Reg.X2 = BitVec.ofNat 64 a /-! @@ -115,20 +115,20 @@ theorem simple_decode_instr1 (s : ArmState) (pc : Word64) -- Get the concrete byte values from bytes_loaded have h0 : s.mem.read_byte pc = some 0x22 := by have := h_bytes 0 (by decide : 0 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 exact (BitVec.add_zero pc).symm have h1 : s.mem.read_byte (pc + 1) = some 0x00 := by have := h_bytes 1 (by decide : 1 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 have h2 : s.mem.read_byte (pc + 2) = some 0x00 := by have := h_bytes 2 (by decide : 2 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 have h3 : s.mem.read_byte (pc + 3) = some 0x8b := by have := h_bytes 3 (by decide : 3 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 -- Unfold arm_decode and read4Bytes, substitute the concrete bytes unfold arm_decode read4Bytes @@ -146,21 +146,21 @@ theorem simple_decode_instr2 (s : ArmState) (pc : Word64) -- Get the concrete byte values for the second instruction (offsets 4-7) have h4 : s.mem.read_byte (pc + 4) = some 0x42 := by have := h_bytes 4 (by decide : 4 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 have h5 : s.mem.read_byte (pc + 4 + 1) = some 0x00 := by have := h_bytes 5 (by decide : 5 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 bv_omega have h6 : s.mem.read_byte (pc + 4 + 2) = some 0x01 := by have := h_bytes 6 (by decide : 6 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 bv_omega have h7 : s.mem.read_byte (pc + 4 + 3) = some 0xcb := by have := h_bytes 7 (by decide : 7 < 8) - simp only [simple_mc, Word64.ofNat] at this + simp only [simple_mc] at this convert this using 2 bv_omega -- Unfold arm_decode and read4Bytes, substitute the concrete bytes @@ -201,56 +201,56 @@ theorem simple_correct (pc a b : ℕ) : · -- Show there exists a next state use step (Instruction.ADD Reg.X2 Reg.X1 Reg.X0) s₀ unfold arm - simp only [h_pc, simple_decode_instr1 s₀ (Word64.ofNat pc) h_loaded] + simp only [h_pc, simple_decode_instr1 s₀ (BitVec.ofNat 64 pc) h_loaded] · -- For all next states, eventually reach goal intro s₁ h_step1 -- s₁ is the state after ADD unfold arm at h_step1 - simp only [h_pc, simple_decode_instr1 s₀ (Word64.ofNat pc) h_loaded] at h_step1 + simp only [h_pc, simple_decode_instr1 s₀ (BitVec.ofNat 64 pc) h_loaded] at h_step1 -- Step 2: Execute SUB X2, X2, X1 apply eventually.ind · -- Show there exists a next state use step (Instruction.SUB Reg.X2 Reg.X2 Reg.X1) s₁ -- After ADD, PC = pc + 4 - have h_pc1 : s₁.read_reg Reg.PC = Word64.ofNat pc + 4 := by + have h_pc1 : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 4 := by rw [h_step1] unfold step simp only [ArmState.read_write_same] rw [h_pc] -- Bytes are still loaded (memory unchanged by ADD) - have h_loaded1 : aligned_bytes_loaded s₁.mem (Word64.ofNat pc) simple_mc := by + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) simple_mc := by rw [h_step1] unfold step simp only [ArmState.write_reg] exact h_loaded unfold arm - simp only [h_pc1, simple_decode_instr2 s₁ (Word64.ofNat pc) h_loaded1] + simp only [h_pc1, simple_decode_instr2 s₁ (BitVec.ofNat 64 pc) h_loaded1] · -- For all next states, show goal (base case) intro s₂ h_step2 apply eventually.base -- s₂ is the final state after SUB -- Extract information about s₁ from h_step1 - have h_s1_pc : s₁.read_reg Reg.PC = Word64.ofNat pc + 4 := by + have h_s1_pc : s₁.read_reg Reg.PC = BitVec.ofNat 64 pc + 4 := by rw [h_step1] ; unfold step simp only [ArmState.read_write_same] rw [h_pc] - have h_s1_x2 : s₁.read_reg Reg.X2 = Word64.ofNat b + Word64.ofNat a := by + have h_s1_x2 : s₁.read_reg Reg.X2 = BitVec.ofNat 64 b + BitVec.ofNat 64 a := by rw [h_step1] ; unfold step simp only [ArmState.read_write_same, ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X2)] rw [h_x1, h_x0] - have h_s1_x1 : s₁.read_reg Reg.X1 = Word64.ofNat b := by + have h_s1_x1 : s₁.read_reg Reg.X1 = BitVec.ofNat 64 b := by rw [h_step1] ; unfold step simp only [ ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X1), ArmState.read_write_diff _ _ _ _ (by decide : Reg.X2 ≠ Reg.X1)] exact h_x1 - have h_loaded1 : aligned_bytes_loaded s₁.mem (Word64.ofNat pc) simple_mc := by + have h_loaded1 : aligned_bytes_loaded s₁.mem (BitVec.ofNat 64 pc) simple_mc := by rw [h_step1] ; unfold step ; simp only [ArmState.write_reg] exact h_loaded -- Extract s₂ from h_step2 unfold arm at h_step2 - simp only [h_s1_pc, simple_decode_instr2 s₁ (Word64.ofNat pc) h_loaded1] at h_step2 + simp only [h_s1_pc, simple_decode_instr2 s₁ (BitVec.ofNat 64 pc) h_loaded1] at h_step2 constructor · -- Postcondition constructor @@ -258,8 +258,7 @@ theorem simple_correct (pc a b : ℕ) : rw [h_step2] ; unfold step simp only [ArmState.read_write_same] rw [h_s1_pc] - -- Goal: Word64.ofNat pc + 4 + 4 = Word64.ofNat (pc + 8) - unfold Word64.ofNat + -- Goal: BitVec.ofNat 64 pc + 4 + 4 = BitVec.ofNat 64 (pc + 8) simp only [BitVec.add_assoc] -- Goal: BitVec.ofNat 64 pc + (4 + 4) = BitVec.ofNat 64 (pc + 8) rw [show (4 : BitVec 64) + 4 = 8 by rfl] @@ -271,13 +270,12 @@ theorem simple_correct (pc a b : ℕ) : ArmState.read_write_diff _ _ _ _ (by decide : Reg.PC ≠ Reg.X2)] rw [h_s1_x2, h_s1_x1] -- Goal: (b + a) - b = a - unfold Word64.ofNat rw [BitVec.add_comm, BitVec.add_sub_cancel] · -- Frame: only PC and X2 changed unfold maychange_regs unchanged_reg intro r h_not_in simp only [List.mem_cons] at h_not_in - push_neg at h_not_in + push Not at h_not_in obtain ⟨h_ne_pc, h_ne_x2, _⟩ := h_not_in rw [h_step2] ; unfold step simp only [ diff --git a/Bignum/Common/Basic/Defs.lean b/Bignum/Common/Basic/Defs.lean index f4c8d1c..32c7ddf 100644 --- a/Bignum/Common/Basic/Defs.lean +++ b/Bignum/Common/Basic/Defs.lean @@ -83,6 +83,7 @@ let BIGDIGIT_BOUND = prove Source: s2n-bignum/common/bignum.ml:24-27 -/ +@[simp] theorem bigdigit_bound (n i : Nat) : bigdigit n i < 2 ^ 64 := by unfold bigdigit apply Nat.mod_lt @@ -100,6 +101,7 @@ let BIGDIGIT_0 = prove Source: s2n-bignum/common/bignum.ml:33-35 -/ +@[simp] theorem bigdigit_zero (i : Nat) : bigdigit 0 i = 0 := by unfold bigdigit simp @@ -115,6 +117,7 @@ let LOWDIGITS_0 = prove Source: s2n-bignum/common/bignum.ml:121-123 -/ +@[simp] theorem lowdigits_zero (n : Nat) : lowdigits n 0 = 0 := by unfold lowdigits rw [Nat.mul_zero, Nat.pow_zero] @@ -131,6 +134,7 @@ let HIGHDIGITS_0 = prove Source: s2n-bignum/common/bignum.ml:109-111 -/ +@[simp] theorem highdigits_zero (n : Nat) : highdigits n 0 = n := by unfold highdigits simp @@ -146,6 +150,7 @@ let LOWDIGITS_BOUND = prove Source: s2n-bignum/common/bignum.ml:139-141 -/ +@[simp] theorem lowdigits_bound (n i : Nat) : lowdigits n i < 2 ^ (64 * i) := by unfold lowdigits exact Nat.mod_lt _ (Nat.two_pow_pos (64 * i)) diff --git a/Bignum/Common/Basic/Lemmas.lean b/Bignum/Common/Basic/Lemmas.lean index d958468..9687cf3 100644 --- a/Bignum/Common/Basic/Lemmas.lean +++ b/Bignum/Common/Basic/Lemmas.lean @@ -204,7 +204,16 @@ Source: s2n-bignum/common/bignum.ml:131-137 theorem highdigits_top (n k : Nat) (h : n < 2 ^ (64 * k)) : highdigits n (k - 1) = bigdigit n (k - 1) := by unfold highdigits bigdigit - sorry + -- Need to show: n / 2^(64*(k-1)) = (n / 2^(64*(k-1))) % 2^64 + -- i.e., n / 2^(64*(k-1)) < 2^64 + symm + apply Nat.mod_eq_of_lt + rcases k with _ | k + · simp at h ⊢; omega + · simp only [Nat.add_sub_cancel] + rw [Nat.mul_succ] at h + rw [Nat.pow_add] at h + exact Nat.div_lt_of_lt_mul h /-- @@ -219,6 +228,7 @@ let HIGHDIGITS_TRIVIAL = prove Source: s2n-bignum/common/bignum.ml:117-119 -/ +@[simp] theorem highdigits_trivial (n : Nat) : highdigits 0 n = 0 := by unfold highdigits @@ -293,6 +303,7 @@ let LOWDIGITS_TRIVIAL = prove Source: s2n-bignum/common/bignum.ml:151-153 -/ +@[simp] theorem lowdigits_trivial (n : Nat) : lowdigits 0 n = 0 := by unfold lowdigits @@ -357,7 +368,19 @@ Source: s2n-bignum/common/bignum.ml:159-162 theorem lowdigits_lowdigits (n i j : Nat) : lowdigits (lowdigits n i) j = lowdigits n (min i j) := by unfold lowdigits - sorry + -- Goal: (n % 2^(64*i)) % 2^(64*j) = n % 2^(64 * min i j) + by_cases hjle : j ≤ i + · -- j ≤ i: use Nat.mod_mod_of_dvd + rw [Nat.min_eq_right hjle] + rw [Nat.mod_mod_of_dvd] + apply Nat.pow_dvd_pow + omega + · -- i < j: n % 2^(64*i) < 2^(64*i) ≤ 2^(64*j), so second mod is identity + push Not at hjle + rw [Nat.min_eq_left (Nat.le_of_lt hjle)] + apply Nat.mod_eq_of_lt + exact Nat.lt_of_lt_of_le (Nat.mod_lt n (Nat.two_pow_pos (64 * i))) + (Nat.pow_le_pow_right (by norm_num : 1 ≤ 2) (by omega)) /-- @@ -374,10 +397,22 @@ Source: s2n-bignum/common/bignum.ml:19-22 Note: We use Finset.sum instead of HOL Light's nsum. -/ +private theorem lowdigits_eq_sum (n : Nat) : ∀ k, + lowdigits n k = ((List.range k).map (fun i => 2 ^ (64 * i) * bigdigit n i)).sum := by + intro k + induction k with + | zero => + simp [lowdigits_zero] + | succ k ih => + have hsuc := lowdigits_succ n k + rw [List.range_succ, List.map_append, List.sum_append] + simp only [List.map_cons, List.map_nil, List.sum_cons, List.sum_nil, Nat.add_zero] + omega + theorem bigdigitsum_works (n k : Nat) (h : n < 2 ^ (64 * k)) - : (Finset.range k).toList.foldl (init := 0) - (fun i => 2 ^ (64 * i) * bigdigit n i) = n := by - sorry + : ((List.range k).map (fun i => 2 ^ (64 * i) * bigdigit n i)).sum = n := by + rw [← lowdigits_eq_sum] + exact lowdigits_of_lt n k h /-- @@ -433,8 +468,14 @@ Source: s2n-bignum/common/bignum.ml:41-62 theorem bigdigit_add_left (a n b i : Nat) (h : i < n) : bigdigit (a + 2 ^ (64 * n) * b) i = bigdigit a i := by unfold bigdigit - sorry - + have hexp_split : 2 ^ (64 * n) = 2 ^ (64 * i) * 2 ^ (64 * (n - i)) := by + rw [← Nat.pow_add]; congr 1; omega + rw [hexp_split, Nat.mul_assoc] + rw [Nat.add_mul_div_left _ _ (Nat.two_pow_pos (64 * i))] + have hexp_split2 : 2 ^ (64 * (n - i)) = 2 ^ 64 * 2 ^ (64 * (n - i - 1)) := by + rw [← Nat.pow_add]; congr 1; omega + rw [hexp_split2, Nat.mul_assoc] + rw [Nat.add_mul_mod_self_left] /-- Successor digit extraction: extracting digit (i+1) from a 2-word number. @@ -459,7 +500,12 @@ Source: s2n-bignum/common/bignum.ml:64-71 theorem bigdigit_succ (n i t : Nat) (h : t < 2 ^ 64) : bigdigit (t + 2 ^ 64 * n) (i + 1) = bigdigit n i := by unfold bigdigit - sorry + have hexp : 64 * (i + 1) = 64 + 64 * i := by ring + rw [hexp, Nat.pow_add, ← Nat.div_div_eq_div_mul] + have hdiv : (t + 2 ^ 64 * n) / 2 ^ 64 = n := by + rw [Nat.add_mul_div_left _ _ (Nat.two_pow_pos 64)] + rw [Nat.div_eq_of_lt h, Nat.zero_add] + rw [hdiv] /-- @@ -507,6 +553,18 @@ Source: s2n-bignum/common/bignum.ml:174-184 theorem bigdigit_lowdigits (n i j : Nat) : bigdigit (lowdigits n i) j = if j < i then bigdigit n j else 0 := by unfold bigdigit lowdigits - sorry + split + · -- Case j < i + rename_i hji + have := high_low_digits n i + have hdecomp : n = lowdigits n i + 2 ^ (64 * i) * highdigits n i := by omega + conv_rhs => rw [hdecomp] + exact (bigdigit_add_left (lowdigits n i) i (highdigits n i) j hji).symm + · -- Case ¬(j < i), i.e., i ≤ j + rename_i hji + push Not at hji + apply bigdigit_of_lt + exact Nat.lt_of_lt_of_le (lowdigits_bound n i) + (Nat.pow_le_pow_right (by norm_num : 1 ≤ 2) (by omega)) end Bignum diff --git a/Bignum/Common/Memory.lean b/Bignum/Common/Memory.lean index 62e4741..a834917 100644 --- a/Bignum/Common/Memory.lean +++ b/Bignum/Common/Memory.lean @@ -71,7 +71,7 @@ def Memory.read_bytes (mem : Memory) (addr : Address) (n : Nat) if i = 0 then some acc.reverse else - let offset := Word64.ofNat (n - i) + let offset := BitVec.ofNat 64 (n - i) match mem (addr + offset) with | none => none | some byte => aux (i - 1) (byte :: acc) @@ -83,7 +83,7 @@ Write bytes to memory starting at addr. def Memory.write_bytes (mem : Memory) (addr : Address) (bytes : List UInt8) : Memory := bytes.zipIdx.foldl - (fun m (byte, i) => m.write_byte (addr + Word64.ofNat i) byte) + (fun m (byte, i) => m.write_byte (addr + BitVec.ofNat 64 i) byte) mem /-- @@ -97,7 +97,7 @@ def Memory.read_word64 (mem : Memory) (addr : Address) : Option Word64 := -- Combine 8 bytes in little-endian order -- byte[0] is least significant, byte[7] is most significant some <| bytes.zipIdx.foldl - (fun acc (byte, i) => acc + Word64.ofNat (byte.toNat * 2^(8 * i))) + (fun acc (byte, i) => acc + BitVec.ofNat 64 (byte.toNat * 2^(8 * i))) 0 /-- @@ -105,7 +105,7 @@ Write a 64-bit word to memory in little-endian format. -/ def Memory.write_word64 (mem : Memory) (addr : Address) (w : Word64) : Memory := let bytes := List.range 8 |>.map fun i => - UInt8.ofNat ((w.val / 2^(8 * i)) % 256) + UInt8.ofNat ((w.toNat / 2^(8 * i)) % 256) mem.write_bytes addr bytes /-- @@ -120,10 +120,10 @@ def Memory.read_bignum (mem : Memory) (addr : Address) (n : ℕ) : Option ℕ := if i = 0 then some acc else - let word_addr := addr + Word64.ofNat (8 * (n - i)) + let word_addr := addr + BitVec.ofNat 64 (8 * (n - i)) match mem.read_word64 word_addr with | none => none - | some w => aux (i - 1) (acc + 2^(64 * (n - i)) * w.val) + | some w => aux (i - 1) (acc + 2^(64 * (n - i)) * w.toNat) aux n 0 /-- @@ -132,8 +132,8 @@ Write a bignum to memory as n 64-bit words. def Memory.write_bignum (mem : Memory) (addr : Address) (n : ℕ) (val : ℕ) : Memory := List.range n |>.foldl (fun m i => - let word_addr := addr + Word64.ofNat (8 * i) - let word := Word64.ofNat ((val / 2^(64 * i)) % 2^64) + let word_addr := addr + BitVec.ofNat 64 (8 * i) + let word := BitVec.ofNat 64 ((val / 2^(64 * i)) % 2^64) m.write_word64 word_addr word) mem @@ -145,14 +145,14 @@ Source: s2n-bignum/common/overlap.ml -/ def nonoverlapping (addr1 : Address) (size1 : ℕ) (addr2 : Address) (size2 : ℕ) : Prop := - addr1.val + size1 ≤ addr2.val ∨ addr2.val + size2 ≤ addr1.val + addr1.toNat + size1 ≤ addr2.toNat ∨ addr2.toNat + size2 ≤ addr1.toNat /-- Bytes are loaded at a given address (no alignment requirement). -/ def bytes_loaded (mem : Memory) (addr : Address) (bytes : List UInt8) : Prop := ∀ i, (h : i < bytes.length) → - mem.read_byte (addr + Word64.ofNat i) = some (bytes[i]'h) + mem.read_byte (addr + BitVec.ofNat 64 i) = some (bytes[i]'h) /-- Bytes are loaded and aligned at a given address. Corresponds to HOL Light's @@ -160,7 +160,7 @@ Bytes are loaded and aligned at a given address. Corresponds to HOL Light's -/ def aligned_bytes_loaded (mem : Memory) (addr : Address) (bytes : List UInt8) : Prop := - addr.val % 4 = 0 ∧ -- 4-byte alignment + addr.toNat % 4 = 0 ∧ -- 4-byte alignment bytes_loaded mem addr bytes /-- diff --git a/Bignum/Common/Word.lean b/Bignum/Common/Word.lean index 75afbbc..84e18b3 100644 --- a/Bignum/Common/Word.lean +++ b/Bignum/Common/Word.lean @@ -8,15 +8,12 @@ module /-! # 64-bit Word Arithmetic -This file defines 64-bit words and their arithmetic operations, +This file defines 64-bit words and foundational theorems, corresponding to the word operations in HOL Light. ## Main Definitions * `Word64` - 64-bit words (alias for BitVec 64) -* `val` - Convert word to natural number -* `word` - Convert natural number to word (with modulo) -* Arithmetic operations with carry/borrow ## References @@ -33,76 +30,6 @@ A 64-bit word, represented as a bitvector. -/ public abbrev Word64 := BitVec 64 -/-- -Convert a Word64 to a natural number. - -Corresponds to HOL Light's `val : (N)word -> num` --/ -public def Word64.val (w : Word64) : Nat := - w.toNat - -/-- -Convert a natural number to a Word64 (with implicit modulo 2^64). - -Corresponds to HOL Light's `word : num -> (N)word` --/ -public def Word64.ofNat (n : Nat) : Word64 := - BitVec.ofNat 64 n - - -/-- -Word addition with carry out. - -Returns (sum, carry) where: -- sum = (x + y) % 2^64 -- carry = 1 if x + y >= 2^64, else 0 - -This models the ARM ADDS instruction behavior. --/ -def Word64.addWithCarry (x y : Word64) : Word64 × Nat := - let sum := x.val + y.val - (Word64.ofNat sum, if sum >= 2^64 then 1 else 0) - -/-- -Word addition with carry in and carry out. - -Returns (sum, carry_out) where: -- sum = (x + y + carry_in) % 2^64 -- carry_out = 1 if x + y + carry_in >= 2^64, else 0 - -This models the ARM ADCS instruction behavior. --/ -def Word64.adcWithCarry (x y : Word64) (carry_in : Nat) : Word64 × Nat := - let sum := x.val + y.val + carry_in - (Word64.ofNat sum, if sum >= 2^64 then 1 else 0) - -/-- -Word subtraction with borrow out. - -Returns (diff, borrow) where: -- diff = (x - y) % 2^64 -- borrow = 1 if x < y, else 0 - -This models the ARM SUBS instruction behavior. --/ -def Word64.subWithBorrow (x y : Word64) : Word64 × Nat := - let diff := x.val - y.val - (Word64.ofNat diff, if x.val < y.val then 1 else 0) - -/-- -Word subtraction with borrow in and borrow out. - -Returns (diff, borrow_out) where: -- diff = (x - y - borrow_in) % 2^64 -- borrow_out = 1 if x < y + borrow_in, else 0 - -This models the ARM SBCS instruction behavior. --/ -def Word64.sbcWithBorrow (x y : Word64) (borrow_in : Nat) - : Word64 × Nat := - let diff := x.val - y.val - borrow_in - (Word64.ofNat diff, if x.val < y.val + borrow_in then 1 else 0) - /-- The value of a word constructed from a natural number that's already bounded by 2^64 equals that natural number. @@ -112,26 +39,21 @@ Corresponds to HOL Light's VAL_WORD_EQ and related theorems. Source: Related to s2n-bignum/common/bignum.ml:29-31 (VAL_WORD_BIGDIGIT) -/ theorem val_ofNat_of_lt {n : Nat} (h : n < 2^64) : - (Word64.ofNat n).val = n := by - unfold Word64.ofNat Word64.val - grind only [= BitVec.toNat_ofNat] + (BitVec.ofNat 64 n).toNat = n := by + simp [BitVec.toNat_ofNat, Nat.mod_eq_of_lt h] /-- Converting to word and back gives modulo 2^64. -/ theorem val_ofNat (n : Nat) : - (Word64.ofNat n).val = n % 2^64 := by - unfold Word64.ofNat Word64.val + (BitVec.ofNat 64 n : Word64).toNat = n % 2^64 := by simp [BitVec.toNat_ofNat] /-- Word values are always bounded by 2^64. -/ -theorem val_lt (w : Word64) : w.val < 2^64 := by - unfold Word64.val - have := BitVec.isLt w - simp at this - exact this +theorem val_lt (w : Word64) : w.toNat < 2^64 := by + exact BitVec.isLt w end Bignum diff --git a/BignumTests.lean b/BignumTests.lean new file mode 100644 index 0000000..a6bd19d --- /dev/null +++ b/BignumTests.lean @@ -0,0 +1,9 @@ +/- +Copyright (c) 2025 Alexandre Rademaker. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Alexandre Rademaker +-/ +module + +public import BignumTests.Arm.Machine.Decode +public import BignumTests.Arm.Machine.Loader diff --git a/BignumTests/Arm/Machine/Decode.lean b/BignumTests/Arm/Machine/Decode.lean new file mode 100644 index 0000000..f15e1f6 --- /dev/null +++ b/BignumTests/Arm/Machine/Decode.lean @@ -0,0 +1,49 @@ +/- +Copyright (c) 2025 Alexandre Rademaker. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Alexandre Rademaker +-/ +module + +public meta import Bignum.Arm.Machine.Decode + +open Bignum.Arm + +/-! +## Tests for ARM Instruction Decoding +-/ + +-- Test 1: Decode ADD X2, X1, X0 = 0x8b000022 +-- #eval decode 0x8b000022 + +-- Test 2: Decode SUB X2, X2, X1 = 0xcb010042 +-- #eval decode 0xcb010042 + +-- Test 3: Invalid encoding → none +-- #eval decode 0x00000000 + +-- Test 4: Invalid encoding → none +-- #eval decode 0xffffffff + +-- Test 5: Endianness — bytes [0x22, 0x00, 0x00, 0x8b] → 0x8b000022 = 2332033058 +#eval ((0x22 : UInt32) ||| ((0x00 : UInt32) <<< 8) ||| + ((0x00 : UInt32) <<< 16) ||| ((0x8b : UInt32) <<< 24)) + +-- Test 6: Byte list decoding (Simple.lean program) +-- #eval decodeBytes [0x22, 0x00, 0x00, 0x8b, 0x42, 0x00, 0x01, 0xcb] + +-- Test 7: Program creation from bytes +-- #eval Program.fromBytes (BitVec.ofNat 64 0) [0x22, 0x00, 0x00, 0x8b, 0x42, 0x00, 0x01, 0xcb] + +-- Test 8: Bit extraction from 0x8b000022 (ADD X2, X1, X0) +#eval extractBits 0x8b000022 31 31 -- sf bit = 1 +#eval extractBits 0x8b000022 30 30 -- op bit = 0 +#eval extractBits 0x8b000022 29 29 -- S bit = 0 +#eval extractBits 0x8b000022 28 21 -- opcode = 0b01011000 = 88 +#eval extractBits 0x8b000022 4 0 -- Rd = 2 + +-- Test 9: Register decoding +-- #eval decodeReg 0 -- X0 +-- #eval decodeReg 1 -- X1 +-- #eval decodeReg 2 -- X2 +-- #eval decodeReg 31 -- SP diff --git a/BignumTests/Arm/Machine/Loader.lean b/BignumTests/Arm/Machine/Loader.lean new file mode 100644 index 0000000..25be675 --- /dev/null +++ b/BignumTests/Arm/Machine/Loader.lean @@ -0,0 +1,36 @@ +/- +Copyright (c) 2025 Alexandre Rademaker. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Author: Alexandre Rademaker +-/ +module + +public meta import Bignum.Arm.Machine.Loader + +open Bignum.Arm + +/-! +## Tests for the ELF Loader utilities +-/ + +-- compareBytes: equal lists +#eval compareBytes [] [] +-- Expected: ByteCompareResult.ok + +#eval compareBytes [0x22, 0x00] [0x22, 0x00] +-- Expected: ByteCompareResult.ok + +-- compareBytes: length mismatch +#eval compareBytes [0x22, 0x00] [0x22] +-- Expected: ByteCompareResult.lengthMismatch 2 1 + +-- compareBytes: byte mismatch at index 1 +#eval compareBytes [0x22, 0x01] [0x22, 0x00] +-- Expected: ByteCompareResult.byteMismatch 1 0x01 0x00 + +-- formatBytesAsLeanDef +#eval formatBytesAsLeanDef "simple_mc" [0x22, 0x00, 0x00, 0x8b, 0x42, 0x00, 0x01, 0xcb] + +-- hexDump +#eval hexDump [0x21, 0x00, 0x00, 0x8b, 0x42, 0x00, 0x00, 0x8b, + 0x43, 0x00, 0x80, 0xd2, 0x21, 0x7c, 0x03, 0x9b] diff --git a/lake-manifest.json b/lake-manifest.json index 12ee045..f367c59 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -1,11 +1,11 @@ -{"version": "1.1.0", +{"version": "1.2.0", "packagesDir": ".lake/packages", "packages": [{"url": "https://github.com/leanprover/cslib", "type": "git", "subDir": null, "scope": "leanprover", - "rev": "b91aeb4e8ebb4a7a2f8972fcc784e73acae19f8c", + "rev": "3ea6f2129df7e286211a378ccb423aa65746e8e4", "name": "cslib", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -15,7 +15,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "11b0237d9d488e52b9d350b6251086b22dfdf891", + "rev": "d2962c4edce550edb489343fa7b499ea5bb25969", "name": "mathlib", "manifestFile": "lake-manifest.json", "inputRev": "master", @@ -25,7 +25,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "7311586e1a56af887b1081d05e80c11b6c41d212", + "rev": "86210d4ad1b08b086d0bd638637a75246523dbb8", "name": "plausible", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -35,7 +35,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5ce7f0a355f522a952a3d678d696bd563bb4fd28", + "rev": "c5d5b8fe6e5158def25cd28eb94e4141ad97c843", "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -45,7 +45,7 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "875ad9d88ed684e39c16bdea260e6ecfa15afd60", + "rev": "cdab3938ccabbdb044be6896e251b5814bec932e", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", @@ -55,51 +55,52 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "6d65c6e0a25b8a52c13c3adeb63ecde3bfbb6294", + "rev": "2db6054a44326f8c0230ee0570e2ddb894816511", "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.86", + "inputRev": "v0.0.98", "inherited": true, "configFile": "lakefile.lean"}, {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "f08e838d4f9aea519f3cde06260cfb686fd4bab0", + "rev": "f0c6e183ea26531e82773feb4b73ab6595ca17a5", "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc2", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "23324752757bf28124a518ec284044c8db79fee5", + "rev": "1cc7e819b9b9bc1e87c9edcccb62e0269e00a809", "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "v4.30.0-rc2", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "cabbb5a025bfbbc50af9184ed2dfdde6ea4f53a7", + "rev": "5c57f3857ba81924a88b2cdf4f062e34ec04ff11", "name": "batteries", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "v4.30.0-rc2", "inherited": true, "configFile": "lakefile.toml"}, {"url": "https://github.com/leanprover/lean4-cli", "type": "git", "subDir": null, "scope": "leanprover", - "rev": "28e0856d4424863a85b18f38868c5420c55f9bae", + "rev": "13567aed1ac4f12aea9484178e07e51f8c9f7658", "name": "Cli", "manifestFile": "lake-manifest.json", - "inputRev": "v4.28.0-rc1", + "inputRev": "v4.30.0-rc2", "inherited": true, "configFile": "lakefile.toml"}], "name": "bignum", - "lakeDir": ".lake"} + "lakeDir": ".lake", + "fixedToolchain": false} diff --git a/lakefile.toml b/lakefile.toml index 3d0b732..d6f143e 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -1,7 +1,7 @@ name = "bignum" version = "0.1.0" keywords = ["math"] -defaultTargets = ["Bignum"] +defaultTargets = ["Bignum", "BignumTests"] [leanOptions] pp.unicode.fun = true # pretty-prints `fun a ↦ b` @@ -20,6 +20,11 @@ scope = "leanprover" [[lean_lib]] name = "Bignum" +globs = ["Bignum", "Bignum.**"] + +[[lean_lib]] +name = "BignumTests" +needs = ["Bignum"] [[lean_exe]] name = "sh" diff --git a/lean-toolchain b/lean-toolchain index 3e9b4e1..635bb95 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.28.0-rc1 \ No newline at end of file +leanprover/lean4:v4.30.0-rc2 \ No newline at end of file diff --git a/s2n-bignum b/s2n-bignum index f51c822..c3ed931 160000 --- a/s2n-bignum +++ b/s2n-bignum @@ -1 +1 @@ -Subproject commit f51c82275cbbbfea35df80e2e1aa0b292f3c2187 +Subproject commit c3ed931ce9f873713c994db74a7772e06ec6c2c6