From 3d7080c10eb3cf5c55be7d62b31532af098341fc Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Sat, 6 Dec 2025 21:46:12 +0000 Subject: [PATCH] [AArch64] Treat NOP as a separate instruction. Previously, nop was treated as just an alias for hint #0. The consequence of that was that all the general rules for hint instructions applied to nop too, in particular that during binary analysis, they were assumed to have unknown effects. This commit adds AArch64::NOP as a standalone instruction with no side effects. The scheduling update in A55-load-store-alias.s is probably not entirely accurate, but should be more accurate than the previous result. --- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 6 +-- .../Target/AArch64/AArch64A53Fix835769.cpp | 7 ++- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 10 ++-- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 ++- .../AArch64/Cortex/A55-load-store-alias.s | 48 +++++++++---------- .../AArch64/Cortex/A55-load-store-noalias.s | 2 +- 7 files changed, 42 insertions(+), 43 deletions(-) diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index af87d5c12b5ce..5881d3fba70f6 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -1862,14 +1862,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } bool isNoop(const MCInst &Inst) const override { - return Inst.getOpcode() == AArch64::HINT && - Inst.getOperand(0).getImm() == 0; + return Inst.getOpcode() == AArch64::NOP; } void createNoop(MCInst &Inst) const override { - Inst.setOpcode(AArch64::HINT); + Inst.setOpcode(AArch64::NOP); Inst.clear(); - Inst.addOperand(MCOperand::createImm(0)); } bool isTrap(const MCInst &Inst) const override { diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp index a51f63073403b..407714adfb8b8 100644 --- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp +++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp @@ -178,11 +178,10 @@ static void insertNopBeforeInstruction(MachineBasicBlock &MBB, MachineInstr* MI, MachineInstr *I = getLastNonPseudo(MBB, TII); assert(I && "Expected instruction"); DebugLoc DL = I->getDebugLoc(); - BuildMI(I->getParent(), DL, TII->get(AArch64::HINT)).addImm(0); - } - else { + BuildMI(I->getParent(), DL, TII->get(AArch64::NOP)); + } else { DebugLoc DL = MI->getDebugLoc(); - BuildMI(MBB, MI, DL, TII->get(AArch64::HINT)).addImm(0); + BuildMI(MBB, MI, DL, TII->get(AArch64::NOP)); } ++NumNopsAdded; diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 8267414e78955..574316160fbcb 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -472,7 +472,7 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) { EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::B).addImm(8)); for (int8_t I = 0; I < NoopsInSledCount; I++) - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::NOP)); OutStreamer->emitLabel(Target); recordSled(CurSled, MI, Kind, 2); @@ -1697,7 +1697,7 @@ void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, // Emit nops. for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::NOP)); } // Lower a patchpoint of the form: @@ -1731,7 +1731,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((NumBytes - EncodedBytes) % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::NOP)); } void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, @@ -1740,7 +1740,7 @@ void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = 0; i < PatchBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::NOP)); } else { // Lower call target and choose correct opcode const MachineOperand &CallTarget = SOpers.getCallTarget(); @@ -2122,7 +2122,7 @@ bool AArch64AsmPrinter::emitDeactivationSymbolRelocation(Value *DS) { if (isa(DS)) { // Just emit the nop directly. - EmitToStreamer(MCInstBuilder(AArch64::HINT).addImm(0)); + EmitToStreamer(MCInstBuilder(AArch64::NOP)); return true; } MCSymbol *Dot = OutContext.createTempSymbol(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 904577b8233d5..ac4c1acedf05c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6901,12 +6901,10 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; - BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0); + BuildMI(MBB, MI, DL, get(AArch64::NOP)); } -MCInst AArch64InstrInfo::getNop() const { - return MCInstBuilder(AArch64::HINT).addImm(0); -} +MCInst AArch64InstrInfo::getNop() const { return MCInstBuilder(AArch64::NOP); } // AArch64 supports MachineCombiner. bool AArch64InstrInfo::useMachineCombiner() const { return true; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 64017d7cafca3..0cbe867497625 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1544,7 +1544,6 @@ let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in { //===----------------------------------------------------------------------===// def HINT : HintI<"hint">; -def : InstAlias<"nop", (HINT 0b000)>; def : InstAlias<"yield",(HINT 0b001)>; def : InstAlias<"wfe", (HINT 0b010)>; def : InstAlias<"wfi", (HINT 0b011)>; @@ -1554,6 +1553,11 @@ def : InstAlias<"dgh", (HINT 0b110)>; def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; def : InstAlias<"csdb", (HINT 20)>; +let CRm = 0b0000, hasSideEffects = 0 in +def NOP : SystemNoOperands<0b000, "hint\t#0">; + +def : InstAlias<"nop", (NOP)>; + let Predicates = [HasPCDPHINT] in { def STSHH: STSHHI; } diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s index 3906975b41f6b..b0cfa5ff90cf1 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-alias.s @@ -12,12 +12,12 @@ ldr x3, [x10] # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 18 -# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Total Cycles: 25 # CHECK-NEXT: Total uOps: 18 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.58 -# CHECK-NEXT: IPC: 0.58 +# CHECK-NEXT: uOps Per Cycle: 0.72 +# CHECK-NEXT: IPC: 0.72 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -32,7 +32,7 @@ ldr x3, [x10] # CHECK-NEXT: 1 1 1.00 * str x1, [x10] # CHECK-NEXT: 1 1 1.00 * str x1, [x10] # CHECK-NEXT: 1 3 1.00 * ldr x2, [x10] -# CHECK-NEXT: 1 1 1.00 * * U nop +# CHECK-NEXT: 1 1 1.00 nop # CHECK-NEXT: 1 3 1.00 * ldr x2, [x10] # CHECK-NEXT: 1 3 1.00 * ldr x3, [x10] @@ -64,27 +64,27 @@ ldr x3, [x10] # CHECK-NEXT: - - - - - - - - - 1.00 - - ldr x3, [x10] # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01234 -# CHECK: [0,0] DE . . . . . . str x1, [x10] -# CHECK-NEXT: [0,1] .DE . . . . . . str x1, [x10] -# CHECK-NEXT: [0,2] . DeeE . . . . . ldr x2, [x10] -# CHECK-NEXT: [0,3] . DE . . . . . nop -# CHECK-NEXT: [0,4] . .DeeE. . . . . ldr x2, [x10] -# CHECK-NEXT: [0,5] . . DeeE . . . . ldr x3, [x10] -# CHECK-NEXT: [1,0] . . DE . . . . str x1, [x10] -# CHECK-NEXT: [1,1] . . .DE . . . . str x1, [x10] -# CHECK-NEXT: [1,2] . . . DeeE . . . ldr x2, [x10] -# CHECK-NEXT: [1,3] . . . DE . . . nop -# CHECK-NEXT: [1,4] . . . .DeeE. . . ldr x2, [x10] -# CHECK-NEXT: [1,5] . . . . DeeE . . ldr x3, [x10] -# CHECK-NEXT: [2,0] . . . . DE . . str x1, [x10] -# CHECK-NEXT: [2,1] . . . . .DE . . str x1, [x10] -# CHECK-NEXT: [2,2] . . . . . DeeE . ldr x2, [x10] -# CHECK-NEXT: [2,3] . . . . . DE . nop -# CHECK-NEXT: [2,4] . . . . . .DeeE. ldr x2, [x10] -# CHECK-NEXT: [2,5] . . . . . . DeeE ldr x3, [x10] +# CHECK: [0,0] DE . . . . . str x1, [x10] +# CHECK-NEXT: [0,1] .DE . . . . . str x1, [x10] +# CHECK-NEXT: [0,2] . DeeE . . . . ldr x2, [x10] +# CHECK-NEXT: [0,3] . DE . . . . nop +# CHECK-NEXT: [0,4] . DeeE . . . . ldr x2, [x10] +# CHECK-NEXT: [0,5] . DeeE . . . . ldr x3, [x10] +# CHECK-NEXT: [1,0] . . DE. . . . str x1, [x10] +# CHECK-NEXT: [1,1] . . DE . . . str x1, [x10] +# CHECK-NEXT: [1,2] . . DeeE . . . ldr x2, [x10] +# CHECK-NEXT: [1,3] . . . DE . . . nop +# CHECK-NEXT: [1,4] . . . DeeE . . ldr x2, [x10] +# CHECK-NEXT: [1,5] . . . DeeE . . ldr x3, [x10] +# CHECK-NEXT: [2,0] . . . .DE . . str x1, [x10] +# CHECK-NEXT: [2,1] . . . . DE . . str x1, [x10] +# CHECK-NEXT: [2,2] . . . . DeeE . ldr x2, [x10] +# CHECK-NEXT: [2,3] . . . . DE . nop +# CHECK-NEXT: [2,4] . . . . DeeE. ldr x2, [x10] +# CHECK-NEXT: [2,5] . . . . .DeeE ldr x3, [x10] # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-noalias.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-noalias.s index dc681f4ce9479..c9704c58b627f 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-noalias.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-store-noalias.s @@ -30,7 +30,7 @@ ldr x3, [x10] # CHECK-NEXT: 1 1 1.00 * str x1, [x10] # CHECK-NEXT: 1 1 1.00 * str x1, [x10] # CHECK-NEXT: 1 3 1.00 * ldr x2, [x10] -# CHECK-NEXT: 1 1 1.00 * * U nop +# CHECK-NEXT: 1 1 1.00 nop # CHECK-NEXT: 1 3 1.00 * ldr x2, [x10] # CHECK-NEXT: 1 3 1.00 * ldr x3, [x10]