Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ else()
# Check if the llvm SHA is consistent with llvm_version.txt
file(STRINGS ${LLVM_INCLUDE_DIR}/llvm/Support/VCSRevision.h REVISION_FILE_DATA)
message(DEBUG "VCSRevision: ${REVISION_FILE_DATA}")
string(REGEX MATCH "\"([^\"]*)\"" LLVM_SHA ${REVISION_FILE_DATA})
string(REPLACE "\"" "" LLVM_SHA ${LLVM_SHA})
string(REGEX MATCH "\\(([0-9a-fA-F]+)\\)" LLVM_SHA ${REVISION_FILE_DATA})
string(REPLACE "\"" "" LLVM_SHA ${CMAKE_MATCH_1}) # In CMake, CMAKE_MATCH_1 contains the first capture group
message(STATUS "llvm sha: \"${LLVM_SHA}\"")

if (IMEX_CHECK_LLVM_VERSION AND (NOT EXPECTED_LLVM_SHA STREQUAL LLVM_SHA))
Expand Down
2 changes: 1 addition & 1 deletion build_tools/llvm_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
50351218b3a4687079fe79f932aac0e00d5d990f
a8768c4838f67dfad0b3c1b2518e8521c9f6440f
398 changes: 25 additions & 373 deletions build_tools/patches/0001-Add-support-for-VectorAnyINTEL-capability.patch

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
From 8605245100a7e20cfd7fac73b28536e0f0d3c93b Mon Sep 17 00:00:00 2001
From 0560ef541b96e0e1e4b9e9597e52545efe06866c Mon Sep 17 00:00:00 2001
From: Garra1980 <igor.zamyatin@intel.com>
Date: Tue, 2 Dec 2025 19:04:17 +0100
Subject: [PATCH] Add serialization and de-serialization support for s
Date: Fri, 30 Jan 2026 20:27:27 +0100
Subject: [PATCH] Add serialization and de-serialization support for spirv

---
mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp | 6 ++++++
mlir/lib/Target/SPIRV/Serialization/Serializer.cpp | 6 ++++++
2 files changed, 12 insertions(+)

diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
index 50883d9ed5e7..c5da338f6339 100644
index 8d1f9c26fe59..922fefabe21c 100644
--- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
+++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp
@@ -283,6 +283,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
Expand All @@ -31,16 +31,16 @@ index 50883d9ed5e7..c5da338f6339 100644
if (words.size() != 2) {
return emitError(unknownLoc, "OpDecoration with ")
<< decorationName << "needs a single target <id>";
@@ -355,6 +360,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
break;
@@ -356,6 +361,7 @@ LogicalResult spirv::Deserializer::processDecoration(ArrayRef<uint32_t> words) {
case spirv::Decoration::Location:
case spirv::Decoration::SpecId:
case spirv::Decoration::Index:
+ case spirv::Decoration::FuncParamIOKindINTEL:
if (words.size() != 3) {
return emitError(unknownLoc, "OpDecoration with ")
<< decorationName << "needs a single integer literal";
diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
index c879a2b3e020..0ccaa72d319b 100644
index 840c9c990f9c..742bc4b7479c 100644
--- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
+++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp
@@ -335,8 +335,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
Expand All @@ -52,9 +52,9 @@ index c879a2b3e020..0ccaa72d319b 100644
case spirv::Decoration::DescriptorSet:
+ case spirv::Decoration::FuncParamIOKindINTEL:
case spirv::Decoration::Location:
case spirv::Decoration::Index:
if (auto intAttr = dyn_cast<IntegerAttr>(attr)) {
args.push_back(intAttr.getValue().getZExtValue());
@@ -374,6 +376,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
@@ -375,6 +377,10 @@ LogicalResult Serializer::processDecorationAttr(Location loc, uint32_t resultID,
case spirv::Decoration::Invariant:
case spirv::Decoration::Patch:
case spirv::Decoration::Coherent:
Expand Down
117 changes: 13 additions & 104 deletions build_tools/patches/wg_fa_support.patch
Original file line number Diff line number Diff line change
@@ -1,124 +1,33 @@
From 4342fab14630d1ee774ec78dea66fd78f200a3ad Mon Sep 17 00:00:00 2001
From 1556527c6b9b4a99438172118f4202a77731dcbc Mon Sep 17 00:00:00 2001
From: Garra1980 <igor.zamyatin@intel.com>
Date: Mon, 22 Dec 2025 16:40:20 +0100
Subject: [PATCH] wg_fa_support
Date: Tue, 20 Jan 2026 15:53:45 +0100
Subject: [PATCH] wg_fa

---
.../XeGPU/Transforms/XeGPUBlocking.cpp | 6 +-
.../XeGPU/Transforms/XeGPUBlocking.cpp | 3 +-
.../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 59 +++++++++++++++++++
.../Transforms/XeGPUWgToSgDistribute.cpp | 12 ++--
3 files changed, 68 insertions(+), 9 deletions(-)
3 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
index ba2753f517ce..dc4f05c0d914 100644
index 931834ba16d9..f59464a55d60 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp
@@ -228,7 +228,7 @@ XeGPUBlockingPass::getTileShape(Operation *op) const {
@@ -228,7 +228,8 @@ XeGPUBlockingPass::getTileShape(Operation *op) const {
if (isa<vector::MultiDimReductionOp>(op))
return getTileShape(op->getOpOperand(0));

- if (isa<vector::TransposeOp, vector::BroadcastOp>(op))
+ if (isa<vector::TransposeOp, vector::BroadcastOp, vector::ShapeCastOp>(op))
- if (isa<vector::TransposeOp, vector::BroadcastOp, vector::StepOp,
+ if (isa<vector::TransposeOp, vector::BroadcastOp,
+ vector::ShapeCastOp, vector::StepOp,
vector::ConstantMaskOp, vector::CreateMaskOp>(op))
return getTileShape(op->getOpResult(0));

return std::nullopt;
@@ -415,14 +415,14 @@ void XeGPUBlockingPass::runOnOperation() {
// Remove the layout attributes cached per operands.
for (OpOperand &opr : op->getOpOperands()) {
std::string name = xegpu::getTemporaryLayoutName(opr);
- if (op->hasAttrOfType<xegpu::LayoutAttr>(name))
+ if (op->hasAttrOfType<xegpu::DistributeLayoutAttr>(name))
op->removeAttr(name);
}

// Update the layout attributes per result.
for (OpResult result : op->getOpResults()) {
std::string name = xegpu::getTemporaryLayoutName(result);
- if (auto layout = op->getAttrOfType<xegpu::LayoutAttr>(name)) {
+ if (auto layout = op->getAttrOfType<xegpu::DistributeLayoutAttr>(name)) {
op->removeAttr(name);
if (!isa<LoopLikeOpInterface>(op))
xegpu::setDistributeLayoutAttr(result, layout.dropInstData());
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
index 7fc75e7294ea..94f31e653511 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp
@@ -1336,6 +1336,60 @@ static LogicalResult updateFunctionOpInterface(mlir::OpBuilder &builder,
return success();
}

+static LogicalResult resolveConflicts(Operation *op) {
+ auto r = op->walk([&](xegpu::LoadNdOp loadNdOp) -> WalkResult {
+ // Load op has a conflict if tensor desc layout is different from the its
+ // result layout.
+ auto getResultLayout = [](OpResult result) {
+ auto resultLayoutName = xegpu::getTemporaryLayoutName(result);
+ return result.getOwner()->getAttrOfType<xegpu::DistributeLayoutAttr>(
+ resultLayoutName);
+ };
+ auto hasConflict = [&getResultLayout](xegpu::LoadNdOp loadNdOp) -> bool {
+ auto tdescType = loadNdOp.getTensorDescType();
+ auto tdescLayout = tdescType.getLayout();
+ // auto resultLayoutName = xegpu::getTemporaryLayoutName(loadNdOp->getOpResult(0));
+ auto resultLayout = getResultLayout(loadNdOp->getOpResult(0));
+ return tdescLayout && resultLayout && tdescLayout != resultLayout;
+ };
+ if (hasConflict(loadNdOp)) {
+ OpBuilder builder(loadNdOp);
+ // Try to get the defining createNdDesc op.
+ auto createNdOp =
+ loadNdOp.getTensorDesc().getDefiningOp<xegpu::CreateNdDescOp>();
+ if (!createNdOp) {
+ DBGS() << "Failed to resolve LoadNdOp layout conflict: " << *loadNdOp
+ << "\n";
+ return WalkResult::interrupt();
+ }
+
+ builder.setInsertionPointAfter(createNdOp);
+ auto tdescType = loadNdOp.getTensorDescType();
+ auto expectedLayout = getResultLayout(loadNdOp->getOpResult(0));
+ auto newTensorDescType = xegpu::TensorDescType::get(
+ createNdOp.getContext(), tdescType.getShape(),
+ tdescType.getElementType(), tdescType.getEncoding(), expectedLayout);
+ auto newOp = xegpu::CreateNdDescOp::create(
+ builder, loadNdOp.getLoc(), newTensorDescType,
+ createNdOp->getOperands(), createNdOp->getAttrs());
+ // Replace only the conflicting uses of the createNdOp that can be
+ // resolved using the new layout.
+ createNdOp->replaceUsesWithIf(
+ ArrayRef<Value>(newOp.getResult()), [&](OpOperand &opnd) {
+ auto userLoadNdOp = dyn_cast<xegpu::LoadNdOp>(opnd.getOwner());
+ if (!userLoadNdOp)
+ return false;
+ auto resultLayout = getResultLayout(userLoadNdOp->getOpResult(0));
+ return hasConflict(userLoadNdOp) && resultLayout == expectedLayout;
+ });
+ }
+ return WalkResult::advance();
+ });
+ if (r.wasInterrupted())
+ return failure();
+ return success();
+}
+
namespace {
struct XeGPUPropagateLayoutPass final
: public xegpu::impl::XeGPUPropagateLayoutBase<XeGPUPropagateLayoutPass> {
@@ -1411,4 +1465,9 @@ void XeGPUPropagateLayoutPass::runOnOperation() {
signalPassFailure();
return;
}
+ if (failed(resolveConflicts(op))) {
+ DBGS() << "Failed to resolve layout conflicts after propagation.\n";
+ signalPassFailure();
+ return;
+ }
}
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
index 07572a495076..448d78f4dc4f 100644
index 8328c2797be4..679f11bd1d43 100644
--- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
+++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -1270,13 +1270,13 @@ struct WgToSgVectorTransposeOp
@@ -1582,13 +1582,13 @@ struct WgToSgVectorTransposeOp
SmallVector<int64_t> sourceSgLayout =
sourceLayout.getEffectiveSgLayoutAsInt();
SmallVector<int64_t> resultSgLayout = layout.getEffectiveSgLayoutAsInt();
Expand Down
2 changes: 1 addition & 1 deletion include/imex/Dialect/Region/IR/RegionOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class Region_Op<string mnemonic, list<Trait> traits = []> :
//===----------------------------------------------------------------------===//

def EnvironmentRegionOp : Region_Op<"env_region", [
DeclareOpInterfaceMethods<RegionBranchOpInterface>,
DeclareOpInterfaceMethods<RegionBranchOpInterface, ["getSuccessorRegions", "getSuccessorInputs"]>,
SingleBlockImplicitTerminator<"::imex::region::EnvironmentRegionYieldOp">,
RecursiveMemoryEffects]> {
let summary = "Operation that executes its region with a specific environment";
Expand Down
16 changes: 15 additions & 1 deletion lib/Dialect/Region/IR/RegionOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,21 @@ void EnvironmentRegionOp::getSuccessorRegions(
}

// Otherwise, the region branches back to the parent operation.
regions.push_back(::mlir::RegionSuccessor(getOperation(), getResults()));
regions.push_back(::mlir::RegionSuccessor::parent());
}

// Legacy compatibility method with old signature
void EnvironmentRegionOp::getSuccessorRegions(
::mlir::Region &region,
::mlir::SmallVectorImpl<::mlir::RegionSuccessor> &regions) {
// The region is the body of the operation, so it branches back to parent
regions.push_back(::mlir::RegionSuccessor::parent());
}

::mlir::ValueRange
EnvironmentRegionOp::getSuccessorInputs(::mlir::RegionSuccessor successor) {
return successor.isParent() ? getOperation()->getResults()
: ::mlir::ValueRange();
}

void EnvironmentRegionOp::inlineIntoParent(::mlir::PatternRewriter &builder,
Expand Down
6 changes: 3 additions & 3 deletions lib/Dialect/XeTile/Transforms/Blocking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1336,8 +1336,8 @@ class RewriteRegionBranchOp
// the region iter arguments will be used as the anchor if it is a loop,
// otherwise, the op results will be used as the anchor.
// TODO: is it safe to assume that first is always the entry successor?
auto anchor =
iface.hasLoop() ? successors[0].getSuccessorInputs() : op->getResults();
auto anchor = iface.hasLoop() ? iface.getSuccessorInputs(successors[0])
: op->getResults();

// Collect blockSZ for each value and check whether they need a rewrite
bool toChange = false;
Expand Down Expand Up @@ -1394,7 +1394,7 @@ class RewriteRegionBranchOp
{ // convert the region arguments for loops
if (iface.hasLoop()) {
rewriter.setInsertionPointToStart(&r->front());
auto arguments = llvm::to_vector(s.getSuccessorInputs());
auto arguments = llvm::to_vector(iface.getSuccessorInputs(s));
convertOperandsOrResults(
llvm::ArrayRef<Value>(arguments), blockSZs,
[&](int64_t i, Value arg, ShapedType type,
Expand Down
6 changes: 4 additions & 2 deletions lib/Transforms/VnniTransformation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,8 @@ static void handleBranchTerminatorOpInterface(
if (!mlir::isa<mlir::RegionBranchOpInterface>(terminator->getParentOp()))
return;

auto iface =
mlir::cast<mlir::RegionBranchOpInterface>(terminator->getParentOp());
llvm::SmallVector<mlir::RegionSuccessor> successors;
llvm::SmallVector<mlir::Attribute> operands(terminator->getNumOperands(),
nullptr);
Expand All @@ -433,7 +435,7 @@ static void handleBranchTerminatorOpInterface(
continue;

mlir::OperandRange operands = terminator.getSuccessorOperands(successor);
mlir::ValueRange inputs = successor.getSuccessorInputs();
mlir::ValueRange inputs = iface.getSuccessorInputs(successor);
for (auto [arg, inp] : llvm::zip(operands, inputs)) {
if (analysis.getLayout(arg)) {
auto vecTy = mlir::cast<mlir::VectorType>(arg.getType());
Expand Down Expand Up @@ -461,7 +463,7 @@ static void handleBranchOpInterface(mlir::OpBuilder &builder,
continue;

mlir::OperandRange operands = branch.getEntrySuccessorOperands(successor);
mlir::ValueRange inputs = successor.getSuccessorInputs();
mlir::ValueRange inputs = branch.getSuccessorInputs(successor);

for (auto [arg, input] : llvm::zip(operands, inputs)) {
if (analysis.getLayout(input)) {
Expand Down
2 changes: 1 addition & 1 deletion test/Integration/Dialect/XeGPU/SG/xegpu-to-llvm.pp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
xevm-attach-target
gpu.module(
xegpu-propagate-layout{layout-kind="lane"}
xegpu-optimize-block-loads
xegpu-optimize-peephole
canonicalize
cse
xegpu-propagate-layout{layout-kind="lane"}
Expand Down
2 changes: 1 addition & 1 deletion test/Integration/Dialect/XeGPU/WG/xegpu-to-llvm.pp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
canonicalize
cse
xegpu-propagate-layout{layout-kind="lane"}
xegpu-optimize-block-loads
xegpu-optimize-peephole
canonicalize
cse
xegpu-propagate-layout{layout-kind="lane"}
Expand Down
Loading