Skip to content

Commit fc6d2fd

Browse files
committed
Merge branch 'tilereduce' of https://github.com/arhik/cuTile.jl into tilereduce
2 parents ff8b77b + dfffe98 commit fc6d2fd

4 files changed

Lines changed: 59 additions & 2 deletions

File tree

.github/workflows/CI.yml

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
name: CI
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
9+
jobs:
10+
test:
11+
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
12+
runs-on: ${{ matrix.runner }}
13+
timeout-minutes: 15
14+
strategy:
15+
fail-fast: false
16+
matrix:
17+
version:
18+
- '1.11'
19+
- '1.12'
20+
os:
21+
- Linux
22+
- Windows
23+
- macOS
24+
arch:
25+
- x64
26+
- aarch64
27+
exclude:
28+
- os: Windows
29+
arch: aarch64
30+
- os: macOS
31+
arch: x64
32+
include:
33+
- os: Linux
34+
arch: x64
35+
runner: ubuntu-latest
36+
- os: Linux
37+
arch: aarch64
38+
runner: ubuntu-24.04-arm
39+
- os: Windows
40+
arch: x64
41+
runner: windows-latest
42+
- os: macOS
43+
arch: aarch64
44+
runner: macos-latest
45+
steps:
46+
- uses: actions/checkout@v4
47+
- uses: julia-actions/setup-julia@v2
48+
with:
49+
version: ${{ matrix.version }}
50+
arch: ${{ matrix.arch }}
51+
- uses: julia-actions/cache@v2
52+
- uses: julia-actions/julia-buildpkg@v1
53+
- uses: julia-actions/julia-runtest@v1
54+
with:
55+
test_args: '--quickfail'

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ IRStructurizer = {path = "IRStructurizer"}
1818
CUDAExt = "CUDA"
1919

2020
[compat]
21+
julia = "1.11"
2122
CUDA_Compiler_jll = "0.4"
2223
CUDA_Tile_jll = "13.1"
2324

src/language/operations.jl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,7 @@ Indices are 1-indexed. Index tiles are broadcast to a common shape.
235235
stride1 = broadcast_to(stride1_0d, S)
236236

237237
# Compute linear index = idx0 * stride0 + idx1 * stride1
238-
linear_idx = idx0_i32 * stride0 + idx1_i32 * stride1
238+
linear_idx = idx0_i32 .* stride0 + idx1_i32 .* stride1
239239

240240
# Compute pointer tile
241241
ptr_tile = Intrinsics.offset(array.ptr, linear_idx)
@@ -317,7 +317,7 @@ Indices are 1-indexed. Index tiles and value tile must broadcast to same shape.
317317
stride1 = broadcast_to(stride1_0d, S)
318318

319319
# Compute linear index = idx0 * stride0 + idx1 * stride1
320-
linear_idx = idx0_i32 * stride0 + idx1_i32 * stride1
320+
linear_idx = idx0_i32 .* stride0 + idx1_i32 .* stride1
321321

322322
# Compute pointer tile
323323
ptr_tile = Intrinsics.offset(array.ptr, linear_idx)

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
[deps]
2+
cuTile = "0dea8319-8c4a-4662-a73d-20234d115b9a"
23
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
34
FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
45
FileCheck = "4e644321-382b-4b05-b0b6-5d23c3d944fb"

0 commit comments

Comments
 (0)