-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathl4-matmul.c
More file actions
36 lines (27 loc) · 1.08 KB
/
l4-matmul.c
File metadata and controls
36 lines (27 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <stddef.h>
#include <omp.h>
/**
 * Cache-blocked matrix multiply-accumulate: C += A * B.
 *
 * All three matrices are dense and row-major: A is M x K, B is K x N and
 * C is M x N. The product is ADDED to whatever C already holds, so the
 * caller must zero C beforehand to obtain a plain product.
 *
 * @param A   M*K input elements (read only).
 * @param B   K*N input elements (read only).
 * @param C   M*N elements, updated in place.
 * @param M   number of rows of A and C.
 * @param K   number of columns of A / rows of B.
 * @param N   number of columns of B and C.
 * @param BS  tile edge length; a non-positive value selects a built-in
 *            default instead of looping forever.
 *
 * If any of M, K or N is non-positive there is nothing to compute and C is
 * left untouched.
 *
 * When compiled with OpenMP, the (row tile, column tile) pairs are split
 * across threads. Each pair covers a disjoint tile of C and walks the K
 * dimension serially, so no two threads write the same element.
 */
void matmul_blocked(const double* A, const double* B, double* C,
                    int M, int K, int N, int BS) {
    enum { DEFAULT_BLOCK = 64 };

    /* A zero or negative step would never advance the tile loops. */
    if (BS <= 0) {
        BS = DEFAULT_BLOCK;
    }
    /* Empty problem; also guarantees the size_t conversions below are safe. */
    if (M <= 0 || K <= 0 || N <= 0) {
        return;
    }

    /* Row strides in size_t so flat offsets cannot overflow int. */
    const size_t a_stride = (size_t)K;   /* elements per row of A       */
    const size_t bc_stride = (size_t)N;  /* elements per row of B and C */

    /* Iterate over the output tiles of C. The two outer loops must stay
     * perfectly nested for collapse(2), so all per-tile setup lives inside
     * the column-tile loop body. */
    #pragma omp parallel for collapse(2) schedule(static)
    for (int ii = 0; ii < M; ii += BS) {          /* row tile    */
        for (int jj = 0; jj < N; jj += BS) {      /* column tile */
            /* Clamp the tile to the matrix edge. Comparing BS against the
             * remaining extent avoids evaluating ii + BS when it would
             * exceed the dimension. */
            const int i_max = (BS < M - ii) ? ii + BS : M;
            const int j_max = (BS < N - jj) ? jj + BS : N;

            for (int kk = 0; kk < K; kk += BS) {  /* reduction tile */
                const int k_max = (BS < K - kk) ? kk + BS : K;

                for (int i = ii; i < i_max; i++) {
                    const double *a_row = A + (size_t)i * a_stride;
                    double *c_row = C + (size_t)i * bc_stride;

                    for (int j = jj; j < j_max; j++) {
                        /* Continue from the partial sum left by earlier
                         * reduction tiles (or the caller's initial C). */
                        double sum = c_row[j];
                        for (int k = kk; k < k_max; k++) {
                            sum += a_row[k] * B[(size_t)k * bc_stride + (size_t)j];
                        }
                        c_row[j] = sum;
                    }
                }
            }
        }
    }
}