diff --git a/examples/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp b/examples/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp index 07534e0df..8befa5c51 100644 --- a/examples/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp +++ b/examples/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp @@ -104,7 +104,7 @@ static __aicore__ void pv_matmul_n_impl( // Stage 1: TLOAD (MTE2: GM → L1[cur]) // Wait for MTE1 to release L1[cur] (reverse dep from previous iteration) - wait_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); + wait_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); TLOAD(aMatTile[cur], pijGlobal); set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: A in L1 ready TLOAD(bMatTile[cur], vjGlobal); @@ -112,22 +112,22 @@ static __aicore__ void pv_matmul_n_impl( // Stage 2: TMOV (MTE1: L1[cur] → L0[cur]) // Wait for M-pipe to release L0[cur] (reverse dep from previous iteration) - wait_flag(PIPE_M, PIPE_MTE1, (event_t)cur); + wait_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: wait A loaded TMOV(aTile[cur], aMatTile[cur]); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID1); // forward: wait B loaded TMOV(bTile[cur], bMatTile[cur]); - set_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); // reverse: release L1[cur] + set_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); // reverse: release L1[cur] // Stage 3: TMATMUL (M-pipe: L0A[cur] × L0B[cur] → L0C) - set_flag(PIPE_MTE1, PIPE_M, (event_t)cur); // forward: L0[cur] ready - wait_flag(PIPE_MTE1, PIPE_M, (event_t)cur); + set_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); // forward: L0[cur] ready + wait_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); if (i == 0) { TMATMUL(cTile, aTile[cur], bTile[cur]); } else { TMATMUL_ACC(cTile, cTile, aTile[cur], bTile[cur]); } - set_flag(PIPE_M, PIPE_MTE1, (event_t)cur); // reverse: release L0[cur] + set_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); // reverse: release L0[cur] } // Drain outstanding reverse-dependency flags diff --git a/examples/a5/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp b/examples/a5/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp index 3628fe949..3644a5362 100644 --- a/examples/a5/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp +++ b/examples/a5/tensormap_and_ringbuffer/paged_attention_unroll_manual_scope/kernels/aic/aic_pv_matmul.cpp @@ -105,7 +105,7 @@ static __aicore__ void pv_matmul_n_impl( // Stage 1: TLOAD (MTE2: GM → L1[cur]) // Wait for MTE1 to release L1[cur] (reverse dep from previous iteration) - wait_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); + wait_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); TLOAD(aMatTile[cur], pijGlobal); set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: A in L1 ready TLOAD(bMatTile[cur], vjGlobal); @@ -113,22 +113,22 @@ static __aicore__ void pv_matmul_n_impl( // Stage 2: TMOV (MTE1: L1[cur] → L0[cur]) // Wait for M-pipe to release L0[cur] (reverse dep from previous iteration) - wait_flag(PIPE_M, PIPE_MTE1, (event_t)cur); + wait_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: wait A loaded TMOV(aTile[cur], aMatTile[cur]); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID1); // forward: wait B loaded TMOV(bTile[cur], bMatTile[cur]); - set_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); // reverse: release L1[cur] + set_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); // reverse: release L1[cur] // Stage 3: TMATMUL (M-pipe: L0A[cur] × L0B[cur] → L0C) - set_flag(PIPE_MTE1, PIPE_M, (event_t)cur); // forward: L0[cur] ready - wait_flag(PIPE_MTE1, PIPE_M, (event_t)cur); + set_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); // forward: L0[cur] ready + wait_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); if (i == 0) { TMATMUL(cTile, aTile[cur], bTile[cur]); } else { TMATMUL_ACC(cTile, cTile, aTile[cur], bTile[cur]); } - set_flag(PIPE_M, PIPE_MTE1, (event_t)cur); // reverse: release L0[cur] + set_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); // reverse: release L0[cur] } // Drain outstanding reverse-dependency flags diff --git a/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp b/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp index 110e74d9d..ec55f0377 100644 --- a/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp +++ b/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp @@ -106,7 +106,7 @@ static __aicore__ void pv_matmul_n_impl( // Stage 1: TLOAD (MTE2: GM → L1[cur]) // Wait for MTE1 to release L1[cur] (reverse dep from previous iteration) - wait_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); + wait_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); TLOAD(aMatTile[cur], pijGlobal); set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: A in L1 ready TLOAD(bMatTile[cur], vjGlobal); @@ -114,22 +114,22 @@ static __aicore__ void pv_matmul_n_impl( // Stage 2: TMOV (MTE1: L1[cur] → L0[cur]) // Wait for M-pipe to release L0[cur] (reverse dep from previous iteration) - wait_flag(PIPE_M, PIPE_MTE1, (event_t)cur); + wait_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: wait A loaded TMOV(aTile[cur], aMatTile[cur]); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID1); // forward: wait B loaded TMOV(bTile[cur], bMatTile[cur]); - set_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); // reverse: release L1[cur] + set_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); // reverse: release L1[cur] // Stage 3: TMATMUL (M-pipe: L0A[cur] × L0B[cur] → L0C) - set_flag(PIPE_MTE1, PIPE_M, (event_t)cur); // forward: L0[cur] ready - wait_flag(PIPE_MTE1, PIPE_M, (event_t)cur); + set_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); // forward: L0[cur] ready + wait_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); if (i == 0) { TMATMUL(cTile, aTile[cur], bTile[cur]); } else { TMATMUL_ACC(cTile, cTile, aTile[cur], bTile[cur]); } - set_flag(PIPE_M, PIPE_MTE1, (event_t)cur); // reverse: release L0[cur] + set_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); // reverse: release L0[cur] } // Drain outstanding reverse-dependency flags diff --git a/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_4dims/kernels/aic/aic_pv_matmul.cpp b/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_4dims/kernels/aic/aic_pv_matmul.cpp index 64891baa1..779a986c3 100644 --- a/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_4dims/kernels/aic/aic_pv_matmul.cpp +++ b/tests/st/a2a3/tensormap_and_ringbuffer/paged_attention_unroll_4dims/kernels/aic/aic_pv_matmul.cpp @@ -113,7 +113,7 @@ static __aicore__ void pv_matmul_n_impl( // Stage 1: TLOAD (MTE2: GM → L1[cur]) // Wait for MTE1 to release L1[cur] (reverse dep from previous iteration) - wait_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); + wait_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); TLOAD(aMatTile[cur], pijGlobal); set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: A in L1 ready TLOAD(bMatTile[cur], vjGlobal); @@ -121,22 +121,22 @@ static __aicore__ void pv_matmul_n_impl( // Stage 2: TMOV (MTE1: L1[cur] → L0[cur]) // Wait for M-pipe to release L0[cur] (reverse dep from previous iteration) - wait_flag(PIPE_M, PIPE_MTE1, (event_t)cur); + wait_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: wait A loaded TMOV(aTile[cur], aMatTile[cur]); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID1); // forward: wait B loaded TMOV(bTile[cur], bMatTile[cur]); - set_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); // reverse: release L1[cur] + set_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); // reverse: release L1[cur] // Stage 3: TMATMUL (M-pipe: L0A[cur] × L0B[cur] → L0C) - set_flag(PIPE_MTE1, PIPE_M, (event_t)cur); // forward: L0[cur] ready - wait_flag(PIPE_MTE1, PIPE_M, (event_t)cur); + set_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); // forward: L0[cur] ready + wait_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); if (i == 0) { TMATMUL(cTile, aTile[cur], bTile[cur]); } else { TMATMUL_ACC(cTile, cTile, aTile[cur], bTile[cur]); } - set_flag(PIPE_M, PIPE_MTE1, (event_t)cur); // reverse: release L0[cur] + set_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); // reverse: release L0[cur] } // Drain outstanding reverse-dependency flags diff --git a/tests/st/a5/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp b/tests/st/a5/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp index bda8a9bc0..b32165d9a 100644 --- a/tests/st/a5/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp +++ b/tests/st/a5/tensormap_and_ringbuffer/paged_attention_unroll/kernels/aic/aic_pv_matmul.cpp @@ -105,7 +105,7 @@ static __aicore__ void pv_matmul_n_impl( // Stage 1: TLOAD (MTE2: GM → L1[cur]) // Wait for MTE1 to release L1[cur] (reverse dep from previous iteration) - wait_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); + wait_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); TLOAD(aMatTile[cur], pijGlobal); set_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: A in L1 ready TLOAD(bMatTile[cur], vjGlobal); @@ -113,22 +113,22 @@ static __aicore__ void pv_matmul_n_impl( // Stage 2: TMOV (MTE1: L1[cur] → L0[cur]) // Wait for M-pipe to release L0[cur] (reverse dep from previous iteration) - wait_flag(PIPE_M, PIPE_MTE1, (event_t)cur); + wait_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID0); // forward: wait A loaded TMOV(aTile[cur], aMatTile[cur]); wait_flag(PIPE_MTE2, PIPE_MTE1, EVENT_ID1); // forward: wait B loaded TMOV(bTile[cur], bMatTile[cur]); - set_flag(PIPE_MTE1, PIPE_MTE2, (event_t)cur); // reverse: release L1[cur] + set_flag(PIPE_MTE1, PIPE_MTE2, static_cast<::event_t>(cur)); // reverse: release L1[cur] // Stage 3: TMATMUL (M-pipe: L0A[cur] × L0B[cur] → L0C) - set_flag(PIPE_MTE1, PIPE_M, (event_t)cur); // forward: L0[cur] ready - wait_flag(PIPE_MTE1, PIPE_M, (event_t)cur); + set_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); // forward: L0[cur] ready + wait_flag(PIPE_MTE1, PIPE_M, static_cast<::event_t>(cur)); if (i == 0) { TMATMUL(cTile, aTile[cur], bTile[cur]); } else { TMATMUL_ACC(cTile, cTile, aTile[cur], bTile[cur]); } - set_flag(PIPE_M, PIPE_MTE1, (event_t)cur); // reverse: release L0[cur] + set_flag(PIPE_M, PIPE_MTE1, static_cast<::event_t>(cur)); // reverse: release L0[cur] } // Drain outstanding reverse-dependency flags