@@ -469,6 +469,132 @@ inline void vx_barrier_wait(int barrier_id, int phase) {
469469
470470#ifdef __cplusplus
471471}
472+
// ---------------------------------------------------------------------------
// CTA index / dimension proxies
//
// The proxy objects declared in this section let kernel code write
// blockIdx.x, blockDim.y, threadIdx.x, ... as plain member accesses; the
// value is fetched from a RISC-V CSR when the member is converted to
// unsigned int.
//
// CSR numbers used by those proxies. Each macro is only defined when no
// earlier definition exists in the translation unit, so a prior definition
// takes precedence over the value given here.
// NOTE(review): 0xCC9 is not assigned in this list - confirm against the
// hardware CSR map that the gap is intentional.
// ---------------------------------------------------------------------------

// CTA (block) index CSRs: x, y, z.
#if !defined(VX_CSR_CTA_X)
#define VX_CSR_CTA_X 0xCC6
#endif

#if !defined(VX_CSR_CTA_Y)
#define VX_CSR_CTA_Y 0xCC7
#endif

#if !defined(VX_CSR_CTA_Z)
#define VX_CSR_CTA_Z 0xCC8
#endif

// Block dimension CSRs: x, y, z.
#if !defined(VX_CSR_BLOCK_DIM_X)
#define VX_CSR_BLOCK_DIM_X 0xCCA
#endif

#if !defined(VX_CSR_BLOCK_DIM_Y)
#define VX_CSR_BLOCK_DIM_Y 0xCCB
#endif

#if !defined(VX_CSR_BLOCK_DIM_Z)
#define VX_CSR_BLOCK_DIM_Z 0xCCC
#endif

// CSR read by ThreadIdx::X to obtain the warp id used in the flat thread index.
#if !defined(VX_CSR_CTA_WARP_ID)
#define VX_CSR_CTA_WARP_ID 0xCCD
#endif
504+
505+ // Proxy structure for blockIdx with x, y, z members
506+ struct BlockIdx {
507+ struct X {
508+ // Implicit conversion to unsigned int triggers the CSR read
509+ inline operator unsigned int () const {
510+ unsigned int val ;
511+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_CTA_X ));
512+ return val ;
513+ }
514+ } x ;
515+
516+ struct Y {
517+ inline operator unsigned int () const {
518+ unsigned int val ;
519+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_CTA_Y ));
520+ return val ;
521+ }
522+ } y ;
523+
524+ struct Z {
525+ inline operator unsigned int () const {
526+ unsigned int val ;
527+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_CTA_Z ));
528+ return val ;
529+ }
530+ } z ;
531+ };
532+
533+ // Create a global instance of blockIdx
534+ // Marking it static ensures no linker errors if included in multiple files.
535+ // The struct holds no actual data, so the compiler will optimize it away.
536+ static const BlockIdx blockIdx ;
537+
538+ // Proxy structure for blockDim with x, y, z members
539+ struct BlockDim {
540+ struct X {
541+ // Implicit conversion to unsigned int triggers the CSR read
542+ inline operator unsigned int () const {
543+ unsigned int val ;
544+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_BLOCK_DIM_X ));
545+ return val ;
546+ }
547+ } x ;
548+
549+ struct Y {
550+ inline operator unsigned int () const {
551+ unsigned int val ;
552+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_BLOCK_DIM_Y ));
553+ return val ;
554+ }
555+ } y ;
556+
557+ struct Z {
558+ inline operator unsigned int () const {
559+ unsigned int val ;
560+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (val ) : "i" (VX_CSR_BLOCK_DIM_Z ));
561+ return val ;
562+ }
563+ } z ;
564+ };
565+
566+ // Create a global instance of blockDim
567+ // Marking it static ensures no linker errors if included in multiple files.
568+ // The struct holds no actual data, so the compiler will optimize it away.
569+ static const BlockDim blockDim ;
570+
571+ // Proxy structure for threadIdx with x, y, z members
572+ // threadIdx.x gives the flat thread index within the CTA:
573+ // warp_local_id * NUM_THREADS + thread_id_within_warp
574+ struct ThreadIdx {
575+ struct X {
576+ inline operator unsigned int () const {
577+ unsigned int warp_local_id ;
578+ __asm__ __volatile__ ("csrr %0 , %1 " : "=r" (warp_local_id ) : "i" (VX_CSR_CTA_WARP_ID ));
579+ return warp_local_id * vx_num_threads () + vx_thread_id ();
580+ }
581+ } x ;
582+
583+ struct Y {
584+ inline operator unsigned int () const {
585+ return 0 ;
586+ }
587+ } y ;
588+
589+ struct Z {
590+ inline operator unsigned int () const {
591+ return 0 ;
592+ }
593+ } z ;
594+ };
595+
596+ static const ThreadIdx threadIdx ;
597+
598+ #endif // __cplusplus
599+
474600#endif // __VX_INTRINSICS_H__
0 commit comments