Skip to content

Commit f6fb22d

Browse files
committed
Update with latest development
1 parent a555c60 commit f6fb22d

491 files changed

Lines changed: 161927 additions & 1608 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ thirdparty
77
include/record/wrap_defines.h
88
.clang-format
99
*.log
10-
*.err
10+
*.err
11+
.DS_Store

CMakeLists.txt

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.8)
22
project(JSIToolkit)
33

44
option(USE_ROCM "Enable ROCM tracing & profiling" OFF)
5+
option(USE_MATRIX "Enable MATRIX tracing & profiling" OFF)
56
option(USE_LIBUNWIND "Enable Libunwind backtracing" ON)
67
option(USE_ALLOCA "Enable GNU C extension alloca" ON)
78

@@ -12,10 +13,12 @@ endif()
1213

1314
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
1415

16+
set(CMAKE_CXX_FLAGS_DEBUG "-fsanitize=address -fsanitize=undefined -fsanitize=leak ${CMAKE_CXX_FLAGS_DEBUG} -Wall")
17+
1518
set(CMAKE_CXX_STANDARD 20)
1619
set(CMAKE_CXX_STANDARD_REQUIRED ON)
17-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++20")
18-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
20+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gdwarf-4")
21+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -gdwarf-4")
1922
try_compile(HAS_FS "${CMAKE_BINARY_DIR}/temp"
2023
"${CMAKE_SOURCE_DIR}/cmake/tests/has_filesystem.cc"
2124
CMAKE_FLAGS -DCMAKE_CXX_STANDARD=17 -DCMAKE_CXX_STANDARD_REQUIRED=ON
@@ -92,6 +95,7 @@ find_package (LibElf REQUIRED)
9295
find_package (LibDwarf REQUIRED)
9396
find_package(OpenMP REQUIRED)
9497
find_package(Boost REQUIRED)
98+
find_package(LLVMOpenMP REQUIRED)
9599

96100
# NOTE: PKG_CONFIG_PATH is intentionally left untouched here. The previous
# `set(ENV{PKG_CONFIG_PATH} $prefix)` did not expand anything ("$prefix" is not
# CMake variable syntax), so it replaced the caller's pkg-config search path
# with the literal string "$prefix" for the rest of the configure step.
97101
find_package(PkgConfig)
@@ -100,6 +104,7 @@ pkg_search_module(sqlite REQUIRED sqlite3)
100104
include_directories(${sqlite_INCLUDE_DIRS})
101105
link_directories(${sqlite_LIBRARY_DIRS})
102106

107+
add_definitions(-DFMT_HEADER_ONLY)
103108

104109
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
105110

@@ -120,6 +125,18 @@ endif ()
120125

121126
include_directories(./lib/pse/include/)
122127

128+
# find_package(fmt CONFIG REQUIRED)
129+
# get_target_property(fmt_INCLUDE_DIRS fmt::fmt INTERFACE_INCLUDE_DIRECTORIES)
130+
# message(INFO "fmt_INCLUDE_DIRS: ${fmt_INCLUDE_DIRS}")
131+
# link_libraries(fmt::fmt)
132+
# include_directories(${fmt_INCLUDE_DIRS})
133+
134+
# find_package(spdlog CONFIG REQUIRED)
135+
# link_libraries(spdlog::spdlog)
136+
# get_target_property(spdlog_INCLUDE_DIRS spdlog::spdlog INTERFACE_INCLUDE_DIRECTORIES)
137+
# message(INFO "spdlog_INCLUDE_DIRS: ${spdlog_INCLUDE_DIRS}")
138+
# include_directories(${spdlog_INCLUDE_DIRS})
139+
123140
add_subdirectory(lib)
124141
add_subdirectory(tool)
125142

README.md

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,23 @@ For convenience, you can build and install the dependencies via spack:
2525
```
2626
git clone https://github.com/spack/spack.git
2727
. ./spack/share/spack/setup-env.sh
28-
spack install dyninst papi libunwind otf2
28+
spack install dyninst libunwind otf2
2929
spack install boost@1.86.0 sqlite@3.40.1 range-v3@0.12.0 fmt@10.2.1 spdlog@1.14.1 magic-enum@0.9.6
30-
spack load dyninst papi libunwind otf2
30+
spack load dyninst libunwind otf2
3131
spack load boost@1.86.0 sqlite@3.40.1 range-v3@0.12.0 fmt@10.2.1 spdlog@1.14.1 magic-enum@0.9.6
3232
cd ..
3333
```
3434

35+
If the stock (unmodified) PAPI is sufficient, you can build and install it via spack:
36+
```
37+
spack install papi
38+
spack load papi
39+
```
40+
Otherwise, use your custom version of PAPI and tell JSI-Toolkit where it is installed:
41+
```
42+
export PAPI_PATH=<path-to-papi>
43+
```
44+
3545
#### Install of libdwarf
3646

3747
Libdwarf needs to be installed manually (e.g., version 0.11.1). Installation instructions are available at: https://www.prevanders.net/dwarf.html#releases
@@ -44,6 +54,7 @@ cd build
4454
make -j
4555
make install
4656
export PKG_CONFIG_PATH=<path-to-libdwarf>/lib/pkgconfig:$PKG_CONFIG_PATH
57+
export CMAKE_PREFIX_PATH=<path-to-libdwarf>:$CMAKE_PREFIX_PATH
4758
```
4859

4960
### Build & Install

build.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#!/bin/bash
22
mkdir -p build && cd build
33
# -DARCH_HYGON_C86=ON is only valid on Hygon C86 platforms.
4-
# cmake -DCMAKE_INSTALL_PREFIX=`pwd`/../install -DARCH_HYGON_C86=ON -DUSE_ROCM=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
4+
cmake -DCMAKE_INSTALL_PREFIX=`pwd`/../install -DARCH_HYGON_C86=ON -DUSE_ROCM=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
55
# CPU-only
6-
cmake -DCMAKE_INSTALL_PREFIX=`pwd`/../install -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
6+
# cmake -DCMAKE_INSTALL_PREFIX=`pwd`/../install -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
7+
# cmake -DCMAKE_INSTALL_PREFIX=`pwd`/../install -DUSE_MATRIX=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
78

89
make -j install

clean_build.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
# Remove the build tree, the install tree and the generated wrapper header.
# Repo-relative paths are used instead of `cd` so that a missing directory can
# never cause a later `rm` or `cd ..` to run from the wrong location.
rm -rf build install include/record/wrap_defines.h
# Remove the artifacts left behind in the MATRIX device source directory.
# `rm -f` ignores files that do not exist.
rm -f lib/accl/matrix/device/libmtpmu.a \
      lib/accl/matrix/device/MT_PMU_collector.o \
      lib/accl/matrix/device/libmt_dev.a \
      lib/accl/matrix/device/mt_double_buffer.o \
      lib/accl/matrix/device/instrumented_func_dev.o

cmake/FindLLVMOpenMP.cmake

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# FindLLVMOpenMP.cmake
#
# This module finds if LLVM OpenMP is installed and sets the necessary variables
#
# Search hints are read from the environment at configure time:
#   LLVMOpenMP_DIR          - installation prefix; its include/ and lib/
#                             sub-directories are searched
#   LLVMOpenMP_INCLUDE_DIR  - optional explicit location of ompt.h
#
# It defines the following variables:
#   LLVMOpenMP_FOUND        - Set to true if LLVM OpenMP is found
#   LLVMOpenMP_INCLUDE_DIRS - Include directories for LLVM OpenMP
#   LLVMOpenMP_LIBRARIES    - Libraries for LLVM OpenMP

# Report the environment-provided prefix, because that is what the find_*()
# calls below actually use as their hint.
message(STATUS "LLVMOpenMP_DIR: $ENV{LLVMOpenMP_DIR}")
message(STATUS "Searching for ompt.h in $ENV{LLVMOpenMP_DIR}/include")
message(STATUS "Searching for libomp.so in $ENV{LLVMOpenMP_DIR}/lib")

# An explicit LLVMOpenMP_INCLUDE_DIR keeps priority; the installation prefix is
# searched as well so that setting LLVMOpenMP_DIR alone is sufficient.
find_path(LLVMOpenMP_INCLUDE_DIR
  NAMES ompt.h
  HINTS $ENV{LLVMOpenMP_INCLUDE_DIR} $ENV{LLVMOpenMP_DIR}
  PATH_SUFFIXES include
  DOC "Path to the LLVM OpenMP include directory"
)

find_library(LLVMOpenMP_LIBRARY
  NAMES omp
  HINTS $ENV{LLVMOpenMP_DIR}
  PATH_SUFFIXES lib
  DOC "Path to the LLVM OpenMP library"
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LLVMOpenMP DEFAULT_MSG
  LLVMOpenMP_INCLUDE_DIR LLVMOpenMP_LIBRARY)

if(LLVMOpenMP_FOUND)
  set(LLVMOpenMP_INCLUDE_DIRS ${LLVMOpenMP_INCLUDE_DIR})
  set(LLVMOpenMP_LIBRARIES ${LLVMOpenMP_LIBRARY})
  message(STATUS "Found ompt.h in ${LLVMOpenMP_INCLUDE_DIRS}")
  message(STATUS "Found libomp.so in ${LLVMOpenMP_LIBRARIES}")
else()
  # Leave the result variables unset when the package was not found.
  set(LLVMOpenMP_INCLUDE_DIRS)
  set(LLVMOpenMP_LIBRARIES)
endif()

mark_as_advanced(LLVMOpenMP_INCLUDE_DIR LLVMOpenMP_LIBRARY)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
2+
3+
#ifndef MATRIX_PMU_COLLECTOR_C_MT_PMU_COLLECTOR_H
#define MATRIX_PMU_COLLECTOR_C_MT_PMU_COLLECTOR_H

/*
 * C interface of the MATRIX PMU collector: status codes, the event-set record,
 * the collector / PMU_* entry points (implemented elsewhere) and two inline
 * helpers that translate between event names and event codes.
 */

/* System headers are included before the extern "C" block opens. */
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Number of known PMU events; also the length of code2name[] below. */
#define MAX_PMU_NUM 26
#define MAX_EVENTSET_NUM 24

/* Value stored in an event code when a name cannot be translated. */
#define PMU_NULL (-1)

/* Status codes returned by the PMU_* functions; PMU_OK (0) means success. */
typedef enum {
    PMU_OK = 0,
    PMU_EINVAL = -1,
    PMU_ENOMEM = -2,
    PMU_ESYS = -3,
    PMU_EBIGSET = -4,
    PMU_ENOEVNT = -7,
    PMU_EISRUN = -10
}pmu_api_status_t;

/* Book-keeping record for one event set (at most MAX_PMU_NUM event codes). */
typedef struct {
    int eventNum;
    int events[MAX_PMU_NUM];
    int running;
}ESInfo;

void MT_PMU_collector_init();
void MT_PMU_collector_fin();
void MT_PMU_collector_get_all(unsigned long*);
int MT_PMU_collector_get_num();

pmu_api_status_t PMU_library_init ();
pmu_api_status_t PMU_add_event (int index, int Event);
pmu_api_status_t PMU_add_events (int EventSet, int *Events, int number);
pmu_api_status_t PMU_add_named_event (int EventSet, const char *EventName);
pmu_api_status_t PMU_create_eventset (int *EventSet);
pmu_api_status_t PMU_destroy_eventset (int *EventSet);
pmu_api_status_t PMU_cleanup_eventset (int EventSet);

pmu_api_status_t PMU_start (int EventSet);
pmu_api_status_t PMU_stop (int EventSet, unsigned long *values);
pmu_api_status_t PMU_read (int EventSet, unsigned long *values);
pmu_api_status_t PMU_reset (int EventSet);
void PMU_shutdown();

/* Event names indexed by event code (0 .. MAX_PMU_NUM - 1). */
static const char *code2name[MAX_PMU_NUM] =
    {"CYCLE", "BRTK", "IACK", "EXEP", "L1DRWM", "L1DRWH", "L1PRM", "L1PRH", "DPStall",
     "VMStall", "SMCSStall", "STALL", "NOP", "NonNOP", "EACK", "SIEU", "SMAC1",
     "SMAC2", "SBR", "SLD", "VIEU", "VMAC1", "VMAC2", "VMAC3", "VLS0", "VLS1"};

/*
 * Translate an event name into its event code.
 *
 * EventName: NUL-terminated name, compared case-sensitively against code2name[].
 * EventCode: receives the code on success, PMU_NULL otherwise.
 *
 * Returns PMU_OK when the name is known, PMU_ENOEVNT when it is not, and
 * PMU_EINVAL when either pointer is NULL.
 */
static inline pmu_api_status_t PMU_event_name_to_code(const char *EventName, int *EventCode) {
    int code;

    if (EventCode == NULL) {
        return PMU_EINVAL;
    }
    *EventCode = PMU_NULL;
    if (EventName == NULL) {
        return PMU_EINVAL;
    }
    /* The table is the single source of truth: index == event code. */
    for (code = 0; code < MAX_PMU_NUM; ++code) {
        if (strcmp(EventName, code2name[code]) == 0) {
            *EventCode = code;
            return PMU_OK;
        }
    }
    return PMU_ENOEVNT;
}

/*
 * Translate an event code into its name.
 *
 * EventCode: code in the range 0 .. MAX_PMU_NUM - 1.
 * EventName: caller-provided buffer that receives the NUL-terminated name.
 *            No length is passed, so it must be large enough for the longest
 *            string written here ("Invalid Event Code", 19 bytes with the NUL).
 *
 * Returns PMU_OK on success, PMU_ENOEVNT for an out-of-range code (the buffer
 * then holds "Invalid Event Code"), and PMU_EINVAL when EventName is NULL.
 */
static inline pmu_api_status_t PMU_event_code_to_name(int EventCode, char *EventName) {
    if (EventName == NULL) {
        return PMU_EINVAL;
    }
    if (EventCode < 0 || EventCode >= MAX_PMU_NUM) {
        strcpy(EventName, "Invalid Event Code");
        return PMU_ENOEVNT;
    }
    strcpy(EventName, code2name[EventCode]);
    return PMU_OK;
}

#ifdef __cplusplus
}
#endif

#endif //MATRIX_PMU_COLLECTOR_C_MT_PMU_COLLECTOR_H

include/instrument/backtrace.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,20 @@ class BacktraceTree {
115115

116116
void *backtrace_context_ip(backtrace_context_t ctx) const;
117117

118+
void backtrace_set_max_size(int max_bt_size);
119+
118120
// uint64_t backtrace_get_callsite();
119121

122+
int rank() const {return _rank;}
123+
void set_rank(int r) { _rank = r;}
124+
120125
private:
121126
BacktraceMode _mode;
122127
char *_loaded_string_buffer = nullptr;
123128
std::vector<void *> _backtrace_buffer;
124129
std::vector<backtrace_node_t> _bt_nodes;
125130
int _max_bt_size;
131+
int _rank;
126132
};
127133

128134
}}// namespace jsi::toolkit
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
2+
3+
// Declarations of the instrumented_* device-side entry points. Only prototypes
// are visible here; the implementations live elsewhere.
#ifndef INSTRUMENTED_FUN_DEV_H
4+
#define INSTRUMENTED_FUN_DEV_H
5+
6+
#include <stdint.h>
7+
#include <stdbool.h>
8+
9+
// Fixed base addresses exposed as void* constants.
// NOTE(review): names suggest the DDR / AM / SM / HBM memory regions - confirm against the platform manual.
#define DDR_BASE ((void*)0x880000000)
10+
#define AM_BASE ((void*)0x400000000)
11+
#define SM_BASE ((void*)0x400100400)
12+
#define HBM_BASE ((void*)0x890000000)
13+
14+
// Defined in another translation unit.
// NOTE(review): 24 slots presumably means one per device thread/core - confirm.
extern unsigned long correlation_id[24];
15+
16+
// instrumented dev sync api
17+
void *instrumented_vector_malloc(unsigned int bytes);
18+
int instrumented_vector_free(void *ptr);
19+
int instrumented_vector_load(void *mem, void *buf, unsigned int bytes);
20+
int instrumented_vector_store(void *buf, void *mem, unsigned int bytes);
21+
void *instrumented_scalar_malloc(unsigned int bytes);
22+
int instrumented_scalar_free(void *ptr);
23+
int instrumented_scalar_load(void *mem, void *buf, unsigned int bytes);
24+
int instrumented_scalar_store(void *buf, void *mem, unsigned int bytes);
25+
void *instrumented_hbm_malloc(unsigned long bytes);
26+
void instrumented_hbm_free(void *ptr);
27+
28+
// instrumented dev async api
29+
void instrumented_kernel(const char* name, uint32_t host_pid, uint64_t kernel_cid, uint32_t kid, uint32_t phase);
30+
void instrumented_func(void *func, const char *func_name, unsigned long cid, uint32_t phase);
31+
32+
int instrumented_vector_load_async(void *mem, void *buf, unsigned int bytes);
33+
int instrumented_vector_store_async(void *buf, void *mem, unsigned int bytes);
34+
int instrumented_scalar_load_async(void *mem, void *buf, unsigned int bytes);
35+
int instrumented_scalar_store_async(void *buf, void *mem, unsigned int bytes);
36+
37+
// DMA transfer variants. All take source and destination row count / row size / row step.
// NOTE(review): the unsigned int return value is presumably a channel number for the *_wait calls below - confirm.
unsigned int instrumented_dma_p2p(void *src, unsigned long src_row_num, unsigned int src_row_size, int src_row_step,
38+
void *dst, unsigned long dst_row_num, unsigned int dst_row_size, int dst_row_step,
39+
bool row_syn, unsigned int synmask);
40+
41+
42+
unsigned int instrumented_dma_broadcast(void *src, unsigned long src_row_num, unsigned int src_row_size, int src_row_step,
43+
void *dst, unsigned long dst_row_num, unsigned int dst_row_size, int dst_row_step,
44+
unsigned core_id, unsigned int barrier_id);
45+
46+
unsigned int instrumented_dma_segment(void *src, unsigned long src_row_num, unsigned int src_row_size, int src_row_step,
47+
void *dst, unsigned long dst_row_num, unsigned int dst_row_size, int dst_row_step,
48+
unsigned int c_start, unsigned int c_num, unsigned int c_step, unsigned int barrier_id);
49+
50+
unsigned int instrumented_dma_sg(void *src_base, void *src_index, unsigned long src_row_num, unsigned int src_row_size,
51+
int src_row_step, void *dst, unsigned long dst_row_num, unsigned int dst_row_size, int dst_row_step);
52+
53+
void instrumented_dma_wait(unsigned int ch);
54+
void instrumented_dma_wait_p2p(unsigned int ch_no);
55+
void instrumented_dma_wait_sg(unsigned int ch_no);
56+
57+
// Barrier and reader/writer-lock entry points, addressed by numeric id.
void instrumented_group_barrier(unsigned int b_id);
58+
void instrumented_core_barrier(unsigned int b_id, unsigned int num);
59+
void instrumented_core_barrier_wait(unsigned int b_id, unsigned int num, unsigned long wait_clk);
60+
int instrumented_rwlock_try_rdlock(unsigned int lock_id);
61+
int instrumented_rwlock_try_wrlock(unsigned int lock_id);
62+
void instrumented_rwlock_rdlock(unsigned int lock_id);
63+
void instrumented_rwlock_wrlock(unsigned int lock_id);
64+
void instrumented_rwlock_unlock(unsigned int lock_id);
65+
66+
// Interrupt handler registration and CPU interrupt entry points.
unsigned long instrumented_intr_handler_register(void(*func)(int no));
67+
void instrumented_cpu_interrupt(unsigned long val);
68+
69+
#endif
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
2+
// Declarations of the instrumented_hthread_* host-side entry points. Only
// prototypes are visible here; the implementations live elsewhere.
// This header includes <cstdint>, so it can only be consumed from C++.
#ifndef INSTRUMENTED_FUNC_HOST
3+
#define INSTRUMENTED_FUNC_HOST
4+
5+
#include <cstdint>
6+
// instrumented host apis
7+
void* instrumented_hthread_malloc(int, int, int);
8+
void instrumented_hthread_free(void*);
9+
// The numeric suffix of the two group_create variants matches their parameter count (6 and 2).
int instrumented_hthread_group_create6(int, int, const char*, int, int, uint64_t*);
10+
int instrumented_hthread_group_create2(int, int);
11+
int instrumented_hthread_group_masked_create(int, uint32_t, const char*, int, int, uint64_t*);
12+
int instrumented_hthread_group_exec(int, const char*, int, int, uint64_t*);
13+
int instrumented_hthread_group_wait(int);
14+
int instrumented_hthread_group_destroy(int);
15+
int instrumented_hthread_dat_load(int, const char *);
16+
int instrumented_hthread_dat_unload(int);
17+
int instrumented_hthread_dev_open(int);
18+
int instrumented_hthread_dev_close(int);
19+
20+
// Device/group status queries and barrier, rwlock and interrupt entry points.
// NOTE(review): most parameters are unnamed ints; their meaning (device id, group id, ...) must be confirmed against the implementation.
int instrumented_hthread_dev_owner(int);
21+
int instrumented_hthread_group_get_status(int);
22+
int instrumented_hthread_barrier_create(int);
23+
void instrumented_hthread_barrier_destroy(int);
24+
int instrumented_hthread_rwlock_create(int);
25+
void instrumented_hthread_rwlock_destroy(int);
26+
void instrumented_hthread_intr_send(int g_id, int t_id, unsigned long intr_id);
27+
unsigned long instrumented_hthread_handler_register(int thread_id, void (*func)(int id, unsigned long val));
28+
29+
#endif

0 commit comments

Comments
 (0)