Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion libkineto/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ set(KINETO_LIBRARY_TYPE "default" CACHE STRING
set_property(CACHE KINETO_LIBRARY_TYPE PROPERTY STRINGS default shared)
option(KINETO_BUILD_TESTS "Build kineto unit tests" ON)

set(LIBKINETO_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(LIBKINETO_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(LIBKINETO_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
set(LIBKINETO_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
Expand Down Expand Up @@ -181,8 +182,10 @@ elseif(KINETO_LIBRARY_TYPE STREQUAL "static")
$<TARGET_OBJECTS:kineto_api>)
elseif(KINETO_LIBRARY_TYPE STREQUAL "shared")
add_library(kineto SHARED
$<TARGET_OBJECTS:kineto_base>)
$<TARGET_OBJECTS:kineto_base>
$<TARGET_OBJECTS:kineto_api>)
set_property(TARGET kineto_base PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET kineto_api PROPERTY POSITION_INDEPENDENT_CODE ON)
set_target_properties(kineto PROPERTIES
CXX_VISIBILITY_PRESET hidden)
else()
Expand Down Expand Up @@ -217,4 +220,5 @@ install(EXPORT kinetoLibraryConfig DESTINATION share/cmake/kineto

if(KINETO_BUILD_TESTS)
add_subdirectory(test)
add_subdirectory("${LIBKINETO_THIRDPARTY_DIR}/googletest")
endif()
3 changes: 3 additions & 0 deletions libkineto/include/ActivityProfilerInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ class ActivityProfilerInterface {
const std::set<ActivityType>& activityTypes,
const std::string& configStr = "") {}

// Toggle GPU tracing as a trace is running to omit certain parts of a graph
virtual void toggleCollectionDynamic(const bool enable) {}

// Start recording, potentially reusing any buffers allocated since
// prepareTrace was called.
virtual void startTrace() {}
Expand Down
24 changes: 16 additions & 8 deletions libkineto/include/ActivityType.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <array>
#include <string>
#include <set>

namespace libkineto {

Expand All @@ -24,23 +25,30 @@ enum class ActivityType {
GPU_MEMSET,
CONCURRENT_KERNEL, // on-device kernels
EXTERNAL_CORRELATION,
MUSA_RUNTIME, // host side musa runtime events
MUSA_DRIVER, // host side musa driver events
CUDA_RUNTIME, // host side cuda runtime events
CUDA_DRIVER, // host side cuda driver events
CPU_INSTANT_EVENT, // host side point-like events
PYTHON_FUNCTION,
OVERHEAD, // MUPTI induced overhead events sampled from its overhead API.
OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
MTIA_RUNTIME, // host side MTIA runtime events
MTIA_CCP_EVENTS, // MTIA ondevice CCP events
CUDA_SYNC, // synchronization events between runtime and kernels

// Optional Activity types
MUSA_SYNC, // synchronization events between runtime and kernels
GLOW_RUNTIME, // host side glow runtime events
MTIA_RUNTIME, // host side MTIA runtime events
MUSA_PROFILER_RANGE, // MUPTI Profiler range for performance metrics
MTIA_CCP_EVENTS, // MTIA ondevice CCP events
CUDA_PROFILER_RANGE, // MUPTI Profiler range for performance metrics
HPU_OP, // HPU host side runtime event
XPU_RUNTIME, // host side xpu runtime events
COLLECTIVE_COMM, // collective communication
MTIA_WORKLOADD, // MTIA workloadd events

// PRIVATEUSE1 Activity types are used for custom backends.
// The corresponding device type is `DeviceType::PrivateUse1` in PyTorch.
PRIVATEUSE1_RUNTIME, // host side privateUse1 runtime events
PRIVATEUSE1_DRIVER, // host side privateUse1 driver events

ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
OPTIONAL_ACTIVITY_TYPE_START = MUSA_SYNC,
OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
};

const char* toString(ActivityType t);
Expand Down
6 changes: 5 additions & 1 deletion libkineto/include/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -493,4 +493,8 @@ class Config : public AbstractConfig {

constexpr char kUseDaemonEnvVar[] = "KINETO_USE_DAEMON";

} // namespace KINETO_NAMESPACE
#if __linux__
bool isDaemonEnvVarSet();
#endif

} // namespace libkineto
2 changes: 1 addition & 1 deletion libkineto/include/GenericTraceActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ constexpr unsigned int kLinkAsyncCpuGpu = 2;
class GenericTraceActivity : public ITraceActivity {
public:
GenericTraceActivity()
: activityType(ActivityType::ENUM_COUNT), traceSpan_(NULL) {}
: activityType(ActivityType::ENUM_COUNT), traceSpan_(nullptr) {}

GenericTraceActivity(
const TraceSpan& trace, ActivityType type, const std::string& name)
Expand Down
29 changes: 26 additions & 3 deletions libkineto/include/IActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,19 @@ enum class TraceStatus {
};

/* DeviceInfo:
* Can be used to specify process name, PID and device label
* Can be used to specify process name, sort order, PID and device label.
* The sort order is determined by the sortIndex field to handle ordering of
* processes and gpu rows in the trace viewer.
*/
struct DeviceInfo {
DeviceInfo(int64_t id, const std::string& name, const std::string& label)
: id(id), name(name), label(label) {}
DeviceInfo(
int64_t id,
int64_t sortIndex,
const std::string& name,
const std::string& label)
: id(id), sortIndex(sortIndex), name(name), label(label) {}
int64_t id; // process id
int64_t sortIndex; // position in trace view
const std::string name; // process name
const std::string label; // device label
};
Expand All @@ -66,6 +73,10 @@ struct ResourceInfo {
int64_t deviceId; // id of device which owns this resource (specified in DeviceInfo.id)
const std::string name; // resource name
};

using getLinkedActivityCallback =
std::function<const ITraceActivity*(int32_t)>;

/* IActivityProfilerSession:
* an opaque object that can be used by a high level profiler to
* start/stop and return trace events.
Expand All @@ -91,6 +102,12 @@ class IActivityProfilerSession {
// processes trace activities using logger
virtual void processTrace(ActivityLogger& logger) = 0;

virtual void processTrace(ActivityLogger& logger,
getLinkedActivityCallback /*getLinkedActivity*/,
int64_t /*startTime*/, int64_t /*endTime*/) {
processTrace(logger);
}

// returns device info used in this trace, could be nullptr
virtual std::unique_ptr<DeviceInfo> getDeviceInfo() = 0;

Expand All @@ -103,6 +120,12 @@ class IActivityProfilerSession {
// XXX define trace formats
// virtual save(string name, TraceFormat format)

virtual void pushCorrelationId(uint64_t /*id*/) {}
virtual void popCorrelationId() {}

virtual void pushUserCorrelationId(uint64_t /*id*/) {}
virtual void popUserCorrelationId() {}

protected:
TraceStatus status_ = TraceStatus::READY;
};
Expand Down
4 changes: 0 additions & 4 deletions libkineto/include/ThreadUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,3 @@ std::string processName(int32_t pid);
std::vector<std::pair<int32_t, std::string>> pidCommandPairsOfAncestors();

} // namespace libkineto

#ifdef HAS_ROCTRACER
using namespace libkineto;
#endif
15 changes: 11 additions & 4 deletions libkineto/src/output_base.h → libkineto/include/output_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,26 @@
#include <thread>
#include <unordered_map>

#include "ActivityBuffers.h"
// TODO(T90238193)
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
#include "IActivityProfiler.h"
#include "GenericTraceActivity.h"
#include "ThreadUtil.h"
#include "TraceSpan.h"

namespace KINETO_NAMESPACE {
class Config;
struct ActivityBuffers;
}

namespace libkineto {

using namespace KINETO_NAMESPACE;

// Used by sortIndex to put GPU tracks at the bottom
// of the trace timelines. The largest valid CPU PID is 4,194,304,
// so 5000000 is enough to guarantee that GPU tracks are sorted after CPU.
constexpr int64_t kExceedMaxPid = 5000000;

class ActivityLogger {
public:

Expand Down Expand Up @@ -60,7 +67,7 @@ class ActivityLogger {
}

virtual void finalizeTrace(
const KINETO_NAMESPACE::Config& config,
const Config& config,
std::unique_ptr<ActivityBuffers> buffers,
int64_t endTime,
std::unordered_map<std::string, std::vector<std::string>>& metadata) = 0;
Expand All @@ -69,4 +76,4 @@ class ActivityLogger {
ActivityLogger() = default;
};

} // namespace KINETO_NAMESPACE
} // namespace libkineto
2 changes: 1 addition & 1 deletion libkineto/include/time_since_epoch.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace libkineto {
template <class ClockT>
inline int64_t timeSinceEpoch(
const std::chrono::time_point<ClockT>& t) {
return std::chrono::duration_cast<std::chrono::microseconds>(
return std::chrono::duration_cast<std::chrono::nanoseconds>(
t.time_since_epoch())
.count();
}
Expand Down
1 change: 1 addition & 0 deletions libkineto/libkineto_defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def get_libkineto_public_headers():
"include/ThreadUtil.h",
"include/libkineto.h",
"include/time_since_epoch.h",
"include/output_base.h",
]

# kineto code should be updated to not have to
Expand Down
8 changes: 4 additions & 4 deletions libkineto/sample_programs/README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# How to Run Sample Programs

To run `kineto_playground.cpp` in the `sample_programs` folder, you can use the following steps: (Note: scripts below are hard-coded to a specific set of sample programs, you can modify them to work with a different program. TODO: make these scripts more flexible.)
To run `kineto_mupti_profiler.cpp` in the `sample_programs` folder, you can use the following steps: (Note: scripts below are hard-coded to a specific set of sample programs, you can modify them to work with a different program. TODO: make these scripts more flexible.)

1. `./build-cu.sh`
- this generates `kplay-cu.o`
1. `./build-mu.sh`
- this generates `kplay-mu.o`
2. `./build.sh`
- this generates binary called `main`
3. Run `./main`
- runs your code defined in `kineto_playground.cpp`
- runs your code defined in `kineto_mupti_profiler.cpp`
3 changes: 2 additions & 1 deletion libkineto/sample_programs/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ g++ \
kineto_mupti_profiler.cpp \
-o main \
kplay_mu.o \
/usr/local/lib/libkineto.a \
/home/pytorch/torch/lib/libkineto.a \
-I../include \
-I/usr/local/musa/include \
-I../third_party/fmt/include \
-I/usr/local/include/kineto \
Expand Down
16 changes: 10 additions & 6 deletions libkineto/sample_programs/kineto_mupti_profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

using namespace kineto;

static const std::string kFileName = "/tmp/kineto_playground_trace.json";
static const std::string kFileName = "./kineto_playground_trace.json";

int main() {
warmup();
Expand All @@ -34,18 +34,22 @@ int main() {
libkineto::ActivityType::GPU_MEMSET,
libkineto::ActivityType::CONCURRENT_KERNEL,
libkineto::ActivityType::EXTERNAL_CORRELATION,
libkineto::ActivityType::MUSA_RUNTIME,
libkineto::ActivityType::MUSA_DRIVER,
libkineto::ActivityType::CUDA_RUNTIME,
libkineto::ActivityType::CUDA_DRIVER,
libkineto::ActivityType::CPU_INSTANT_EVENT,
libkineto::ActivityType::PYTHON_FUNCTION,
libkineto::ActivityType::OVERHEAD,
libkineto::ActivityType::MUSA_SYNC,
libkineto::ActivityType::GLOW_RUNTIME,
libkineto::ActivityType::MTIA_RUNTIME,
libkineto::ActivityType::MUSA_PROFILER_RANGE,
libkineto::ActivityType::MTIA_CCP_EVENTS,
libkineto::ActivityType::CUDA_SYNC,
libkineto::ActivityType::GLOW_RUNTIME,
libkineto::ActivityType::CUDA_PROFILER_RANGE,
libkineto::ActivityType::HPU_OP,
libkineto::ActivityType::XPU_RUNTIME,
libkineto::ActivityType::COLLECTIVE_COMM,
libkineto::ActivityType::MTIA_WORKLOADD,
libkineto::ActivityType::PRIVATEUSE1_RUNTIME,
libkineto::ActivityType::PRIVATEUSE1_DRIVER,
};

libkineto_init(false, true);
Expand Down
2 changes: 1 addition & 1 deletion libkineto/sample_programs/kineto_playground.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

using namespace kineto;

static const std::string kFileName = "/tmp/kineto_playground_trace.json";
static const std::string kFileName = "./kineto_playground_trace.json";

int main() {
warmup();
Expand Down
14 changes: 6 additions & 8 deletions libkineto/src/ActivityProfilerController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
#include "ActivityTrace.h"

#include "MuptiActivityApi.h"
#ifdef HAS_ROCTRACER
#include "RoctracerActivityApi.h"
#endif

#include "ThreadUtil.h"
#include "output_json.h"
Expand All @@ -45,14 +42,11 @@ void ActivityProfilerController::setLoggerCollectorFactory(
ActivityProfilerController::ActivityProfilerController(
ConfigLoader& configLoader, bool cpuOnly)
: configLoader_(configLoader) {
#ifdef HAS_ROCTRACER
profiler_ = std::make_unique<MuptiActivityProfiler>(
RoctracerActivityApi::singleton(), cpuOnly);
#else
// Initialize ChromeTraceBaseTime first of all.
profiler_ = std::make_unique<MuptiActivityProfiler>(
MuptiActivityApi::singleton(), cpuOnly);
#endif
configLoader_.addHandler(ConfigLoader::ConfigKind::ActivityProfiler, this);
ChromeTraceBaseTime::singleton().init();

#if !USE_GOOGLE_LOG
if (loggerCollectorFactory()) {
Expand Down Expand Up @@ -363,6 +357,10 @@ void ActivityProfilerController::prepareTrace(const Config& config) {
profiler_->setSyncProfilingRunning(true);
}

void ActivityProfilerController::toggleCollectionDynamic(const bool enable) {
profiler_->toggleCollectionDynamic(enable);
}

void ActivityProfilerController::startTrace() {
if (profiler_->isOnDemandProfilingRunning()) {
LOG(WARNING) << "Ignored startTrace request - on-demand profiler busy";
Expand Down
1 change: 1 addition & 0 deletions libkineto/src/ActivityProfilerController.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class ActivityProfilerController : public ConfigLoader::ConfigHandler {

// These API are used for Synchronous Tracing.
void prepareTrace(const Config& config);
void toggleCollectionDynamic(const bool enable);
void startTrace();
void step();
std::unique_ptr<ActivityTraceInterface> stopTrace();
Expand Down
16 changes: 4 additions & 12 deletions libkineto/src/ActivityProfilerProxy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@
#include "MuptiActivityApi.h"
#include "Logger.h"
#include <chrono>
#ifdef HAS_ROCTRACER
#include "RoctracerActivityApi.h"
#endif

namespace KINETO_NAMESPACE {

Expand Down Expand Up @@ -82,6 +79,10 @@ void ActivityProfilerProxy::prepareTrace(
controller_->prepareTrace(config);
}

void ActivityProfilerProxy::toggleCollectionDynamic(const bool enable) {
controller_->toggleCollectionDynamic(enable);
}

void ActivityProfilerProxy::startTrace() {
controller_->startTrace();
}
Expand All @@ -102,20 +103,11 @@ bool ActivityProfilerProxy::isActive() {
void ActivityProfilerProxy::pushCorrelationId(uint64_t id) {
MuptiActivityApi::pushCorrelationID(id,
MuptiActivityApi::CorrelationFlowType::Default);
#ifdef HAS_ROCTRACER
// FIXME: bad design here
RoctracerActivityApi::pushCorrelationID(id,
RoctracerActivityApi::CorrelationFlowType::Default);
#endif
}

void ActivityProfilerProxy::popCorrelationId() {
MuptiActivityApi::popCorrelationID(
MuptiActivityApi::CorrelationFlowType::Default);
#ifdef HAS_ROCTRACER
RoctracerActivityApi::popCorrelationID(
RoctracerActivityApi::CorrelationFlowType::Default);
#endif
}

void ActivityProfilerProxy::pushUserCorrelationId(uint64_t id) {
Expand Down
2 changes: 2 additions & 0 deletions libkineto/src/ActivityProfilerProxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class ActivityProfilerProxy : public ActivityProfilerInterface {
const std::set<ActivityType>& activityTypes,
const std::string& configStr = "") override;

void toggleCollectionDynamic(const bool enable) override;

void startTrace() override;
void step() override;
std::unique_ptr<ActivityTraceInterface> stopTrace() override;
Expand Down
Loading
Loading