From acef997ebbf274953bae4f0443b20cb9775b0016 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 12:20:42 +0100 Subject: [PATCH 01/13] Windows debug Signed-off-by: Anatoly Myachev --- CMakeLists.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b20a69e620..8883c5bd63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,16 +18,9 @@ include(CTest) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") # Options -if(WIN32) - set(DEFAULT_BUILD_PROTON OFF) -else() - set(DEFAULT_BUILD_PROTON ON) -endif() - -# Define the option with the determined default value -option(TRITON_BUILD_PROTON "Build the Triton Proton profiler" ${DEFAULT_BUILD_PROTON}) option(TRITON_BUILD_TUTORIALS "Build C++ Triton tutorials" ON) option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF) +option(TRITON_BUILD_PROTON "Build the Triton Proton profiler" ON) option(TRITON_BUILD_UT "Build C++ Triton Unit Tests" ON) option(TRITON_BUILD_WITH_CCACHE "Build with ccache (if available)" ON) set(TRITON_CODEGEN_BACKENDS "" CACHE STRING "Enable different codegen backends") From 675ebc04fdf7c4904ce0131acb3d3657c388a228 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 12:21:05 +0100 Subject: [PATCH 02/13] revert me Signed-off-by: Anatoly Myachev --- .github/workflows/build-test.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index df056ac320..9dbc031d72 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -44,10 +44,6 @@ on: type: boolean default: false - pull_request: - branches: - - main - - release/** push: branches: - main From 2da37f1b804946bf6c29b3e3e7fe46b51146afbf Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 13:29:48 +0100 Subject: [PATCH 03/13] adapt 'Dispatch.h' for Windows Signed-off-by: Anatoly Myachev --- .../proton/csrc/include/Driver/Dispatch.h | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/third_party/proton/csrc/include/Driver/Dispatch.h b/third_party/proton/csrc/include/Driver/Dispatch.h index 1d8ec017cd..34e58ae9aa 100644 --- a/third_party/proton/csrc/include/Driver/Dispatch.h +++ b/third_party/proton/csrc/include/Driver/Dispatch.h @@ -1,7 +1,13 @@ #ifndef PROTON_DRIVER_DISPATCH_H_ #define PROTON_DRIVER_DISPATCH_H_ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#else #include +#endif #include #include @@ -57,6 +63,31 @@ template class Dispatch { public: Dispatch() = delete; +#ifdef WIN32 + static void init(const char *name, void **lib) { + if (*lib == nullptr) { + // First reuse the existing handle + *lib = GetModuleHandle(libraryName); + } + if (*lib == nullptr) { + // If not found, try to load it from LD_LIBRARY_PATH + *lib = LoadLibraryA(name); + } + if (*lib == nullptr) { + // If still not found, try to load it from the default path + auto dir = std::string(ExternLib::defaultDir); + if (dir.length() > 0) { + auto fullPath = dir + "/" + name; + *lib = LoadLibraryA(fullPath.c_str()); + } + } + if (*lib == nullptr) { + throw std::runtime_error("Could not find `" + std::string(name) + + "`. Make sure it is in your " + "LD_LIBRARY_PATH."); + } + } +#else static void init(const char *name, void **lib) { if (*lib == nullptr) { // First reuse the existing handle @@ -80,6 +111,7 @@ template class Dispatch { "LD_LIBRARY_PATH."); } } +#endif static void check(typename ExternLib::RetType ret, const char *functionName) { if (ret != ExternLib::success) { @@ -94,7 +126,11 @@ template class Dispatch { exec(FnT &handler, const char *functionName, Args... args) { init(ExternLib::name, &ExternLib::lib); if (handler == nullptr) { +#ifdef Win32 + handler = reinterpret_cast(GetProcAddress(ExternLib::lib, functionName)); +#else handler = reinterpret_cast(dlsym(ExternLib::lib, functionName)); +#endif if (handler == nullptr) { throw std::runtime_error("Failed to load " + std::string(ExternLib::name)); From 0cf6f52d576da1dbef78165557e4ffa36b456e49 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 13:41:24 +0100 Subject: [PATCH 04/13] fix Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/include/Driver/Dispatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/proton/csrc/include/Driver/Dispatch.h b/third_party/proton/csrc/include/Driver/Dispatch.h index 34e58ae9aa..e75ce4e591 100644 --- a/third_party/proton/csrc/include/Driver/Dispatch.h +++ b/third_party/proton/csrc/include/Driver/Dispatch.h @@ -67,7 +67,7 @@ template class Dispatch { static void init(const char *name, void **lib) { if (*lib == nullptr) { // First reuse the existing handle - *lib = GetModuleHandle(libraryName); + *lib = GetModuleHandle(name); } if (*lib == nullptr) { // If not found, try to load it from LD_LIBRARY_PATH From d4f5932d40bd0ee501736ab063840ac06e0b42a1 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 13:51:28 +0100 Subject: [PATCH 05/13] fix Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/include/Driver/Dispatch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/proton/csrc/include/Driver/Dispatch.h b/third_party/proton/csrc/include/Driver/Dispatch.h index e75ce4e591..513cc21789 100644 --- a/third_party/proton/csrc/include/Driver/Dispatch.h +++ b/third_party/proton/csrc/include/Driver/Dispatch.h @@ -126,7 +126,7 @@ template class Dispatch { exec(FnT &handler, const char *functionName, Args... args) { init(ExternLib::name, &ExternLib::lib); if (handler == nullptr) { -#ifdef Win32 +#ifdef WIN32 handler = reinterpret_cast(GetProcAddress(ExternLib::lib, functionName)); #else handler = reinterpret_cast(dlsym(ExternLib::lib, functionName)); From 165327a4b93dc242a6e34d62d45334144c313496 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 13:54:49 +0100 Subject: [PATCH 06/13] windows like library names Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp | 4 ++++ third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp | 5 +++++ third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp | 4 ++++ third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp | 4 ++++ third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp | 5 +++++ 5 files changed, 22 insertions(+) diff --git a/third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp index d1617b48a7..57b255c445 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp @@ -10,7 +10,11 @@ struct ExternLibCuda : public ExternLibBase { // https://forums.developer.nvidia.com/t/wsl2-libcuda-so-and-libcuda-so-1-should-be-symlink/236301 // On WSL, "libcuda.so" and "libcuda.so.1" may not be linked, so we use // "libcuda.so.1" instead. +#ifdef WIN32 + static constexpr const char *name = "nvcuda.dll"; +#else static constexpr const char *name = "libcuda.so.1"; +#endif static constexpr const char *defaultDir = ""; static constexpr RetType success = CUDA_SUCCESS; static void *lib; diff --git a/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp index 2c399d31c7..6dec8016d7 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp @@ -10,7 +10,12 @@ namespace cupti { #define TOSTRING(x) STRINGIFY(x) struct ExternLibCupti : public ExternLibBase { using RetType = CUptiResult; +#ifdef WIN32 + static constexpr const char *name = "cupti.dll"; +#else static constexpr const char *name = "libcupti.so"; +#endif + #ifdef CUPTI_LIB_DIR static constexpr const char *defaultDir = TOSTRING(CUPTI_LIB_DIR); #else diff --git a/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp index 9e8ef8d225..8189e07802 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp @@ -9,7 +9,11 @@ namespace hip { struct ExternLibHip : public ExternLibBase { using RetType = hipError_t; +#ifdef WIN32 + static constexpr const char *name = "amdhip64.dll"; +#else static constexpr const char *name = "libamdhip64.so"; +#endif static constexpr const char *defaultDir = ""; static constexpr RetType success = hipSuccess; static void *lib; diff --git a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp index 7c607b4b99..5bdb7caa64 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp @@ -7,7 +7,11 @@ namespace hsa { struct ExternLibHsa : public ExternLibBase { using RetType = hsa_status_t; +#ifdef WIN32 + static constexpr const char *name = "hsa-runtime64.dll"; +#else static constexpr const char *name = "libhsa-runtime64.so"; +#endif static constexpr const char *defaultDir = ""; static constexpr RetType success = HSA_STATUS_SUCCESS; static void *lib; diff --git a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp index a6dcdcf346..e2704af0bb 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp @@ -7,7 +7,12 @@ namespace roctracer { struct ExternLibRoctracer : public ExternLibBase { using RetType = roctracer_status_t; +#ifdef WIN32 + // Looks like there is no support for Windows yet + static constexpr const char *name = "roctracer64.dll"; +#else static constexpr const char *name = "libroctracer64.so"; +#endif static constexpr const char *defaultDir = ""; static constexpr RetType success = ROCTRACER_STATUS_SUCCESS; static void *lib; From 834197af679f5227b8d0bc35562e942d1f9493a2 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 15:42:56 +0100 Subject: [PATCH 07/13] fix Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/include/Driver/Dispatch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/third_party/proton/csrc/include/Driver/Dispatch.h b/third_party/proton/csrc/include/Driver/Dispatch.h index 513cc21789..240ed6af09 100644 --- a/third_party/proton/csrc/include/Driver/Dispatch.h +++ b/third_party/proton/csrc/include/Driver/Dispatch.h @@ -127,7 +127,8 @@ template class Dispatch { init(ExternLib::name, &ExternLib::lib); if (handler == nullptr) { #ifdef WIN32 - handler = reinterpret_cast(GetProcAddress(ExternLib::lib, functionName)); + handler = reinterpret_cast( + GetProcAddress((HMODULE)(ExternLib::lib), functionName)); #else handler = reinterpret_cast(dlsym(ExternLib::lib, functionName)); #endif From 6ca8002c2bed6eccfee45b207e003f6b38631f4f Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 15:53:08 +0100 Subject: [PATCH 08/13] remove empty line Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp index 6dec8016d7..3aadbf7008 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp @@ -15,7 +15,6 @@ struct ExternLibCupti : public ExternLibBase { #else static constexpr const char *name = "libcupti.so"; #endif - #ifdef CUPTI_LIB_DIR static constexpr const char *defaultDir = TOSTRING(CUPTI_LIB_DIR); #else From 068e166d606e4574fc0552daa492d1dca415ccf1 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 16:13:55 +0100 Subject: [PATCH 09/13] more guards for 'dlsym' Signed-off-by: Anatoly Myachev --- third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp | 10 ++++++++++ third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp | 5 +++++ .../proton/csrc/lib/Driver/GPU/RoctracerApi.cpp | 15 +++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp index 8189e07802..16669d88b3 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp @@ -74,8 +74,13 @@ const char *getKernelNameRef(const hipFunction_t f) { static hipKernelNameRef_t func = nullptr; Dispatch::init(ExternLibHip::name, &ExternLibHip::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "hipKernelNameRef")); +#else func = reinterpret_cast( dlsym(ExternLibHip::lib, "hipKernelNameRef")); +#endif return (func ? func(f) : NULL); } @@ -85,8 +90,13 @@ const char *getKernelNameRefByPtr(const void *hostFunction, static hipKernelNameRefByPtr_t func = nullptr; Dispatch::init(ExternLibHip::name, &ExternLibHip::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "hipKernelNameRefByPtr")); +#else func = reinterpret_cast( dlsym(ExternLibHip::lib, "hipKernelNameRefByPtr")); +#endif return (func ? func(hostFunction, stream) : NULL); } diff --git a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp index 5bdb7caa64..adfd2e0fcc 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp @@ -30,8 +30,13 @@ hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent, static hsa_iterate_agents_t func = nullptr; Dispatch::init(ExternLibHsa::name, &ExternLibHsa::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "hsa_iterate_agents")); +#else func = reinterpret_cast( dlsym(ExternLibHsa::lib, "hsa_iterate_agents")); +#endif return (func ? func(callback, data) : HSA_STATUS_ERROR_FATAL); } diff --git a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp index e2704af0bb..35e0a3907b 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp @@ -32,8 +32,13 @@ void start() { Dispatch::init(ExternLibRoctracer::name, &ExternLibRoctracer::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_start")); +#else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_start")); +#endif if (func) func(); } @@ -44,8 +49,13 @@ void stop() { Dispatch::init(ExternLibRoctracer::name, &ExternLibRoctracer::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_stop")); +#else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_stop")); +#endif if (func) func(); } @@ -56,8 +66,13 @@ char *getOpString(uint32_t domain, uint32_t op, uint32_t kind) { Dispatch::init(ExternLibRoctracer::name, &ExternLibRoctracer::lib); if (func == nullptr) +#ifdef WIN32 + func = reinterpret_cast( + GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_op_string")); +#else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_op_string")); +#endif return (func ? func(domain, op, kind) : NULL); } From d29d5dbf6081e378c9b17d359de6866061a911b5 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 17:19:01 +0100 Subject: [PATCH 10/13] remove 'hsa_flag_isset64'; fixes Signed-off-by: Anatoly Myachev --- third_party/amd/backend/include/hsa/hsa_ext_amd.h | 11 ----------- third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp | 2 +- .../proton/csrc/lib/Driver/GPU/RoctracerApi.cpp | 6 +++--- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/third_party/amd/backend/include/hsa/hsa_ext_amd.h b/third_party/amd/backend/include/hsa/hsa_ext_amd.h index f9f60edeb9..593c075d05 100644 --- a/third_party/amd/backend/include/hsa/hsa_ext_amd.h +++ b/third_party/amd/backend/include/hsa/hsa_ext_amd.h @@ -69,17 +69,6 @@ extern "C" { * @{ */ -/** - * @brief Macro to use to determine that a flag is set when querying flags within uint8_t[8] - * types - */ -static __inline__ __attribute__((always_inline)) bool hsa_flag_isset64(uint8_t* value, - uint32_t bit) { - unsigned int index = bit / 8; - unsigned int subBit = bit % 8; - return ((uint8_t*)value)[index] & (1 << subBit); -} - /** * @brief A fixed-size type used to represent ::hsa_signal_condition_t constants. */ diff --git a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp index adfd2e0fcc..424fc34388 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp @@ -32,7 +32,7 @@ hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent, if (func == nullptr) #ifdef WIN32 func = reinterpret_cast( - GetProcAddress((HMODULE)ExternLibHip::lib, "hsa_iterate_agents")); + GetProcAddress((HMODULE)ExternLibHsa::lib, "hsa_iterate_agents")); #else func = reinterpret_cast( dlsym(ExternLibHsa::lib, "hsa_iterate_agents")); diff --git a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp index 35e0a3907b..89fcac1ce7 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp @@ -34,7 +34,7 @@ void start() { if (func == nullptr) #ifdef WIN32 func = reinterpret_cast( - GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_start")); + GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_start")); #else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_start")); @@ -51,7 +51,7 @@ void stop() { if (func == nullptr) #ifdef WIN32 func = reinterpret_cast( - GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_stop")); + GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_stop")); #else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_stop")); @@ -68,7 +68,7 @@ char *getOpString(uint32_t domain, uint32_t op, uint32_t kind) { if (func == nullptr) #ifdef WIN32 func = reinterpret_cast( - GetProcAddress((HMODULE)ExternLibHip::lib, "roctracer_op_string")); + GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_op_string")); #else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_op_string")); From 8af80ca63360008210833235e2c00cdf68f856ea Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 18:24:05 +0100 Subject: [PATCH 11/13] don't use c++20 standart in 'CuptiPCSampling.cpp' Signed-off-by: Anatoly Myachev --- .../csrc/lib/Driver/GPU/RoctracerApi.cpp | 4 +- .../lib/Profiler/Cupti/CuptiPCSampling.cpp | 98 ++++++++++--------- 2 files changed, 54 insertions(+), 48 deletions(-) diff --git a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp index 89fcac1ce7..9449f688ad 100644 --- a/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp +++ b/third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp @@ -67,8 +67,8 @@ char *getOpString(uint32_t domain, uint32_t op, uint32_t kind) { &ExternLibRoctracer::lib); if (func == nullptr) #ifdef WIN32 - func = reinterpret_cast( - GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_op_string")); + func = reinterpret_cast(GetProcAddress( + (HMODULE)ExternLibRoctracer::lib, "roctracer_op_string")); #else func = reinterpret_cast( dlsym(ExternLibRoctracer::lib, "roctracer_op_string")); diff --git a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp index 19b50214b9..b6acee6152 100644 --- a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp +++ b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp @@ -15,10 +15,10 @@ namespace { uint64_t getCubinCrc(const char *cubin, size_t size) { CUpti_GetCubinCrcParams cubinCrcParams = { - .size = CUpti_GetCubinCrcParamsSize, - .cubinSize = size, - .cubin = cubin, - .cubinCrc = 0, + /*size=*/CUpti_GetCubinCrcParamsSize, + /*cubinSize*/ size, + /*cubin*/ cubin, + /*cubinCrc*/ 0, }; cupti::getCubinCrc(&cubinCrcParams); return cubinCrcParams.cubinCrc; @@ -27,10 +27,10 @@ uint64_t getCubinCrc(const char *cubin, size_t size) { size_t getNumStallReasons(CUcontext context) { size_t numStallReasons = 0; CUpti_PCSamplingGetNumStallReasonsParams numStallReasonsParams = { - .size = CUpti_PCSamplingGetNumStallReasonsParamsSize, - .pPriv = NULL, - .ctx = context, - .numStallReasons = &numStallReasons}; + /*size*/ CUpti_PCSamplingGetNumStallReasonsParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, + /*numStallReasons*/ &numStallReasons}; cupti::pcSamplingGetNumStallReasons(&numStallReasonsParams); return numStallReasons; } @@ -39,14 +39,14 @@ std::tuple getSassToSourceCorrelation(const char *functionName, uint64_t pcOffset, const char *cubin, size_t cubinSize) { CUpti_GetSassToSourceCorrelationParams sassToSourceParams = { - .size = CUpti_GetSassToSourceCorrelationParamsSize, - .cubin = cubin, - .functionName = functionName, - .cubinSize = cubinSize, - .lineNumber = 0, - .pcOffset = pcOffset, - .fileName = NULL, - .dirName = NULL, + /*size*/ CUpti_GetSassToSourceCorrelationParamsSize, + /*cubin*/ cubin, + /*functionName*/ functionName, + /*cubinSize*/ cubinSize, + /*lineNumber*/ 0, + /*pcOffset*/ pcOffset, + /*fileName*/ NULL, + /*dirName*/ NULL, }; // Get source can fail if the line mapping is not available in the cubin so we // don't check the return value @@ -77,12 +77,12 @@ getStallReasonNamesAndIndices(CUcontext context, size_t numStallReasons) { static_cast(std::calloc(numStallReasons, sizeof(uint32_t))); // Initialize the names with 128 characters to avoid buffer overflow CUpti_PCSamplingGetStallReasonsParams stallReasonsParams = { - .size = CUpti_PCSamplingGetStallReasonsParamsSize, - .pPriv = NULL, - .ctx = context, - .numStallReasons = numStallReasons, - .stallReasonIndex = stallReasonIndices, - .stallReasons = stallReasonNames, + /*size*/ CUpti_PCSamplingGetStallReasonsParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, + /*numStallReasons*/ numStallReasons, + /*stallReasonIndex*/ stallReasonIndices, + /*stallReasons*/ stallReasonNames, }; cupti::pcSamplingGetStallReasons(&stallReasonsParams); return std::make_pair(stallReasonNames, stallReasonIndices); @@ -143,9 +143,15 @@ CUpti_PCSamplingData allocPCSamplingData(size_t collectNumPCs, CUPTI_API_VERSION >= CUPTI_CUDA12_4_VERSION) pcDataSize -= CUPTI_CUDA12_4_PC_DATA_PADDING_SIZE; CUpti_PCSamplingData pcSamplingData{ - .size = pcDataSize, - .collectNumPcs = collectNumPCs, - .pPcData = static_cast( + /*size*/ pcDataSize, + /*collectNumPcs*/ collectNumPCs, + /*totalSamples*/ 0, + /*droppedSamples*/ 0, + /*totalNumPcs*/ 0, + /*remainingNumPcs*/ 0, + /*rangeId*/ 0, + /*pPcData*/ + static_cast( std::calloc(collectNumPCs, sizeof(CUpti_PCSamplingPCData)))}; for (size_t i = 0; i < collectNumPCs; ++i) { pcSamplingData.pPcData[i].stallReason = @@ -157,36 +163,36 @@ CUpti_PCSamplingData allocPCSamplingData(size_t collectNumPCs, void enablePCSampling(CUcontext context) { CUpti_PCSamplingEnableParams params = { - .size = CUpti_PCSamplingEnableParamsSize, - .pPriv = NULL, - .ctx = context, + /*size*/ CUpti_PCSamplingEnableParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, }; cupti::pcSamplingEnable(¶ms); } void disablePCSampling(CUcontext context) { CUpti_PCSamplingDisableParams params = { - .size = CUpti_PCSamplingDisableParamsSize, - .pPriv = NULL, - .ctx = context, + /*size*/ CUpti_PCSamplingDisableParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, }; cupti::pcSamplingDisable(¶ms); } void startPCSampling(CUcontext context) { CUpti_PCSamplingStartParams params = { - .size = CUpti_PCSamplingStartParamsSize, - .pPriv = NULL, - .ctx = context, + /*size*/ CUpti_PCSamplingStartParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, }; cupti::pcSamplingStart(¶ms); } void stopPCSampling(CUcontext context) { CUpti_PCSamplingStopParams params = { - .size = CUpti_PCSamplingStopParamsSize, - .pPriv = NULL, - .ctx = context, + /*size*/ CUpti_PCSamplingStopParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, }; cupti::pcSamplingStop(¶ms); } @@ -194,10 +200,10 @@ void stopPCSampling(CUcontext context) { void getPCSamplingData(CUcontext context, CUpti_PCSamplingData *pcSamplingData) { CUpti_PCSamplingGetDataParams params = { - .size = CUpti_PCSamplingGetDataParamsSize, - .pPriv = NULL, - .ctx = context, - .pcSamplingData = pcSamplingData, + /*size*/ CUpti_PCSamplingGetDataParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, + /*pcSamplingData*/ pcSamplingData, }; cupti::pcSamplingGetData(¶ms); } @@ -206,11 +212,11 @@ void setConfigurationAttribute( CUcontext context, std::vector &configurationInfos) { CUpti_PCSamplingConfigurationInfoParams infoParams = { - .size = CUpti_PCSamplingConfigurationInfoParamsSize, - .pPriv = NULL, - .ctx = context, - .numAttributes = configurationInfos.size(), - .pPCSamplingConfigurationInfo = configurationInfos.data(), + /*size*/ CUpti_PCSamplingConfigurationInfoParamsSize, + /*pPriv*/ NULL, + /*ctx*/ context, + /*numAttributes*/ configurationInfos.size(), + /*pPCSamplingConfigurationInfo*/ configurationInfos.data(), }; cupti::pcSamplingSetConfigurationAttribute(&infoParams); } From 7075bcf06dfc3fa05509ed44bb24fac2d9969426 Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 21:38:37 +0100 Subject: [PATCH 12/13] fix 'error C3861: 'aligned_alloc': identifier not found' Signed-off-by: Anatoly Myachev --- .../proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp index fa0ad0bfda..e6eb84aef5 100644 --- a/third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp +++ b/third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp @@ -12,6 +12,10 @@ #include #include +#ifdef WIN32 +#define aligned_alloc _aligned_malloc +#endif + namespace proton { template <> @@ -257,7 +261,11 @@ void CuptiProfiler::CuptiProfilerPimpl::completeBuffer(CUcontext ctx, } } while (true); +#ifdef WIN32 + _aligned_free(buffer); +#else std::free(buffer); +#endif profiler.correlation.complete(maxCorrelationId); } From 311b27eebceacfe3338025fcf3c4dfd3448c77cc Mon Sep 17 00:00:00 2001 From: Anatoly Myachev Date: Fri, 29 Nov 2024 22:20:57 +0100 Subject: [PATCH 13/13] remove '' and '' Signed-off-by: Anatoly Myachev --- .../proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/third_party/proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp b/third_party/proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp index ca93678e1c..ae38cbe243 100644 --- a/third_party/proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp +++ b/third_party/proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp @@ -16,9 +16,6 @@ #include #include -#include -#include - namespace proton { template <>