Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Windows debug #2875

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
4 changes: 0 additions & 4 deletions .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ on:
type: boolean
default: false

pull_request:
branches:
- main
- release/**
push:
branches:
- main
Expand Down
9 changes: 1 addition & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,9 @@ include(CTest)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# Options
if(WIN32)
set(DEFAULT_BUILD_PROTON OFF)
else()
set(DEFAULT_BUILD_PROTON ON)
endif()

# Define the option with the determined default value
option(TRITON_BUILD_PROTON "Build the Triton Proton profiler" ${DEFAULT_BUILD_PROTON})
option(TRITON_BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
option(TRITON_BUILD_PYTHON_MODULE "Build Python Triton bindings" OFF)
option(TRITON_BUILD_PROTON "Build the Triton Proton profiler" ON)
option(TRITON_BUILD_UT "Build C++ Triton Unit Tests" ON)
option(TRITON_BUILD_WITH_CCACHE "Build with ccache (if available)" ON)
set(TRITON_CODEGEN_BACKENDS "" CACHE STRING "Enable different codegen backends")
Expand Down
11 changes: 0 additions & 11 deletions third_party/amd/backend/include/hsa/hsa_ext_amd.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,6 @@ extern "C" {
* @{
*/

/**
 * @brief Tests whether a given bit is set in a flag field stored as a
 * uint8_t[8] array.
 *
 * Bit 0 is the least significant bit of byte 0; bit @p bit lives in byte
 * (bit / 8) at position (bit % 8). No bounds checking is performed.
 *
 * @param value Pointer to the first byte of the flag array.
 * @param bit Zero-based index of the bit to query.
 * @return true if the bit is set, false otherwise.
 */
static __inline__ __attribute__((always_inline)) bool hsa_flag_isset64(uint8_t* value,
                                                                       uint32_t bit) {
  const uint8_t byte = value[bit >> 3];
  const unsigned int mask = 1u << (bit & 7u);
  return (byte & mask) != 0;
}

/**
* @brief A fixed-size type used to represent ::hsa_signal_condition_t constants.
*/
Expand Down
37 changes: 37 additions & 0 deletions third_party/proton/csrc/include/Driver/Dispatch.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
#ifndef PROTON_DRIVER_DISPATCH_H_
#define PROTON_DRIVER_DISPATCH_H_

#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#else
#include <dlfcn.h>
#endif

#include <stdexcept>
#include <string>
Expand Down Expand Up @@ -57,6 +63,31 @@ template <typename ExternLib> class Dispatch {
public:
Dispatch() = delete;

#ifdef WIN32
static void init(const char *name, void **lib) {
if (*lib == nullptr) {
// First reuse the existing handle
*lib = GetModuleHandle(name);
}
if (*lib == nullptr) {
// If not found, try to load it from LD_LIBRARY_PATH
*lib = LoadLibraryA(name);
}
if (*lib == nullptr) {
// If still not found, try to load it from the default path
auto dir = std::string(ExternLib::defaultDir);
if (dir.length() > 0) {
auto fullPath = dir + "/" + name;
*lib = LoadLibraryA(fullPath.c_str());
}
}
if (*lib == nullptr) {
throw std::runtime_error("Could not find `" + std::string(name) +
"`. Make sure it is in your "
"LD_LIBRARY_PATH.");
}
}
#else
static void init(const char *name, void **lib) {
if (*lib == nullptr) {
// First reuse the existing handle
Expand All @@ -80,6 +111,7 @@ template <typename ExternLib> class Dispatch {
"LD_LIBRARY_PATH.");
}
}
#endif

static void check(typename ExternLib::RetType ret, const char *functionName) {
if (ret != ExternLib::success) {
Expand All @@ -94,7 +126,12 @@ template <typename ExternLib> class Dispatch {
exec(FnT &handler, const char *functionName, Args... args) {
init(ExternLib::name, &ExternLib::lib);
if (handler == nullptr) {
#ifdef WIN32
handler = reinterpret_cast<FnT>(
GetProcAddress((HMODULE)(ExternLib::lib), functionName));
#else
handler = reinterpret_cast<FnT>(dlsym(ExternLib::lib, functionName));
#endif
if (handler == nullptr) {
throw std::runtime_error("Failed to load " +
std::string(ExternLib::name));
Expand Down
4 changes: 4 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ struct ExternLibCuda : public ExternLibBase {
// https://forums.developer.nvidia.com/t/wsl2-libcuda-so-and-libcuda-so-1-should-be-symlink/236301
// On WSL, "libcuda.so" and "libcuda.so.1" may not be linked, so we use
// "libcuda.so.1" instead.
#ifdef WIN32
static constexpr const char *name = "nvcuda.dll";
#else
static constexpr const char *name = "libcuda.so.1";
#endif
static constexpr const char *defaultDir = "";
static constexpr RetType success = CUDA_SUCCESS;
static void *lib;
Expand Down
4 changes: 4 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ namespace cupti {
#define TOSTRING(x) STRINGIFY(x)
struct ExternLibCupti : public ExternLibBase {
using RetType = CUptiResult;
#ifdef WIN32
static constexpr const char *name = "cupti.dll";
#else
static constexpr const char *name = "libcupti.so";
#endif
#ifdef CUPTI_LIB_DIR
static constexpr const char *defaultDir = TOSTRING(CUPTI_LIB_DIR);
#else
Expand Down
14 changes: 14 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ namespace hip {

struct ExternLibHip : public ExternLibBase {
using RetType = hipError_t;
#ifdef WIN32
static constexpr const char *name = "amdhip64.dll";
#else
static constexpr const char *name = "libamdhip64.so";
#endif
static constexpr const char *defaultDir = "";
static constexpr RetType success = hipSuccess;
static void *lib;
Expand Down Expand Up @@ -70,8 +74,13 @@ const char *getKernelNameRef(const hipFunction_t f) {
static hipKernelNameRef_t func = nullptr;
Dispatch<ExternLibHip>::init(ExternLibHip::name, &ExternLibHip::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<hipKernelNameRef_t>(
GetProcAddress((HMODULE)ExternLibHip::lib, "hipKernelNameRef"));
#else
func = reinterpret_cast<hipKernelNameRef_t>(
dlsym(ExternLibHip::lib, "hipKernelNameRef"));
#endif
return (func ? func(f) : NULL);
}

Expand All @@ -81,8 +90,13 @@ const char *getKernelNameRefByPtr(const void *hostFunction,
static hipKernelNameRefByPtr_t func = nullptr;
Dispatch<ExternLibHip>::init(ExternLibHip::name, &ExternLibHip::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<hipKernelNameRefByPtr_t>(
GetProcAddress((HMODULE)ExternLibHip::lib, "hipKernelNameRefByPtr"));
#else
func = reinterpret_cast<hipKernelNameRefByPtr_t>(
dlsym(ExternLibHip::lib, "hipKernelNameRefByPtr"));
#endif
return (func ? func(hostFunction, stream) : NULL);
}

Expand Down
9 changes: 9 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ namespace hsa {

struct ExternLibHsa : public ExternLibBase {
using RetType = hsa_status_t;
#ifdef WIN32
static constexpr const char *name = "hsa-runtime64.dll";
#else
static constexpr const char *name = "libhsa-runtime64.so";
#endif
static constexpr const char *defaultDir = "";
static constexpr RetType success = HSA_STATUS_SUCCESS;
static void *lib;
Expand All @@ -26,8 +30,13 @@ hsa_status_t iterateAgents(hsa_status_t (*callback)(hsa_agent_t agent,
static hsa_iterate_agents_t func = nullptr;
Dispatch<ExternLibHsa>::init(ExternLibHsa::name, &ExternLibHsa::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<hsa_iterate_agents_t>(
GetProcAddress((HMODULE)ExternLibHsa::lib, "hsa_iterate_agents"));
#else
func = reinterpret_cast<hsa_iterate_agents_t>(
dlsym(ExternLibHsa::lib, "hsa_iterate_agents"));
#endif
return (func ? func(callback, data) : HSA_STATUS_ERROR_FATAL);
}

Expand Down
20 changes: 20 additions & 0 deletions third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ namespace roctracer {

struct ExternLibRoctracer : public ExternLibBase {
using RetType = roctracer_status_t;
#ifdef WIN32
// Looks like there is no support for Windows yet
static constexpr const char *name = "roctracer64.dll";
#else
static constexpr const char *name = "libroctracer64.so";
#endif
static constexpr const char *defaultDir = "";
static constexpr RetType success = ROCTRACER_STATUS_SUCCESS;
static void *lib;
Expand All @@ -27,8 +32,13 @@ void start() {
Dispatch<ExternLibRoctracer>::init(ExternLibRoctracer::name,
&ExternLibRoctracer::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<roctracer_start_t>(
GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_start"));
#else
func = reinterpret_cast<roctracer_start_t>(
dlsym(ExternLibRoctracer::lib, "roctracer_start"));
#endif
if (func)
func();
}
Expand All @@ -39,8 +49,13 @@ void stop() {
Dispatch<ExternLibRoctracer>::init(ExternLibRoctracer::name,
&ExternLibRoctracer::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<roctracer_stop_t>(
GetProcAddress((HMODULE)ExternLibRoctracer::lib, "roctracer_stop"));
#else
func = reinterpret_cast<roctracer_stop_t>(
dlsym(ExternLibRoctracer::lib, "roctracer_stop"));
#endif
if (func)
func();
}
Expand All @@ -51,8 +66,13 @@ char *getOpString(uint32_t domain, uint32_t op, uint32_t kind) {
Dispatch<ExternLibRoctracer>::init(ExternLibRoctracer::name,
&ExternLibRoctracer::lib);
if (func == nullptr)
#ifdef WIN32
func = reinterpret_cast<roctracer_op_string_t>(GetProcAddress(
(HMODULE)ExternLibRoctracer::lib, "roctracer_op_string"));
#else
func = reinterpret_cast<roctracer_op_string_t>(
dlsym(ExternLibRoctracer::lib, "roctracer_op_string"));
#endif
return (func ? func(domain, op, kind) : NULL);
}

Expand Down
8 changes: 8 additions & 0 deletions third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#include <memory>
#include <stdexcept>

#ifdef WIN32
// Map aligned_alloc onto the Windows CRT's _aligned_malloc. The two take
// their arguments in opposite order: aligned_alloc(alignment, size) versus
// _aligned_malloc(size, alignment), so the macro swaps them. Memory obtained
// through this mapping must be released with _aligned_free, not free.
#define aligned_alloc(alignment, size) _aligned_malloc((size), (alignment))
#endif

namespace proton {

template <>
Expand Down Expand Up @@ -257,7 +261,11 @@ void CuptiProfiler::CuptiProfilerPimpl::completeBuffer(CUcontext ctx,
}
} while (true);

#ifdef WIN32
_aligned_free(buffer);
#else
std::free(buffer);
#endif

profiler.correlation.complete(maxCorrelationId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@
#include <mutex>
#include <tuple>

#include <cxxabi.h>
#include <unistd.h>

namespace proton {

template <>
Expand Down