From 53bdffa1e5b738f59d84941f183252c6a373e1b9 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 2 May 2023 09:34:18 +0000 Subject: [PATCH 01/27] SWDEV-1 - - Switch to new patch version Change-Id: I0a0814d335e8c3d9a80c1217d8d2df62e7baa548 --- hipamd/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 00c6a3f04..94bb8ed63 100755 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -49,7 +49,7 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) ############################# option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) option(__HIP_ENABLE_PCH "Enable/Disable pre-compiled hip headers" ON) -option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" OFF) +option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" ON) # Disable file reorg backward compatibility for ASAN packaging if(NOT ENABLE_ASAN_PACKAGING) option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON) From 20792433da431750c78a64d9af0f7e153e7f1678 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 31 May 2023 18:13:22 +0000 Subject: [PATCH 02/27] SWDEV-403384 - Change OpenCL version number from 3581 to 3582 Change-Id: Ia451c84750d33c96c2a435469694794d78fb2725 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index c4e0b8383..9be8848d4 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3581 +#define AMD_PLATFORM_BUILD_NUMBER 3582 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From c080c1405660e9fadbc1e13d945afff83b35e42d Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 15 Jun 2023 18:06:45 +0000 Subject: [PATCH 03/27] SWDEV-1 - Bump OpenCL build number to 3583 For last 
mainline promotion based on 15-May-2023 build. Change-Id: Ia2381211df6807df1c8cb265a625c726cd7ab290 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index 9be8848d4..b70a8aa29 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3582 +#define AMD_PLATFORM_BUILD_NUMBER 3583 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From 276d659929e7981a1b85fc48d362ce54898a3808 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 12 Jul 2023 22:56:30 +0000 Subject: [PATCH 04/27] SWDEV-2 - Change OpenCL version number from 3583 to 3588 Change-Id: Ic3916acc6706444717b0c520ed5056e78a76505f --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index b70a8aa29..0c0dcab1e 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3583 +#define AMD_PLATFORM_BUILD_NUMBER 3588 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From f5021ed145522ac23ca5fb2736cc8e8be5256f03 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 19 Jul 2023 06:44:07 +0000 Subject: [PATCH 05/27] SWDEV-2 - Change OpenCL version number from 3588 to 3590 Change-Id: I85fef1bdc27aea2350ad36184625ebcf3c971c51 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index 0c0dcab1e..08fd3c311 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3588 +#define AMD_PLATFORM_BUILD_NUMBER 3590 
#endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From cefafdba230f82c1b1e5da09146f2a83daa2896d Mon Sep 17 00:00:00 2001 From: Rakesh Roy Date: Thu, 24 Aug 2023 06:21:47 -0700 Subject: [PATCH 06/27] SWDEV-2 - Change OpenCL version number from 3590 to 3594 Change-Id: Ia4fb1f59720c62fc872c0ed7cc4e8625353d7079 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index 08fd3c311..4355beeed 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3590 +#define AMD_PLATFORM_BUILD_NUMBER 3594 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From fe4cce8326ad553df1f96eb1496e892220a86e6e Mon Sep 17 00:00:00 2001 From: Rakesh Roy Date: Tue, 5 Sep 2023 12:41:01 +0000 Subject: [PATCH 07/27] SWDEV-2 - Change OpenCL version number from 3594 to 3596 Change-Id: I0bb923950f814052f51d15d59fabaa2cafd679a9 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index 4355beeed..dbc6a3090 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3594 +#define AMD_PLATFORM_BUILD_NUMBER 3596 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From dd5573216c6a52e83f7f1b037435ad1c88a1b590 Mon Sep 17 00:00:00 2001 From: Rakesh Roy Date: Fri, 29 Sep 2023 14:37:21 +0000 Subject: [PATCH 08/27] SWDEV-2 - Change OpenCL version number from 3596 to 3600 Change-Id: Icc81cb530ffd2ed10daf1c3cbf4c973ec9ab67f1 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index dbc6a3090..97a5b5555 100644 --- 
a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3596 +#define AMD_PLATFORM_BUILD_NUMBER 3600 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From a505a6a2c44a75574f0e45eb69b948b6ee16ec80 Mon Sep 17 00:00:00 2001 From: Rakesh Roy Date: Mon, 16 Oct 2023 21:31:07 +0530 Subject: [PATCH 09/27] SWDEV-2 - Change OpenCL version number from 3600 to 3602 Change-Id: I7a99a04040e1e1de4f1c9d7ea6e06302f03d5f26 --- rocclr/utils/versions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rocclr/utils/versions.hpp b/rocclr/utils/versions.hpp index 97a5b5555..1f8b4586f 100644 --- a/rocclr/utils/versions.hpp +++ b/rocclr/utils/versions.hpp @@ -28,7 +28,7 @@ #endif // AMD_PLATFORM_NAME #ifndef AMD_PLATFORM_BUILD_NUMBER -#define AMD_PLATFORM_BUILD_NUMBER 3600 +#define AMD_PLATFORM_BUILD_NUMBER 3602 #endif // AMD_PLATFORM_BUILD_NUMBER #ifndef AMD_PLATFORM_REVISION_NUMBER From e8a52205e6517c4103ebb811c509027f2ef824d4 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Sat, 7 Oct 2023 10:43:03 +0000 Subject: [PATCH 10/27] SWDEV-418100 - Move nv headers to hipother repo Change-Id: Ib54546e366b61bdb83789d8264f30f1bfb875605 --- hipamd/CMakeLists.txt | 33 +- hipamd/hip-backward-compat.cmake | 2 +- hipamd/include/hip/hcc_detail | 1 - hipamd/include/hip/nvcc_detail | 1 - .../nvidia_detail/nvidia_channel_descriptor.h | 28 - .../hip/nvidia_detail/nvidia_hip_atomics.h | 67 - .../hip/nvidia_detail/nvidia_hip_bf16.h | 32 - .../hip/nvidia_detail/nvidia_hip_complex.h | 119 - .../nvidia_hip_cooperative_groups.h | 12 - .../hip/nvidia_detail/nvidia_hip_gl_interop.h | 44 - .../nvidia_detail/nvidia_hip_math_constants.h | 126 - .../hip/nvidia_detail/nvidia_hip_runtime.h | 124 - .../nvidia_detail/nvidia_hip_runtime_api.h | 3793 ----------------- .../nvidia_detail/nvidia_hip_texture_types.h | 6 - .../nvidia_detail/nvidia_hip_unsafe_atomics.h 
| 100 - .../include/hip/nvidia_detail/nvidia_hiprtc.h | 172 - hipamd/packaging/CMakeLists.txt | 6 +- 17 files changed, 10 insertions(+), 4656 deletions(-) delete mode 120000 hipamd/include/hip/hcc_detail delete mode 120000 hipamd/include/hip/nvcc_detail delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_bf16.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_gl_interop.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h delete mode 100644 hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 1facd7e42..9f68e5a70 100755 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -43,7 +43,6 @@ list(APPEND CMAKE_MODULE_PATH ${HIP_COMMON_DIR}/cmake) ############################# # Options ############################# -option(BUILD_HIPIFY_CLANG "Enable building the CUDA->HIP converter" OFF) option(__HIP_ENABLE_PCH "Enable/Disable pre-compiled hip headers" ON) option(HIP_OFFICIAL_BUILD "Enable/Disable for mainline/staging builds" ON) option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" OFF) @@ -63,6 +62,7 @@ endif() message(STATUS "HIPCC_BIN_DIR found at ${HIPCC_BIN_DIR}") message(STATUS "HIP_COMMON_DIR found at ${HIP_COMMON_DIR}") +message(STATUS "HIPNV_DIR found at ${HIPNV_DIR}") set(HIP_COMMON_INCLUDE_DIR 
${HIP_COMMON_DIR}/include) set(HIP_COMMON_BIN_DIR ${HIP_COMMON_DIR}/bin) set(__HIPCONFIG_EXECUTABLE__ ${HIP_COMMON_DIR}/bin/hipconfig) @@ -270,11 +270,6 @@ set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) set(CONFIG_LANG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip-lang) set(CONFIG_RTC_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hiprtc) -# Build clang hipify if enabled -if (BUILD_HIPIFY_CLANG) - add_subdirectory(hipify-clang) -endif() - # Generate hip_version.h set(_versionInfoHeader "// Auto-generated by cmake\n @@ -363,6 +358,9 @@ if(NOT ${INSTALL_SOURCE} EQUAL 0) endif() install(DIRECTORY include DESTINATION .) + if(DEFINED HIPNV_DIR) + install(DIRECTORY ${HIPNV_DIR}/include/hip/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/) + endif() install(DIRECTORY ${HIP_COMMON_INCLUDE_DIR}/hip/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/) if(WIN32) install(DIRECTORY ${HIP_COMMON_DIR}/cmake DESTINATION .) @@ -461,29 +459,6 @@ if(CLANGFORMAT_EXE) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() -############################# -# Testing steps -############################# -# HIT is not compatible with Windows -if(NOT WIN32) - set(HIP_ROOT_DIR ${CMAKE_CURRENT_BINARY_DIR}) - set(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) - if(HIP_PLATFORM STREQUAL "nvidia") - execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/include" "${HIP_ROOT_DIR}/include" RESULT_VARIABLE COPY_COMMAND_OP ERROR_QUIET) - endif() - execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_INCLUDE_DIR}/hip/" "${HIP_ROOT_DIR}/include/hip/" RESULT_VARIABLE COPY_COMMAND_OP ERROR_QUIET) - execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_DIR}/cmake" "${HIP_ROOT_DIR}/cmake" RESULT_VARIABLE COPY_COMMAND_OP ERROR_QUIET) - if(${COPY_COMMAND_OP} EQUAL 0) - execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_COMMON_BIN_DIR}" "${HIP_ROOT_DIR}/bin" RESULT_VARIABLE COPY_COMMAND_OP ERROR_QUIET) - endif() - - file(COPY 
${HIPCC_BIN_DIR}/hipcc DESTINATION ${HIP_ROOT_DIR}/bin/) - file(COPY ${HIPCC_BIN_DIR}/hipcc.pl DESTINATION ${HIP_ROOT_DIR}/bin/) - file(COPY ${HIPCC_BIN_DIR}/hipconfig DESTINATION ${HIP_ROOT_DIR}/bin/) - file(COPY ${HIPCC_BIN_DIR}/hipconfig.pl DESTINATION ${HIP_ROOT_DIR}/bin/) - file(COPY ${HIPCC_BIN_DIR}/hipvars.pm DESTINATION ${HIP_ROOT_DIR}/bin/) -endif() - ############################# # Code analysis ############################# diff --git a/hipamd/hip-backward-compat.cmake b/hipamd/hip-backward-compat.cmake index a625b6321..a0ef58b4e 100644 --- a/hipamd/hip-backward-compat.cmake +++ b/hipamd/hip-backward-compat.cmake @@ -88,7 +88,7 @@ function(generate_wrapper_header) endforeach() #find all header files from include/hip/nvidia_detail - file(GLOB include_files ${HIP_SRC_INC_DIR}/${HIP_NVIDIA_DETAIL_DIR}/*) + file(GLOB include_files ${HIPNV_DIR}/include/hip/${HIP_NVIDIA_DETAIL_DIR}/*) #Convert the list of files into #includes foreach(header_file ${include_files}) # set include guard diff --git a/hipamd/include/hip/hcc_detail b/hipamd/include/hip/hcc_detail deleted file mode 120000 index 4931d4897..000000000 --- a/hipamd/include/hip/hcc_detail +++ /dev/null @@ -1 +0,0 @@ -amd_detail \ No newline at end of file diff --git a/hipamd/include/hip/nvcc_detail b/hipamd/include/hip/nvcc_detail deleted file mode 120000 index e02ee85e4..000000000 --- a/hipamd/include/hip/nvcc_detail +++ /dev/null @@ -1 +0,0 @@ -nvidia_detail \ No newline at end of file diff --git a/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h b/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h deleted file mode 100644 index b5873be17..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_channel_descriptor.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_CHANNEL_DESCRIPTOR_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_CHANNEL_DESCRIPTOR_H - -#include "channel_descriptor.h" - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h deleted file mode 100644 index 19fa9673b..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_atomics.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_ATOMICS_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_ATOMICS_H - - -__device__ inline float atomicMax(float* addr, float val) { - int ret = __float_as_int(*addr); - while (val > __int_as_float(ret)) { - int old = ret; - if ((ret = atomicCAS((int *)addr, old, __float_as_int(val))) == old) - break; - } - return __int_as_float(ret); -} -__device__ inline double atomicMax(double* addr, double val) { - unsigned long long ret = __double_as_longlong(*addr); - while (val > __longlong_as_double(ret)) { - unsigned long long old = ret; - if ((ret = atomicCAS((unsigned long long *)addr, old, __double_as_longlong(val))) == old) - break; - } - return __longlong_as_double(ret); -} - -__device__ inline float atomicMin(float* addr, float val) { - int ret = __float_as_int(*addr); - while (val < __int_as_float(ret)) { - int old = ret; - if ((ret = atomicCAS((int *)addr, old, __float_as_int(val))) == old) - break; - } - return __int_as_float(ret); -} - -__device__ inline double atomicMin(double* addr, double val) { - unsigned long long ret = __double_as_longlong(*addr); - while (val < __longlong_as_double(ret)) { - unsigned long long old = ret; - if ((ret = atomicCAS((unsigned long long *)addr, old, __double_as_longlong(val))) == old) - break; - } - return __longlong_as_double(ret); -} - - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_bf16.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_bf16.h deleted file mode 100644 index 118996af1..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_bf16.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_FP16_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_FP16_H - -#include - -typedef struct __nv_bfloat16 __hip_bfloat16; -typedef struct __nv_bfloat162 __hip_bfloat162; - -#endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_FP16_H diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h deleted file mode 100644 index c6a7cc28b..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_complex.h +++ /dev/null @@ -1,119 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COMPLEX_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COMPLEX_H - -#include "cuComplex.h" - -typedef cuFloatComplex hipFloatComplex; - -__device__ __host__ static inline float hipCrealf(hipFloatComplex z) { return cuCrealf(z); } - -__device__ __host__ static inline float hipCimagf(hipFloatComplex z) { return cuCimagf(z); } - -__device__ __host__ static inline hipFloatComplex make_hipFloatComplex(float a, float b) { - return make_cuFloatComplex(a, b); -} - -__device__ __host__ static inline hipFloatComplex hipConjf(hipFloatComplex z) { return cuConjf(z); } - -__device__ __host__ static inline float hipCsqabsf(hipFloatComplex z) { - return cuCabsf(z) * cuCabsf(z); -} - -__device__ __host__ static inline hipFloatComplex hipCaddf(hipFloatComplex p, hipFloatComplex q) { - return cuCaddf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCsubf(hipFloatComplex p, hipFloatComplex q) { - return cuCsubf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCmulf(hipFloatComplex p, hipFloatComplex q) { - return cuCmulf(p, q); -} - -__device__ __host__ static inline hipFloatComplex hipCdivf(hipFloatComplex p, hipFloatComplex q) { - return cuCdivf(p, q); -} - -__device__ __host__ static inline float hipCabsf(hipFloatComplex z) { return cuCabsf(z); } - -typedef cuDoubleComplex hipDoubleComplex; - -__device__ __host__ static inline double hipCreal(hipDoubleComplex z) { return cuCreal(z); } - -__device__ __host__ static inline double hipCimag(hipDoubleComplex z) { return cuCimag(z); } - -__device__ __host__ static inline hipDoubleComplex make_hipDoubleComplex(double a, double b) { - return make_cuDoubleComplex(a, b); -} - -__device__ __host__ static inline hipDoubleComplex hipConj(hipDoubleComplex z) { return cuConj(z); } - -__device__ __host__ static inline double hipCsqabs(hipDoubleComplex z) { - return cuCabs(z) * cuCabs(z); -} - -__device__ __host__ static inline hipDoubleComplex 
hipCadd(hipDoubleComplex p, hipDoubleComplex q) { - return cuCadd(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCsub(hipDoubleComplex p, hipDoubleComplex q) { - return cuCsub(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCmul(hipDoubleComplex p, hipDoubleComplex q) { - return cuCmul(p, q); -} - -__device__ __host__ static inline hipDoubleComplex hipCdiv(hipDoubleComplex p, hipDoubleComplex q) { - return cuCdiv(p, q); -} - -__device__ __host__ static inline double hipCabs(hipDoubleComplex z) { return cuCabs(z); } - -typedef cuFloatComplex hipComplex; - -__device__ __host__ static inline hipComplex make_hipComplex(float x, float y) { - return make_cuComplex(x, y); -} - -__device__ __host__ static inline hipFloatComplex hipComplexDoubleToFloat(hipDoubleComplex z) { - return cuComplexDoubleToFloat(z); -} - -__device__ __host__ static inline hipDoubleComplex hipComplexFloatToDouble(hipFloatComplex z) { - return cuComplexFloatToDouble(z); -} - -__device__ __host__ static inline hipComplex hipCfmaf(hipComplex p, hipComplex q, hipComplex r) { - return cuCfmaf(p, q, r); -} - -__device__ __host__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDoubleComplex q, - hipDoubleComplex r) { - return cuCfma(p, q, r); -} - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h deleted file mode 100644 index fc98ae228..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_cooperative_groups.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H - -// Include CUDA headers -#include -#include - -// Include HIP wrapper headers around CUDA -#include -#include - -#endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_COOPERATIVE_GROUPS_H diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_gl_interop.h 
b/hipamd/include/hip/nvidia_detail/nvidia_hip_gl_interop.h deleted file mode 100644 index 000d5e7c0..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_gl_interop.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#ifndef HIP_INCLUDE_NVIDIA_HIP_GL_INTEROP_H -#define HIP_INCLUDE_NVIDIA_HIP_GL_INTEROP_H - -#include - -typedef enum cudaGLDeviceList hipGLDeviceList; -#define hipGLDeviceListAll cudaGLDeviceListAll -#define hipGLDeviceListCurrentFrame cudaGLDeviceListCurrentFrame -#define hipGLDeviceListNextFrame cudaGLDeviceListNextFrame - -inline static hipError_t hipGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, - hipGLDeviceList deviceList) { - return hipCUDAErrorTohipError(cudaGLGetDevices(pHipDeviceCount, pHipDevices, hipDeviceCount, deviceList)); -} - -inline static hipError_t hipGraphicsGLRegisterBuffer(hipGraphicsResource** resource, GLuint buffer, unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphicsGLRegisterBuffer(resource, buffer, flags)); -} - -inline static hipError_t hipGraphicsGLRegisterImage(hipGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphicsGLRegisterImage(resource, image, target, flags)); -} -#endif \ No newline at end of file diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h deleted file mode 100644 index 8b53e853f..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_math_constants.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#ifndef NVIDIA_HIP_MATH_CONSTANTS_H -#define NVIDIA_HIP_MATH_CONSTANTS_H - -#include - -// single precision constants -#define HIP_INF_F CUDART_INF_F -#define HIP_NAN_F CUDART_NAN_F -#define HIP_MIN_DENORM_F CUDART_MIN_DENORM_F -#define HIP_MAX_NORMAL_F CUDART_MAX_NORMAL_F -#define HIP_NEG_ZERO_F CUDART_NEG_ZERO_F -#define HIP_ZERO_F CUDART_ZERO_F -#define HIP_ONE_F CUDART_ONE_F -#define HIP_SQRT_HALF_F CUDART_SQRT_HALF_F -#define HIP_SQRT_HALF_HI_F CUDART_SQRT_HALF_HI_F -#define HIP_SQRT_HALF_LO_F CUDART_SQRT_HALF_LO_F -#define HIP_SQRT_TWO_F CUDART_SQRT_TWO_F -#define HIP_THIRD_F CUDART_THIRD_F -#define HIP_PIO4_F CUDART_PIO4_F -#define HIP_PIO2_F CUDART_PIO2_F -#define HIP_3PIO4_F CUDART_3PIO4_F -#define HIP_2_OVER_PI_F CUDART_2_OVER_PI_F -#define HIP_SQRT_2_OVER_PI_F CUDART_SQRT_2_OVER_PI_F -#define HIP_PI_F CUDART_PI_F -#define HIP_L2E_F CUDART_L2E_F -#define HIP_L2T_F CUDART_L2T_F -#define HIP_LG2_F CUDART_LG2_F -#define HIP_LGE_F CUDART_LGE_F -#define HIP_LN2_F CUDART_LN2_F -#define HIP_LNT_F CUDART_LNT_F -#define HIP_LNPI_F CUDART_LNPI_F -#define HIP_TWO_TO_M126_F CUDART_TWO_TO_M126_F -#define HIP_TWO_TO_126_F CUDART_TWO_TO_126_F -#define HIP_NORM_HUGE_F CUDART_NORM_HUGE_F -#define HIP_TWO_TO_23_F CUDART_TWO_TO_23_F -#define HIP_TWO_TO_24_F CUDART_TWO_TO_24_F -#define HIP_TWO_TO_31_F CUDART_TWO_TO_31_F -#define HIP_TWO_TO_32_F CUDART_TWO_TO_32_F -#define HIP_REMQUO_BITS_F CUDART_REMQUO_BITS_F -#define HIP_REMQUO_MASK_F CUDART_REMQUO_MASK_F -#define HIP_TRIG_PLOSS_F CUDART_TRIG_PLOSS_F - -// double precision constants -#define HIP_INF CUDART_INF -#define HIP_NAN CUDART_NAN -#define HIP_NEG_ZERO CUDART_NEG_ZERO -#define HIP_MIN_DENORM CUDART_MIN_DENORM -#define HIP_ZERO CUDART_ZERO -#define HIP_ONE CUDART_ONE -#define HIP_SQRT_TWO CUDART_SQRT_TWO -#define HIP_SQRT_HALF CUDART_SQRT_HALF -#define HIP_SQRT_HALF_HI CUDART_SQRT_HALF_HI -#define HIP_SQRT_HALF_LO CUDART_SQRT_HALF_LO -#define HIP_THIRD CUDART_THIRD -#define HIP_TWOTHIRD CUDART_TWOTHIRD -#define 
HIP_PIO4 CUDART_PIO4 -#define HIP_PIO4_HI CUDART_PIO4_HI -#define HIP_PIO4_LO CUDART_PIO4_LO -#define HIP_PIO2 CUDART_PIO2 -#define HIP_PIO2_HI CUDART_PIO2_HI -#define HIP_PIO2_LO CUDART_PIO2_LO -#define HIP_3PIO4 CUDART_3PIO4 -#define HIP_2_OVER_PI CUDART_2_OVER_PI -#define HIP_PI CUDART_PI -#define HIP_PI_HI CUDART_PI_HI -#define HIP_PI_LO CUDART_PI_LO -#define HIP_SQRT_2PI CUDART_SQRT_2PI -#define HIP_SQRT_2PI_HI CUDART_SQRT_2PI_HI -#define HIP_SQRT_2PI_LO CUDART_SQRT_2PI_LO -#define HIP_SQRT_PIO2 CUDART_SQRT_PIO2 -#define HIP_SQRT_PIO2_HI CUDART_SQRT_PIO2_HI -#define HIP_SQRT_PIO2_LO CUDART_SQRT_PIO2_LO -#define HIP_SQRT_2OPI CUDART_SQRT_2OPI -#define HIP_L2E CUDART_L2E -#define HIP_L2E_HI CUDART_L2E_HI -#define HIP_L2E_LO CUDART_L2E_LO -#define HIP_L2T CUDART_L2T -#define HIP_LG2 CUDART_LG2 -#define HIP_LG2_HI CUDART_LG2_HI -#define HIP_LG2_LO CUDART_LG2_LO -#define HIP_LGE CUDART_LGE -#define HIP_LGE_HI CUDART_LGE_HI -#define HIP_LGE_LO CUDART_LGE_LO -#define HIP_LN2 CUDART_LN2 -#define HIP_LN2_HI CUDART_LN2_HI -#define HIP_LN2_LO CUDART_LN2_LO -#define HIP_LNT CUDART_LNT -#define HIP_LNT_HI CUDART_LNT_HI -#define HIP_LNT_LO CUDART_LNT_LO -#define HIP_LNPI CUDART_LNPI -#define HIP_LN2_X_1024 CUDART_LN2_X_1024 -#define HIP_LN2_X_1025 CUDART_LN2_X_1025 -#define HIP_LN2_X_1075 CUDART_LN2_X_1075 -#define HIP_LG2_X_1024 CUDART_LG2_X_1024 -#define HIP_LG2_X_1075 CUDART_LG2_X_1075 -#define HIP_TWO_TO_23 CUDART_TWO_TO_23 -#define HIP_TWO_TO_52 CUDART_TWO_TO_52 -#define HIP_TWO_TO_53 CUDART_TWO_TO_53 -#define HIP_TWO_TO_54 CUDART_TWO_TO_54 -#define HIP_TWO_TO_M54 CUDART_TWO_TO_M54 -#define HIP_TWO_TO_M1022 CUDART_TWO_TO_M1022 -#define HIP_TRIG_PLOSS CUDART_TRIG_PLOSS -#define HIP_DBL2INT_CVT CUDART_DBL2INT_CVT - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h deleted file mode 100644 index c63e35700..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime.h +++ /dev/null @@ 
-1,124 +0,0 @@ -/* -Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_H - -#include - -#include - -#define HIP_KERNEL_NAME(...) __VA_ARGS__ - -typedef int hipLaunchParm; - -#define hipLaunchKernelGGLInternal(kernelName, numBlocks, numThreads, memPerBlock, streamId, ...) \ - do { \ - kernelName<<>>(__VA_ARGS__); \ - } while (0) - -#define hipLaunchKernelGGL(kernelName, ...) 
hipLaunchKernelGGLInternal((kernelName), __VA_ARGS__) - -#define hipReadModeElementType cudaReadModeElementType - -#ifdef __CUDA_ARCH__ - - -// 32-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (__CUDA_ARCH__ >= 110) -#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 110) -#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (__CUDA_ARCH__ >= 200) - -// 64-bit Atomics: -#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_SHARED_INT64_ATOMICS__ (__CUDA_ARCH__ >= 120) - -// Doubles -#define __HIP_ARCH_HAS_DOUBLES__ (__CUDA_ARCH__ >= 120) - -// warp cross-lane operations: -#define __HIP_ARCH_HAS_WARP_VOTE__ (__CUDA_ARCH__ >= 120) -#define __HIP_ARCH_HAS_WARP_BALLOT__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_WARP_SHUFFLE__ (__CUDA_ARCH__ >= 300) -#define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (__CUDA_ARCH__ >= 350) - -// sync -#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (__CUDA_ARCH__ >= 200) - -// misc -#define __HIP_ARCH_HAS_SURFACE_FUNCS__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_3DGRID__ (__CUDA_ARCH__ >= 200) -#define __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ (__CUDA_ARCH__ >= 350) - -#endif - -#ifdef __CUDACC__ - -#include "nvidia_hip_atomics.h" -#include "nvidia_hip_unsafe_atomics.h" - -#define hipThreadIdx_x threadIdx.x -#define hipThreadIdx_y threadIdx.y -#define hipThreadIdx_z threadIdx.z - -#define hipBlockIdx_x blockIdx.x -#define hipBlockIdx_y blockIdx.y -#define hipBlockIdx_z blockIdx.z - -#define hipBlockDim_x blockDim.x -#define hipBlockDim_y blockDim.y -#define hipBlockDim_z blockDim.z - -#define hipGridDim_x gridDim.x -#define hipGridDim_y gridDim.y -#define hipGridDim_z gridDim.z - -#define HIP_SYMBOL(X) &X - -/** - * Map HIP_DYNAMIC_SHARED to "extern __shared__" for compatibility 
with old HIP applications - * To be removed in a future release. - */ -#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[]; -#define HIP_DYNAMIC_SHARED_ATTRIBUTE - -#ifdef __HIP_DEVICE_COMPILE__ -#define abort_() \ - { asm("trap;"); } -#undef assert -#define assert(COND) \ - { \ - if (!COND) { \ - abort_(); \ - } \ - } -#endif - -#define __clock() clock() -#define __clock64() clock64() - -#endif - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h deleted file mode 100644 index c340cb0b0..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +++ /dev/null @@ -1,3793 +0,0 @@ -/* -Copyright (c) 2015 - 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H - -#include -#include -#include -#include - -#include - -#define CUDA_9000 9000 -#define CUDA_10010 10010 -#define CUDA_10020 10020 -#define CUDA_11010 11010 -#define CUDA_11020 11020 -#define CUDA_11030 11030 -#define CUDA_11040 11040 -#define CUDA_11060 11060 -#define CUDA_12000 12000 - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -#define __dparm(x) = x -#else -#define __dparm(x) -#endif - -// Add Deprecated Support for CUDA Mapped HIP APIs -#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED) -#define __HIP_DEPRECATED -#elif defined(_MSC_VER) -#define __HIP_DEPRECATED __declspec(deprecated) -#elif defined(__GNUC__) -#define __HIP_DEPRECATED __attribute__((deprecated)) -#else -#define __HIP_DEPRECATED -#endif - -// Add Deprecated Support for CUDA Mapped HIP APIs -#if defined(__DOXYGEN_ONLY__) || defined(HIP_ENABLE_DEPRECATED) -#define __HIP_DEPRECATED_MSG(msg) -#elif defined(_MSC_VER) -#define __HIP_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) -#elif defined(__GNUC__) -#define __HIP_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) -#else -#define __HIP_DEPRECATED_MSG(msg) -#endif - - -// TODO -move to include/hip_runtime_api.h as a common implementation. 
-/** - * Memory copy types - * - */ -typedef enum cudaMemcpyKind hipMemcpyKind; -#define hipMemcpyHostToHost cudaMemcpyHostToHost -#define hipMemcpyHostToDevice cudaMemcpyHostToDevice -#define hipMemcpyDeviceToHost cudaMemcpyDeviceToHost -#define hipMemcpyDeviceToDevice cudaMemcpyDeviceToDevice -#define hipMemcpyDefault cudaMemcpyDefault - -typedef enum hipMemoryAdvise { - hipMemAdviseSetReadMostly, - hipMemAdviseUnsetReadMostly, - hipMemAdviseSetPreferredLocation, - hipMemAdviseUnsetPreferredLocation, - hipMemAdviseSetAccessedBy, - hipMemAdviseUnsetAccessedBy -} hipMemoryAdvise; - -// hipDataType -#define hipDataType cudaDataType -#define HIP_R_16F CUDA_R_16F -#define HIP_C_16F CUDA_C_16F -#define HIP_R_16BF CUDA_R_16BF -#define HIP_C_16BF CUDA_C_16BF -#define HIP_R_32F CUDA_R_32F -#define HIP_C_32F CUDA_C_32F -#define HIP_R_64F CUDA_R_64F -#define HIP_C_64F CUDA_C_64F -#define HIP_R_4I CUDA_R_4I -#define HIP_C_4I CUDA_C_4I -#define HIP_R_4U CUDA_R_4U -#define HIP_C_4U CUDA_C_4U -#define HIP_R_8I CUDA_R_8I -#define HIP_C_8I CUDA_C_8I -#define HIP_R_8U CUDA_R_8U -#define HIP_C_8U CUDA_C_8U -#define HIP_R_16I CUDA_R_16I -#define HIP_C_16I CUDA_C_16I -#define HIP_R_16U CUDA_R_16U -#define HIP_C_16U CUDA_C_16U -#define HIP_R_32I CUDA_R_32I -#define HIP_C_32I CUDA_C_32I -#define HIP_R_32U CUDA_R_32U -#define HIP_C_32U CUDA_C_32U -#define HIP_R_64I CUDA_R_64I -#define HIP_C_64I CUDA_C_64I -#define HIP_R_64U CUDA_R_64U -#define HIP_C_64U CUDA_C_64U -#define HIP_R_8F_E4M3 CUDA_R_8F_E4M3 -#define HIP_R_8F_E5M2 CUDA_R_8F_E5M2 - -// hip stream operation masks -#define STREAM_OPS_WAIT_MASK_32 0xFFFFFFFF -#define STREAM_OPS_WAIT_MASK_64 0xFFFFFFFFFFFFFFFF - -// stream operation flags -#define hipStreamWaitValueGte CU_STREAM_WAIT_VALUE_GEQ -#define hipStreamWaitValueEq CU_STREAM_WAIT_VALUE_EQ -#define hipStreamWaitValueAnd CU_STREAM_WAIT_VALUE_AND -#define hipStreamWaitValueNor CU_STREAM_WAIT_VALUE_NOR - -// hipLibraryPropertyType -#define hipLibraryPropertyType 
libraryPropertyType -#define HIP_LIBRARY_MAJOR_VERSION MAJOR_VERSION -#define HIP_LIBRARY_MINOR_VERSION MINOR_VERSION -#define HIP_LIBRARY_PATCH_LEVEL PATCH_LEVEL - -#define HIP_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR -#define HIP_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR - -//hipArray_Format -#define HIP_AD_FORMAT_UNSIGNED_INT8 CU_AD_FORMAT_UNSIGNED_INT8 -#define HIP_AD_FORMAT_UNSIGNED_INT16 CU_AD_FORMAT_UNSIGNED_INT16 -#define HIP_AD_FORMAT_UNSIGNED_INT32 CU_AD_FORMAT_UNSIGNED_INT32 -#define HIP_AD_FORMAT_SIGNED_INT8 CU_AD_FORMAT_SIGNED_INT8 -#define HIP_AD_FORMAT_SIGNED_INT16 CU_AD_FORMAT_SIGNED_INT16 -#define HIP_AD_FORMAT_SIGNED_INT32 CU_AD_FORMAT_SIGNED_INT32 -#define HIP_AD_FORMAT_HALF CU_AD_FORMAT_HALF -#define HIP_AD_FORMAT_FLOAT CU_AD_FORMAT_FLOAT - -// hipArray_Format -#define hipArray_Format CUarray_format - -inline static CUarray_format hipArray_FormatToCUarray_format( - hipArray_Format format) { - switch (format) { - case HIP_AD_FORMAT_UNSIGNED_INT8: - return CU_AD_FORMAT_UNSIGNED_INT8; - case HIP_AD_FORMAT_UNSIGNED_INT16: - return CU_AD_FORMAT_UNSIGNED_INT16; - case HIP_AD_FORMAT_UNSIGNED_INT32: - return CU_AD_FORMAT_UNSIGNED_INT32; - case HIP_AD_FORMAT_SIGNED_INT8: - return CU_AD_FORMAT_SIGNED_INT8; - case HIP_AD_FORMAT_SIGNED_INT16: - return CU_AD_FORMAT_SIGNED_INT16; - case HIP_AD_FORMAT_SIGNED_INT32: - return CU_AD_FORMAT_SIGNED_INT32; - case HIP_AD_FORMAT_HALF: - return CU_AD_FORMAT_HALF; - case HIP_AD_FORMAT_FLOAT: - return CU_AD_FORMAT_FLOAT; - default: - return CU_AD_FORMAT_UNSIGNED_INT8; - } -} - -#define HIP_TR_ADDRESS_MODE_WRAP CU_TR_ADDRESS_MODE_WRAP -#define HIP_TR_ADDRESS_MODE_CLAMP CU_TR_ADDRESS_MODE_CLAMP -#define HIP_TR_ADDRESS_MODE_MIRROR CU_TR_ADDRESS_MODE_MIRROR -#define HIP_TR_ADDRESS_MODE_BORDER CU_TR_ADDRESS_MODE_BORDER - -// hipAddress_mode -#define hipAddress_mode CUaddress_mode - -inline static CUaddress_mode hipAddress_modeToCUaddress_mode( - hipAddress_mode mode) { - switch (mode) { - case HIP_TR_ADDRESS_MODE_WRAP: - 
return CU_TR_ADDRESS_MODE_WRAP; - case HIP_TR_ADDRESS_MODE_CLAMP: - return CU_TR_ADDRESS_MODE_CLAMP; - case HIP_TR_ADDRESS_MODE_MIRROR: - return CU_TR_ADDRESS_MODE_MIRROR; - case HIP_TR_ADDRESS_MODE_BORDER: - return CU_TR_ADDRESS_MODE_BORDER; - default: - return CU_TR_ADDRESS_MODE_WRAP; - } -} - -#define HIP_TR_FILTER_MODE_POINT CU_TR_FILTER_MODE_POINT -#define HIP_TR_FILTER_MODE_LINEAR CU_TR_FILTER_MODE_LINEAR - -// hipFilter_mode -#define hipFilter_mode CUfilter_mode - -inline static CUfilter_mode hipFilter_mode_enumToCUfilter_mode( - hipFilter_mode mode) { - switch (mode) { - case HIP_TR_FILTER_MODE_POINT: - return CU_TR_FILTER_MODE_POINT; - case HIP_TR_FILTER_MODE_LINEAR: - return CU_TR_FILTER_MODE_LINEAR; - default: - return CU_TR_FILTER_MODE_POINT; - } -} - -//hipResourcetype -#define HIP_RESOURCE_TYPE_ARRAY CU_RESOURCE_TYPE_ARRAY -#define HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY CU_RESOURCE_TYPE_MIPMAPPED_ARRAY -#define HIP_RESOURCE_TYPE_LINEAR CU_RESOURCE_TYPE_LINEAR -#define HIP_RESOURCE_TYPE_PITCH2D CU_RESOURCE_TYPE_PITCH2D - -// hipResourcetype -#define hipResourcetype CUresourcetype - -inline static CUresourcetype hipResourcetype_enumToCUresourcetype( - hipResourcetype resType) { - switch (resType) { - case HIP_RESOURCE_TYPE_ARRAY: - return CU_RESOURCE_TYPE_ARRAY; - case HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY: - return CU_RESOURCE_TYPE_MIPMAPPED_ARRAY; - case HIP_RESOURCE_TYPE_LINEAR: - return CU_RESOURCE_TYPE_LINEAR; - case HIP_RESOURCE_TYPE_PITCH2D: - return CU_RESOURCE_TYPE_PITCH2D; - default: - return CU_RESOURCE_TYPE_ARRAY; - } -} - -// hipStreamPerThread -#define hipStreamPerThread ((cudaStream_t)2) - -#define hipTexRef CUtexref -#define hiparray CUarray -typedef CUmipmappedArray hipmipmappedArray; -typedef cudaMipmappedArray_t hipMipmappedArray_t; - -#define HIP_TRSA_OVERRIDE_FORMAT CU_TRSA_OVERRIDE_FORMAT -#define HIP_TRSF_READ_AS_INTEGER CU_TRSF_READ_AS_INTEGER -#define HIP_TRSF_NORMALIZED_COORDINATES CU_TRSF_NORMALIZED_COORDINATES -#define HIP_TRSF_SRGB 
CU_TRSF_SRGB - -// hipTextureAddressMode -typedef enum cudaTextureAddressMode hipTextureAddressMode; -#define hipAddressModeWrap cudaAddressModeWrap -#define hipAddressModeClamp cudaAddressModeClamp -#define hipAddressModeMirror cudaAddressModeMirror -#define hipAddressModeBorder cudaAddressModeBorder - -// hipTextureFilterMode -typedef enum cudaTextureFilterMode hipTextureFilterMode; -#define hipFilterModePoint cudaFilterModePoint -#define hipFilterModeLinear cudaFilterModeLinear - -// hipTextureReadMode -typedef enum cudaTextureReadMode hipTextureReadMode; -#define hipReadModeElementType cudaReadModeElementType -#define hipReadModeNormalizedFloat cudaReadModeNormalizedFloat - -// hipChannelFormatKind -typedef enum cudaChannelFormatKind hipChannelFormatKind; -#define hipChannelFormatKindSigned cudaChannelFormatKindSigned -#define hipChannelFormatKindUnsigned cudaChannelFormatKindUnsigned -#define hipChannelFormatKindFloat cudaChannelFormatKindFloat -#define hipChannelFormatKindNone cudaChannelFormatKindNone - -// hipMemRangeAttribute -typedef enum cudaMemRangeAttribute hipMemRangeAttribute; -#define hipMemRangeAttributeReadMostly cudaMemRangeAttributeReadMostly -#define hipMemRangeAttributePreferredLocation cudaMemRangeAttributePreferredLocation -#define hipMemRangeAttributeAccessedBy cudaMemRangeAttributeAccessedBy -#define hipMemRangeAttributeLastPrefetchLocation cudaMemRangeAttributeLastPrefetchLocation - -#define hipSurfaceBoundaryMode cudaSurfaceBoundaryMode -#define hipBoundaryModeZero cudaBoundaryModeZero -#define hipBoundaryModeTrap cudaBoundaryModeTrap -#define hipBoundaryModeClamp cudaBoundaryModeClamp - -// hipFuncCache -#define hipFuncCachePreferNone cudaFuncCachePreferNone -#define hipFuncCachePreferShared cudaFuncCachePreferShared -#define hipFuncCachePreferL1 cudaFuncCachePreferL1 -#define hipFuncCachePreferEqual cudaFuncCachePreferEqual - -// hipResourceType -#define hipResourceType cudaResourceType -#define hipResourceTypeArray 
cudaResourceTypeArray -#define hipResourceTypeMipmappedArray cudaResourceTypeMipmappedArray -#define hipResourceTypeLinear cudaResourceTypeLinear -#define hipResourceTypePitch2D cudaResourceTypePitch2D -// -// hipErrorNoDevice. - -// hipResourceViewFormat -typedef enum cudaResourceViewFormat hipResourceViewFormat; -#define hipResViewFormatNone cudaResViewFormatNone -#define hipResViewFormatUnsignedChar1 cudaResViewFormatUnsignedChar1 -#define hipResViewFormatUnsignedChar2 cudaResViewFormatUnsignedChar2 -#define hipResViewFormatUnsignedChar4 cudaResViewFormatUnsignedChar4 -#define hipResViewFormatSignedChar1 cudaResViewFormatSignedChar1 -#define hipResViewFormatSignedChar2 cudaResViewFormatSignedChar2 -#define hipResViewFormatSignedChar4 cudaResViewFormatSignedChar4 -#define hipResViewFormatUnsignedShort1 cudaResViewFormatUnsignedShort1 -#define hipResViewFormatUnsignedShort2 cudaResViewFormatUnsignedShort2 -#define hipResViewFormatUnsignedShort4 cudaResViewFormatUnsignedShort4 -#define hipResViewFormatSignedShort1 cudaResViewFormatSignedShort1 -#define hipResViewFormatSignedShort2 cudaResViewFormatSignedShort2 -#define hipResViewFormatSignedShort4 cudaResViewFormatSignedShort4 -#define hipResViewFormatUnsignedInt1 cudaResViewFormatUnsignedInt1 -#define hipResViewFormatUnsignedInt2 cudaResViewFormatUnsignedInt2 -#define hipResViewFormatUnsignedInt4 cudaResViewFormatUnsignedInt4 -#define hipResViewFormatSignedInt1 cudaResViewFormatSignedInt1 -#define hipResViewFormatSignedInt2 cudaResViewFormatSignedInt2 -#define hipResViewFormatSignedInt4 cudaResViewFormatSignedInt4 -#define hipResViewFormatHalf1 cudaResViewFormatHalf1 -#define hipResViewFormatHalf2 cudaResViewFormatHalf2 -#define hipResViewFormatHalf4 cudaResViewFormatHalf4 -#define hipResViewFormatFloat1 cudaResViewFormatFloat1 -#define hipResViewFormatFloat2 cudaResViewFormatFloat2 -#define hipResViewFormatFloat4 cudaResViewFormatFloat4 -#define hipResViewFormatUnsignedBlockCompressed1 
cudaResViewFormatUnsignedBlockCompressed1 -#define hipResViewFormatUnsignedBlockCompressed2 cudaResViewFormatUnsignedBlockCompressed2 -#define hipResViewFormatUnsignedBlockCompressed3 cudaResViewFormatUnsignedBlockCompressed3 -#define hipResViewFormatUnsignedBlockCompressed4 cudaResViewFormatUnsignedBlockCompressed4 -#define hipResViewFormatSignedBlockCompressed4 cudaResViewFormatSignedBlockCompressed4 -#define hipResViewFormatUnsignedBlockCompressed5 cudaResViewFormatUnsignedBlockCompressed5 -#define hipResViewFormatSignedBlockCompressed5 cudaResViewFormatSignedBlockCompressed5 -#define hipResViewFormatUnsignedBlockCompressed6H cudaResViewFormatUnsignedBlockCompressed6H -#define hipResViewFormatSignedBlockCompressed6H cudaResViewFormatSignedBlockCompressed6H -#define hipResViewFormatUnsignedBlockCompressed7 cudaResViewFormatUnsignedBlockCompressed7 - -//! Flags that can be used with hipEventCreateWithFlags: -#define hipEventDefault cudaEventDefault -#define hipEventBlockingSync cudaEventBlockingSync -#define hipEventDisableTiming cudaEventDisableTiming -#define hipEventInterprocess cudaEventInterprocess -#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */ -#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */ - - -#define hipHostMallocDefault cudaHostAllocDefault -#define hipHostMallocPortable cudaHostAllocPortable -#define hipHostMallocMapped cudaHostAllocMapped -#define hipHostMallocWriteCombined cudaHostAllocWriteCombined -#define hipHostMallocCoherent 0x0 -#define hipHostMallocNonCoherent 0x0 - -#define hipMemAttachGlobal cudaMemAttachGlobal -#define hipMemAttachHost cudaMemAttachHost -#define hipMemAttachSingle cudaMemAttachSingle - -#define hipHostRegisterDefault cudaHostRegisterDefault -#define hipHostRegisterPortable cudaHostRegisterPortable -#define hipHostRegisterMapped cudaHostRegisterMapped -#define hipHostRegisterIoMemory cudaHostRegisterIoMemory -#define hipHostRegisterReadOnly cudaHostRegisterReadOnly - -#define 
HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER -#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE -#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END -#define hipLimitPrintfFifoSize cudaLimitPrintfFifoSize -#define hipLimitMallocHeapSize cudaLimitMallocHeapSize -#define hipLimitStackSize cudaLimitStackSize -#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess - -#define hipOccupancyDefault cudaOccupancyDefault -#define hipOccupancyDisableCachingOverride cudaOccupancyDisableCachingOverride - -#define hipCooperativeLaunchMultiDeviceNoPreSync \ - cudaCooperativeLaunchMultiDeviceNoPreSync -#define hipCooperativeLaunchMultiDeviceNoPostSync \ - cudaCooperativeLaunchMultiDeviceNoPostSync - - -// enum CUjit_option redefines -#define HIPRTC_JIT_MAX_REGISTERS CU_JIT_MAX_REGISTERS -#define HIPRTC_JIT_THREADS_PER_BLOCK CU_JIT_THREADS_PER_BLOCK -#define HIPRTC_JIT_WALL_TIME CU_JIT_WALL_TIME -#define HIPRTC_JIT_INFO_LOG_BUFFER CU_JIT_INFO_LOG_BUFFER -#define HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES -#define HIPRTC_JIT_ERROR_LOG_BUFFER CU_JIT_ERROR_LOG_BUFFER -#define HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES -#define HIPRTC_JIT_OPTIMIZATION_LEVEL CU_JIT_OPTIMIZATION_LEVEL -#define HIPRTC_JIT_TARGET_FROM_HIPCONTEXT CU_JIT_TARGET_FROM_CUCONTEXT -#define HIPRTC_JIT_TARGET CU_JIT_TARGET -#define HIPRTC_JIT_FALLBACK_STRATEGY CU_JIT_FALLBACK_STRATEGY -#define HIPRTC_JIT_GENERATE_DEBUG_INFO CU_JIT_GENERATE_DEBUG_INFO -#define HIPRTC_JIT_LOG_VERBOSE CU_JIT_LOG_VERBOSE -#define HIPRTC_JIT_GENERATE_LINE_INFO CU_JIT_GENERATE_LINE_INFO -#define HIPRTC_JIT_CACHE_MODE CU_JIT_CACHE_MODE -#define HIPRTC_JIT_NEW_SM3X_OPT CU_JIT_NEW_SM3X_OPT -#define HIPRTC_JIT_FAST_COMPILE CU_JIT_FAST_COMPILE -#define HIPRTC_JIT_NUM_OPTIONS CU_JIT_NUM_OPTIONS - -typedef cudaEvent_t hipEvent_t; -typedef cudaStream_t hipStream_t; -typedef cudaIpcEventHandle_t hipIpcEventHandle_t; -typedef cudaIpcMemHandle_t 
hipIpcMemHandle_t; -typedef enum cudaLimit hipLimit_t; -typedef enum cudaFuncAttribute hipFuncAttribute; -typedef enum cudaFuncCache hipFuncCache_t; -typedef CUcontext hipCtx_t; -typedef enum cudaSharedMemConfig hipSharedMemConfig; -typedef CUfunc_cache hipFuncCache; -typedef CUjit_option hipJitOption; -typedef CUdevice hipDevice_t; -typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr; -#define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank -#define hipDevP2PAttrAccessSupported cudaDevP2PAttrAccessSupported -#define hipDevP2PAttrNativeAtomicSupported cudaDevP2PAttrNativeAtomicSupported -#define hipDevP2PAttrHipArrayAccessSupported cudaDevP2PAttrCudaArrayAccessSupported -#define hipFuncAttributeMaxDynamicSharedMemorySize cudaFuncAttributeMaxDynamicSharedMemorySize -#define hipFuncAttributePreferredSharedMemoryCarveout cudaFuncAttributePreferredSharedMemoryCarveout - -typedef CUmodule hipModule_t; -typedef CUfunction hipFunction_t; -typedef CUdeviceptr hipDeviceptr_t; -typedef struct cudaArray hipArray; -typedef struct cudaArray* hipArray_t; -typedef struct cudaArray* hipArray_const_t; -typedef struct cudaFuncAttributes hipFuncAttributes; -typedef struct cudaLaunchParams hipLaunchParams; -typedef CUDA_LAUNCH_PARAMS hipFunctionLaunchParams; -#define hipFunction_attribute CUfunction_attribute -#define hipPointer_attribute CUpointer_attribute -#define hip_Memcpy2D CUDA_MEMCPY2D -#define HIP_MEMCPY3D CUDA_MEMCPY3D -#define hipMemcpy3DParms cudaMemcpy3DParms -#define hipArrayDefault cudaArrayDefault -#define hipArrayLayered cudaArrayLayered -#define hipArraySurfaceLoadStore cudaArraySurfaceLoadStore -#define hipArrayCubemap cudaArrayCubemap -#define hipArrayTextureGather cudaArrayTextureGather - -typedef cudaTextureObject_t hipTextureObject_t; -typedef cudaSurfaceObject_t hipSurfaceObject_t; -#define hipTextureType1D cudaTextureType1D -#define hipTextureType1DLayered cudaTextureType1DLayered -#define hipTextureType2D cudaTextureType2D -#define 
hipTextureType2DLayered cudaTextureType2DLayered -#define hipTextureType3D cudaTextureType3D - -#define hipDeviceScheduleAuto cudaDeviceScheduleAuto -#define hipDeviceScheduleSpin cudaDeviceScheduleSpin -#define hipDeviceScheduleYield cudaDeviceScheduleYield -#define hipDeviceScheduleBlockingSync cudaDeviceScheduleBlockingSync -#define hipDeviceScheduleMask cudaDeviceScheduleMask -#define hipDeviceMapHost cudaDeviceMapHost -#define hipDeviceLmemResizeToMax cudaDeviceLmemResizeToMax - -#define hipCpuDeviceId cudaCpuDeviceId -#define hipInvalidDeviceId cudaInvalidDeviceId -typedef struct cudaExtent hipExtent; -typedef struct cudaPitchedPtr hipPitchedPtr; -typedef struct cudaPos hipPos; -#define make_hipExtent make_cudaExtent -#define make_hipPos make_cudaPos -#define make_hipPitchedPtr make_cudaPitchedPtr -// Flags that can be used with hipStreamCreateWithFlags -#define hipStreamDefault cudaStreamDefault -#define hipStreamNonBlocking cudaStreamNonBlocking - -typedef cudaMemPool_t hipMemPool_t; -typedef enum cudaMemPoolAttr hipMemPoolAttr; -#define hipMemPoolReuseFollowEventDependencies cudaMemPoolReuseFollowEventDependencies -#define hipMemPoolReuseAllowOpportunistic cudaMemPoolReuseAllowOpportunistic -#define hipMemPoolReuseAllowInternalDependencies cudaMemPoolReuseAllowInternalDependencies -#define hipMemPoolAttrReleaseThreshold cudaMemPoolAttrReleaseThreshold -#define hipMemPoolAttrReservedMemCurrent cudaMemPoolAttrReservedMemCurrent -#define hipMemPoolAttrReservedMemHigh cudaMemPoolAttrReservedMemHigh -#define hipMemPoolAttrUsedMemCurrent cudaMemPoolAttrUsedMemCurrent -#define hipMemPoolAttrUsedMemHigh cudaMemPoolAttrUsedMemHigh -typedef struct cudaMemLocation hipMemLocation; -typedef struct cudaMemPoolProps hipMemPoolProps; -typedef struct cudaMemAccessDesc hipMemAccessDesc; -typedef enum cudaMemAccessFlags hipMemAccessFlags; -#define hipMemAccessFlagsProtNone cudaMemAccessFlagsProtNone -#define hipMemAccessFlagsProtRead cudaMemAccessFlagsProtRead -#define 
hipMemAccessFlagsProtReadWrite cudaMemAccessFlagsProtReadWrite -typedef enum cudaMemAllocationHandleType hipMemAllocationHandleType; -typedef struct cudaMemPoolPtrExportData hipMemPoolPtrExportData; - -typedef struct cudaChannelFormatDesc hipChannelFormatDesc; -typedef struct cudaResourceDesc hipResourceDesc; -typedef struct cudaTextureDesc hipTextureDesc; -typedef struct cudaResourceViewDesc hipResourceViewDesc; -typedef CUDA_RESOURCE_DESC HIP_RESOURCE_DESC; -typedef CUDA_TEXTURE_DESC HIP_TEXTURE_DESC; -typedef CUDA_RESOURCE_VIEW_DESC HIP_RESOURCE_VIEW_DESC; -// adding code for hipmemSharedConfig -#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault -#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte -#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte - -//Function Attributes -#define HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK -#define HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES -#define HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES -#define HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES -#define HIP_FUNC_ATTRIBUTE_NUM_REGS CU_FUNC_ATTRIBUTE_NUM_REGS -#define HIP_FUNC_ATTRIBUTE_PTX_VERSION CU_FUNC_ATTRIBUTE_PTX_VERSION -#define HIP_FUNC_ATTRIBUTE_BINARY_VERSION CU_FUNC_ATTRIBUTE_BINARY_VERSION -#define HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA CU_FUNC_ATTRIBUTE_CACHE_MODE_CA -#define HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES -#define HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT -#define HIP_FUNC_ATTRIBUTE_MAX CU_FUNC_ATTRIBUTE_MAX - -//Pointer Attributes -#define HIP_POINTER_ATTRIBUTE_CONTEXT CU_POINTER_ATTRIBUTE_CONTEXT -#define HIP_POINTER_ATTRIBUTE_MEMORY_TYPE CU_POINTER_ATTRIBUTE_MEMORY_TYPE -#define HIP_POINTER_ATTRIBUTE_DEVICE_POINTER CU_POINTER_ATTRIBUTE_DEVICE_POINTER -#define 
HIP_POINTER_ATTRIBUTE_HOST_POINTER CU_POINTER_ATTRIBUTE_HOST_POINTER -#define HIP_POINTER_ATTRIBUTE_P2P_TOKENS CU_POINTER_ATTRIBUTE_P2P_TOKENS -#define HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS CU_POINTER_ATTRIBUTE_SYNC_MEMOPS -#define HIP_POINTER_ATTRIBUTE_BUFFER_ID CU_POINTER_ATTRIBUTE_BUFFER_ID -#define HIP_POINTER_ATTRIBUTE_IS_MANAGED CU_POINTER_ATTRIBUTE_IS_MANAGED -#define HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL -#define HIP_POINTER_ATTRIBUTE_IS_LEGACY_HIP_IPC_CAPABLE CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE -#define HIP_POINTER_ATTRIBUTE_RANGE_START_ADDR CU_POINTER_ATTRIBUTE_RANGE_START_ADDR -#define HIP_POINTER_ATTRIBUTE_RANGE_SIZE CU_POINTER_ATTRIBUTE_RANGE_SIZE -#define HIP_POINTER_ATTRIBUTE_MAPPED CU_POINTER_ATTRIBUTE_MAPPED -#define HIP_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES -#define HIP_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE -#define HIP_POINTER_ATTRIBUTE_ACCESS_FLAGS CU_POINTER_ATTRIBUTE_ACCESS_FLAGS -#define HIP_POINTER_ATTRIBUTE_MEMPOOL_HANDLE CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE - -typedef enum cudaGraphInstantiateFlags hipGraphInstantiateFlags; -#define hipGraphInstantiateFlagAutoFreeOnLaunch cudaGraphInstantiateFlagAutoFreeOnLaunch -#define hipGraphInstantiateFlagUpload cudaGraphInstantiateFlagUpload -#define hipGraphInstantiateFlagDeviceLaunch cudaGraphInstantiateFlagDeviceLaunch -#define hipGraphInstantiateFlagUseNodePriority cudaGraphInstantiateFlagUseNodePriority - -#if CUDA_VERSION >= CUDA_9000 -#define __shfl(...) __shfl_sync(0xffffffff, __VA_ARGS__) -#define __shfl_up(...) __shfl_up_sync(0xffffffff, __VA_ARGS__) -#define __shfl_down(...) __shfl_down_sync(0xffffffff, __VA_ARGS__) -#define __shfl_xor(...) 
__shfl_xor_sync(0xffffffff, __VA_ARGS__) -#endif // CUDA_VERSION >= CUDA_9000 - -inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { - switch (cuError) { - case cudaSuccess: - return hipSuccess; - case cudaErrorProfilerDisabled: - return hipErrorProfilerDisabled; - case cudaErrorProfilerNotInitialized: - return hipErrorProfilerNotInitialized; - case cudaErrorProfilerAlreadyStarted: - return hipErrorProfilerAlreadyStarted; - case cudaErrorProfilerAlreadyStopped: - return hipErrorProfilerAlreadyStopped; - case cudaErrorInsufficientDriver: - return hipErrorInsufficientDriver; - case cudaErrorUnsupportedLimit: - return hipErrorUnsupportedLimit; - case cudaErrorPeerAccessUnsupported: - return hipErrorPeerAccessUnsupported; - case cudaErrorInvalidGraphicsContext: - return hipErrorInvalidGraphicsContext; - case cudaErrorSharedObjectSymbolNotFound: - return hipErrorSharedObjectSymbolNotFound; - case cudaErrorSharedObjectInitFailed: - return hipErrorSharedObjectInitFailed; - case cudaErrorOperatingSystem: - return hipErrorOperatingSystem; - case cudaErrorIllegalState: - return hipErrorIllegalState; - case cudaErrorSetOnActiveProcess: - return hipErrorSetOnActiveProcess; - case cudaErrorIllegalAddress: - return hipErrorIllegalAddress; - case cudaErrorInvalidSymbol: - return hipErrorInvalidSymbol; - case cudaErrorMissingConfiguration: - return hipErrorMissingConfiguration; - case cudaErrorMemoryAllocation: - return hipErrorOutOfMemory; - case cudaErrorInitializationError: - return hipErrorNotInitialized; - case cudaErrorLaunchFailure: - return hipErrorLaunchFailure; - case cudaErrorCooperativeLaunchTooLarge: - return hipErrorCooperativeLaunchTooLarge; - case cudaErrorPriorLaunchFailure: - return hipErrorPriorLaunchFailure; - case cudaErrorLaunchOutOfResources: - return hipErrorLaunchOutOfResources; - case cudaErrorInvalidDeviceFunction: - return hipErrorInvalidDeviceFunction; - case cudaErrorInvalidConfiguration: - return hipErrorInvalidConfiguration; - 
case cudaErrorInvalidDevice: - return hipErrorInvalidDevice; - case cudaErrorInvalidValue: - return hipErrorInvalidValue; - case cudaErrorInvalidPitchValue: - return hipErrorInvalidPitchValue; - case cudaErrorInvalidDevicePointer: - return hipErrorInvalidDevicePointer; - case cudaErrorInvalidMemcpyDirection: - return hipErrorInvalidMemcpyDirection; - case cudaErrorInvalidResourceHandle: - return hipErrorInvalidHandle; - case cudaErrorNotReady: - return hipErrorNotReady; - case cudaErrorNoDevice: - return hipErrorNoDevice; - case cudaErrorPeerAccessAlreadyEnabled: - return hipErrorPeerAccessAlreadyEnabled; - case cudaErrorPeerAccessNotEnabled: - return hipErrorPeerAccessNotEnabled; - case cudaErrorContextIsDestroyed: - return hipErrorContextIsDestroyed; - case cudaErrorHostMemoryAlreadyRegistered: - return hipErrorHostMemoryAlreadyRegistered; - case cudaErrorHostMemoryNotRegistered: - return hipErrorHostMemoryNotRegistered; - case cudaErrorMapBufferObjectFailed: - return hipErrorMapFailed; - case cudaErrorAssert: - return hipErrorAssert; - case cudaErrorNotSupported: - return hipErrorNotSupported; - case cudaErrorCudartUnloading: - return hipErrorDeinitialized; - case cudaErrorInvalidKernelImage: - return hipErrorInvalidImage; - case cudaErrorUnmapBufferObjectFailed: - return hipErrorUnmapFailed; - case cudaErrorNoKernelImageForDevice: - return hipErrorNoBinaryForGpu; - case cudaErrorECCUncorrectable: - return hipErrorECCNotCorrectable; - case cudaErrorDeviceAlreadyInUse: - return hipErrorContextAlreadyInUse; - case cudaErrorInvalidPtx: - return hipErrorInvalidKernelFile; - case cudaErrorLaunchTimeout: - return hipErrorLaunchTimeOut; -#if CUDA_VERSION >= CUDA_10010 - case cudaErrorInvalidSource: - return hipErrorInvalidSource; - case cudaErrorFileNotFound: - return hipErrorFileNotFound; - case cudaErrorSymbolNotFound: - return hipErrorNotFound; - case cudaErrorArrayIsMapped: - return hipErrorArrayIsMapped; - case cudaErrorNotMappedAsPointer: - return 
hipErrorNotMappedAsPointer; - case cudaErrorNotMappedAsArray: - return hipErrorNotMappedAsArray; - case cudaErrorNotMapped: - return hipErrorNotMapped; - case cudaErrorAlreadyAcquired: - return hipErrorAlreadyAcquired; - case cudaErrorAlreadyMapped: - return hipErrorAlreadyMapped; -#endif -#if CUDA_VERSION >= CUDA_10020 - case cudaErrorDeviceUninitialized: - return hipErrorInvalidContext; -#endif - case cudaErrorStreamCaptureUnsupported: - return hipErrorStreamCaptureUnsupported; - case cudaErrorStreamCaptureInvalidated: - return hipErrorStreamCaptureInvalidated; - case cudaErrorStreamCaptureMerge: - return hipErrorStreamCaptureMerge; - case cudaErrorStreamCaptureUnmatched: - return hipErrorStreamCaptureUnmatched; - case cudaErrorStreamCaptureUnjoined: - return hipErrorStreamCaptureUnjoined; - case cudaErrorStreamCaptureIsolation: - return hipErrorStreamCaptureIsolation; - case cudaErrorStreamCaptureImplicit: - return hipErrorStreamCaptureImplicit; - case cudaErrorCapturedEvent: - return hipErrorCapturedEvent; - case cudaErrorStreamCaptureWrongThread: - return hipErrorStreamCaptureWrongThread; - case cudaErrorGraphExecUpdateFailure: - return hipErrorGraphExecUpdateFailure; - case cudaErrorUnknown: - default: - return hipErrorUnknown; // Note - translated error. 
- } -} - -inline static hipError_t hipCUResultTohipError(CUresult cuError) { - switch (cuError) { - case CUDA_SUCCESS: - return hipSuccess; - case CUDA_ERROR_OUT_OF_MEMORY: - return hipErrorOutOfMemory; - case CUDA_ERROR_INVALID_VALUE: - return hipErrorInvalidValue; - case CUDA_ERROR_INVALID_DEVICE: - return hipErrorInvalidDevice; - case CUDA_ERROR_DEINITIALIZED: - return hipErrorDeinitialized; - case CUDA_ERROR_NO_DEVICE: - return hipErrorNoDevice; - case CUDA_ERROR_INVALID_CONTEXT: - return hipErrorInvalidContext; - case CUDA_ERROR_NOT_INITIALIZED: - return hipErrorNotInitialized; - case CUDA_ERROR_INVALID_HANDLE: - return hipErrorInvalidHandle; - case CUDA_ERROR_MAP_FAILED: - return hipErrorMapFailed; - case CUDA_ERROR_PROFILER_DISABLED: - return hipErrorProfilerDisabled; - case CUDA_ERROR_PROFILER_NOT_INITIALIZED: - return hipErrorProfilerNotInitialized; - case CUDA_ERROR_PROFILER_ALREADY_STARTED: - return hipErrorProfilerAlreadyStarted; - case CUDA_ERROR_PROFILER_ALREADY_STOPPED: - return hipErrorProfilerAlreadyStopped; - case CUDA_ERROR_INVALID_IMAGE: - return hipErrorInvalidImage; - case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: - return hipErrorContextAlreadyCurrent; - case CUDA_ERROR_UNMAP_FAILED: - return hipErrorUnmapFailed; - case CUDA_ERROR_ARRAY_IS_MAPPED: - return hipErrorArrayIsMapped; - case CUDA_ERROR_ALREADY_MAPPED: - return hipErrorAlreadyMapped; - case CUDA_ERROR_NO_BINARY_FOR_GPU: - return hipErrorNoBinaryForGpu; - case CUDA_ERROR_ALREADY_ACQUIRED: - return hipErrorAlreadyAcquired; - case CUDA_ERROR_NOT_MAPPED: - return hipErrorNotMapped; - case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: - return hipErrorNotMappedAsArray; - case CUDA_ERROR_NOT_MAPPED_AS_POINTER: - return hipErrorNotMappedAsPointer; - case CUDA_ERROR_ECC_UNCORRECTABLE: - return hipErrorECCNotCorrectable; - case CUDA_ERROR_UNSUPPORTED_LIMIT: - return hipErrorUnsupportedLimit; - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: - return hipErrorContextAlreadyInUse; - case 
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: - return hipErrorPeerAccessUnsupported; - case CUDA_ERROR_INVALID_PTX: - return hipErrorInvalidKernelFile; - case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: - return hipErrorInvalidGraphicsContext; - case CUDA_ERROR_INVALID_SOURCE: - return hipErrorInvalidSource; - case CUDA_ERROR_FILE_NOT_FOUND: - return hipErrorFileNotFound; - case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: - return hipErrorSharedObjectSymbolNotFound; - case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: - return hipErrorSharedObjectInitFailed; - case CUDA_ERROR_OPERATING_SYSTEM: - return hipErrorOperatingSystem; - case CUDA_ERROR_ILLEGAL_STATE: - return hipErrorIllegalState; - case CUDA_ERROR_NOT_FOUND: - return hipErrorNotFound; - case CUDA_ERROR_NOT_READY: - return hipErrorNotReady; - case CUDA_ERROR_ILLEGAL_ADDRESS: - return hipErrorIllegalAddress; - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: - return hipErrorLaunchOutOfResources; - case CUDA_ERROR_LAUNCH_TIMEOUT: - return hipErrorLaunchTimeOut; - case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: - return hipErrorPeerAccessAlreadyEnabled; - case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: - return hipErrorPeerAccessNotEnabled; - case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: - return hipErrorSetOnActiveProcess; - case CUDA_ERROR_CONTEXT_IS_DESTROYED: - return hipErrorContextIsDestroyed; - case CUDA_ERROR_ASSERT: - return hipErrorAssert; - case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: - return hipErrorHostMemoryAlreadyRegistered; - case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: - return hipErrorHostMemoryNotRegistered; - case CUDA_ERROR_LAUNCH_FAILED: - return hipErrorLaunchFailure; - case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: - return hipErrorCooperativeLaunchTooLarge; - case CUDA_ERROR_NOT_SUPPORTED: - return hipErrorNotSupported; - case CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED: - return hipErrorStreamCaptureUnsupported; - case CUDA_ERROR_STREAM_CAPTURE_INVALIDATED: - return hipErrorStreamCaptureInvalidated; - case 
CUDA_ERROR_STREAM_CAPTURE_MERGE: - return hipErrorStreamCaptureMerge; - case CUDA_ERROR_STREAM_CAPTURE_UNMATCHED: - return hipErrorStreamCaptureUnmatched; - case CUDA_ERROR_STREAM_CAPTURE_UNJOINED: - return hipErrorStreamCaptureUnjoined; - case CUDA_ERROR_STREAM_CAPTURE_ISOLATION: - return hipErrorStreamCaptureIsolation; - case CUDA_ERROR_STREAM_CAPTURE_IMPLICIT: - return hipErrorStreamCaptureImplicit; - case CUDA_ERROR_CAPTURED_EVENT: - return hipErrorCapturedEvent; - case CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD: - return hipErrorStreamCaptureWrongThread; - case CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE: - return hipErrorGraphExecUpdateFailure; - case CUDA_ERROR_UNKNOWN: - default: - return hipErrorUnknown; // Note - translated error. - } -} - -inline static CUresult hipErrorToCUResult(hipError_t hError) { - switch (hError) { - case hipSuccess: - return CUDA_SUCCESS; - case hipErrorOutOfMemory: - return CUDA_ERROR_OUT_OF_MEMORY; - case hipErrorInvalidValue: - return CUDA_ERROR_INVALID_VALUE; - case hipErrorInvalidDevice: - return CUDA_ERROR_INVALID_DEVICE; - case hipErrorDeinitialized: - return CUDA_ERROR_DEINITIALIZED; - case hipErrorNoDevice: - return CUDA_ERROR_NO_DEVICE; - case hipErrorInvalidContext: - return CUDA_ERROR_INVALID_CONTEXT; - case hipErrorNotInitialized: - return CUDA_ERROR_NOT_INITIALIZED; - case hipErrorInvalidHandle: - return CUDA_ERROR_INVALID_HANDLE; - case hipErrorMapFailed: - return CUDA_ERROR_MAP_FAILED; - case hipErrorProfilerDisabled: - return CUDA_ERROR_PROFILER_DISABLED; - case hipErrorProfilerNotInitialized: - return CUDA_ERROR_PROFILER_NOT_INITIALIZED; - case hipErrorProfilerAlreadyStarted: - return CUDA_ERROR_PROFILER_ALREADY_STARTED; - case hipErrorProfilerAlreadyStopped: - return CUDA_ERROR_PROFILER_ALREADY_STOPPED; - case hipErrorInvalidImage: - return CUDA_ERROR_INVALID_IMAGE; - case hipErrorContextAlreadyCurrent: - return CUDA_ERROR_CONTEXT_ALREADY_CURRENT; - case hipErrorUnmapFailed: - return CUDA_ERROR_UNMAP_FAILED; - case 
hipErrorArrayIsMapped: - return CUDA_ERROR_ARRAY_IS_MAPPED; - case hipErrorAlreadyMapped: - return CUDA_ERROR_ALREADY_MAPPED; - case hipErrorNoBinaryForGpu: - return CUDA_ERROR_NO_BINARY_FOR_GPU; - case hipErrorAlreadyAcquired: - return CUDA_ERROR_ALREADY_ACQUIRED; - case hipErrorNotMapped: - return CUDA_ERROR_NOT_MAPPED; - case hipErrorNotMappedAsArray: - return CUDA_ERROR_NOT_MAPPED_AS_ARRAY; - case hipErrorNotMappedAsPointer: - return CUDA_ERROR_NOT_MAPPED_AS_POINTER; - case hipErrorECCNotCorrectable: - return CUDA_ERROR_ECC_UNCORRECTABLE; - case hipErrorUnsupportedLimit: - return CUDA_ERROR_UNSUPPORTED_LIMIT; - case hipErrorContextAlreadyInUse: - return CUDA_ERROR_CONTEXT_ALREADY_IN_USE; - case hipErrorPeerAccessUnsupported: - return CUDA_ERROR_PEER_ACCESS_UNSUPPORTED; - case hipErrorInvalidKernelFile: - return CUDA_ERROR_INVALID_PTX; - case hipErrorInvalidGraphicsContext: - return CUDA_ERROR_INVALID_GRAPHICS_CONTEXT; - case hipErrorInvalidSource: - return CUDA_ERROR_INVALID_SOURCE; - case hipErrorFileNotFound: - return CUDA_ERROR_FILE_NOT_FOUND; - case hipErrorSharedObjectSymbolNotFound: - return CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND; - case hipErrorSharedObjectInitFailed: - return CUDA_ERROR_SHARED_OBJECT_INIT_FAILED; - case hipErrorOperatingSystem: - return CUDA_ERROR_OPERATING_SYSTEM; - case hipErrorIllegalState: - return CUDA_ERROR_ILLEGAL_STATE; - case hipErrorNotFound: - return CUDA_ERROR_NOT_FOUND; - case hipErrorNotReady: - return CUDA_ERROR_NOT_READY; - case hipErrorIllegalAddress: - return CUDA_ERROR_ILLEGAL_ADDRESS; - case hipErrorLaunchOutOfResources: - return CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES; - case hipErrorLaunchTimeOut: - return CUDA_ERROR_LAUNCH_TIMEOUT; - case hipErrorPeerAccessAlreadyEnabled: - return CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED; - case hipErrorPeerAccessNotEnabled: - return CUDA_ERROR_PEER_ACCESS_NOT_ENABLED; - case hipErrorSetOnActiveProcess: - return CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE; - case hipErrorContextIsDestroyed: - 
return CUDA_ERROR_CONTEXT_IS_DESTROYED; - case hipErrorAssert: - return CUDA_ERROR_ASSERT; - case hipErrorHostMemoryAlreadyRegistered: - return CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED; - case hipErrorHostMemoryNotRegistered: - return CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED; - case hipErrorLaunchFailure: - return CUDA_ERROR_LAUNCH_FAILED; - case hipErrorCooperativeLaunchTooLarge: - return CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE; - case hipErrorNotSupported: - return CUDA_ERROR_NOT_SUPPORTED; - case hipErrorStreamCaptureUnsupported: - return CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED; - case hipErrorStreamCaptureInvalidated: - return CUDA_ERROR_STREAM_CAPTURE_INVALIDATED; - case hipErrorStreamCaptureMerge: - return CUDA_ERROR_STREAM_CAPTURE_MERGE; - case hipErrorStreamCaptureUnmatched: - return CUDA_ERROR_STREAM_CAPTURE_UNMATCHED; - case hipErrorStreamCaptureUnjoined: - return CUDA_ERROR_STREAM_CAPTURE_UNJOINED; - case hipErrorStreamCaptureIsolation: - return CUDA_ERROR_STREAM_CAPTURE_ISOLATION; - case hipErrorStreamCaptureImplicit: - return CUDA_ERROR_STREAM_CAPTURE_IMPLICIT; - case hipErrorCapturedEvent: - return CUDA_ERROR_CAPTURED_EVENT; - case hipErrorStreamCaptureWrongThread: - return CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD; - case hipErrorGraphExecUpdateFailure: - return CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE; - case hipErrorUnknown: - default: - return CUDA_ERROR_UNKNOWN; // Note - translated error. 
- } -} - -inline static cudaError_t hipErrorToCudaError(hipError_t hError) { - switch (hError) { - case hipSuccess: - return cudaSuccess; - case hipErrorOutOfMemory: - return cudaErrorMemoryAllocation; - case hipErrorProfilerDisabled: - return cudaErrorProfilerDisabled; - case hipErrorProfilerNotInitialized: - return cudaErrorProfilerNotInitialized; - case hipErrorProfilerAlreadyStarted: - return cudaErrorProfilerAlreadyStarted; - case hipErrorProfilerAlreadyStopped: - return cudaErrorProfilerAlreadyStopped; - case hipErrorInvalidConfiguration: - return cudaErrorInvalidConfiguration; - case hipErrorLaunchOutOfResources: - return cudaErrorLaunchOutOfResources; - case hipErrorInvalidValue: - return cudaErrorInvalidValue; - case hipErrorInvalidPitchValue: - return cudaErrorInvalidPitchValue; - case hipErrorInvalidHandle: - return cudaErrorInvalidResourceHandle; - case hipErrorInvalidDevice: - return cudaErrorInvalidDevice; - case hipErrorInvalidMemcpyDirection: - return cudaErrorInvalidMemcpyDirection; - case hipErrorInvalidDevicePointer: - return cudaErrorInvalidDevicePointer; - case hipErrorNotInitialized: - return cudaErrorInitializationError; - case hipErrorNoDevice: - return cudaErrorNoDevice; - case hipErrorNotReady: - return cudaErrorNotReady; - case hipErrorPeerAccessNotEnabled: - return cudaErrorPeerAccessNotEnabled; - case hipErrorPeerAccessAlreadyEnabled: - return cudaErrorPeerAccessAlreadyEnabled; - case hipErrorHostMemoryAlreadyRegistered: - return cudaErrorHostMemoryAlreadyRegistered; - case hipErrorHostMemoryNotRegistered: - return cudaErrorHostMemoryNotRegistered; - case hipErrorDeinitialized: - return cudaErrorCudartUnloading; - case hipErrorInvalidSymbol: - return cudaErrorInvalidSymbol; - case hipErrorInsufficientDriver: - return cudaErrorInsufficientDriver; - case hipErrorMissingConfiguration: - return cudaErrorMissingConfiguration; - case hipErrorPriorLaunchFailure: - return cudaErrorPriorLaunchFailure; - case hipErrorInvalidDeviceFunction: - 
return cudaErrorInvalidDeviceFunction; - case hipErrorInvalidImage: - return cudaErrorInvalidKernelImage; - case hipErrorInvalidContext: -#if CUDA_VERSION >= CUDA_10020 - return cudaErrorDeviceUninitialized; -#else - return cudaErrorUnknown; -#endif - case hipErrorMapFailed: - return cudaErrorMapBufferObjectFailed; - case hipErrorUnmapFailed: - return cudaErrorUnmapBufferObjectFailed; - case hipErrorArrayIsMapped: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorArrayIsMapped; -#else - return cudaErrorUnknown; -#endif - case hipErrorAlreadyMapped: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorAlreadyMapped; -#else - return cudaErrorUnknown; -#endif - case hipErrorNoBinaryForGpu: - return cudaErrorNoKernelImageForDevice; - case hipErrorAlreadyAcquired: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorAlreadyAcquired; -#else - return cudaErrorUnknown; -#endif - case hipErrorNotMapped: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorNotMapped; -#else - return cudaErrorUnknown; -#endif - case hipErrorNotMappedAsArray: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorNotMappedAsArray; -#else - return cudaErrorUnknown; -#endif - case hipErrorNotMappedAsPointer: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorNotMappedAsPointer; -#else - return cudaErrorUnknown; -#endif - case hipErrorECCNotCorrectable: - return cudaErrorECCUncorrectable; - case hipErrorUnsupportedLimit: - return cudaErrorUnsupportedLimit; - case hipErrorContextAlreadyInUse: - return cudaErrorDeviceAlreadyInUse; - case hipErrorPeerAccessUnsupported: - return cudaErrorPeerAccessUnsupported; - case hipErrorInvalidKernelFile: - return cudaErrorInvalidPtx; - case hipErrorInvalidGraphicsContext: - return cudaErrorInvalidGraphicsContext; - case hipErrorInvalidSource: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorInvalidSource; -#else - return cudaErrorUnknown; -#endif - case hipErrorFileNotFound: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorFileNotFound; -#else - return cudaErrorUnknown; 
-#endif - case hipErrorSharedObjectSymbolNotFound: - return cudaErrorSharedObjectSymbolNotFound; - case hipErrorSharedObjectInitFailed: - return cudaErrorSharedObjectInitFailed; - case hipErrorOperatingSystem: - return cudaErrorOperatingSystem; - case hipErrorIllegalState: - return cudaErrorIllegalState; - case hipErrorNotFound: -#if CUDA_VERSION >= CUDA_10010 - return cudaErrorSymbolNotFound; -#else - return cudaErrorUnknown; -#endif - case hipErrorIllegalAddress: - return cudaErrorIllegalAddress; - case hipErrorLaunchTimeOut: - return cudaErrorLaunchTimeout; - case hipErrorSetOnActiveProcess: - return cudaErrorSetOnActiveProcess; - case hipErrorContextIsDestroyed: - return cudaErrorContextIsDestroyed; - case hipErrorAssert: - return cudaErrorAssert; - case hipErrorLaunchFailure: - return cudaErrorLaunchFailure; - case hipErrorCooperativeLaunchTooLarge: - return cudaErrorCooperativeLaunchTooLarge; - case hipErrorStreamCaptureUnsupported: - return cudaErrorStreamCaptureUnsupported; - case hipErrorStreamCaptureInvalidated: - return cudaErrorStreamCaptureInvalidated; - case hipErrorStreamCaptureMerge: - return cudaErrorStreamCaptureMerge; - case hipErrorStreamCaptureUnmatched: - return cudaErrorStreamCaptureUnmatched; - case hipErrorStreamCaptureUnjoined: - return cudaErrorStreamCaptureUnjoined; - case hipErrorStreamCaptureIsolation: - return cudaErrorStreamCaptureIsolation; - case hipErrorStreamCaptureImplicit: - return cudaErrorStreamCaptureImplicit; - case hipErrorCapturedEvent: - return cudaErrorCapturedEvent; - case hipErrorStreamCaptureWrongThread: - return cudaErrorStreamCaptureWrongThread; - case hipErrorGraphExecUpdateFailure: - return cudaErrorGraphExecUpdateFailure; - case hipErrorNotSupported: - return cudaErrorNotSupported; - // HSA: does not exist in CUDA - case hipErrorRuntimeMemory: - // HSA: does not exist in CUDA - case hipErrorRuntimeOther: - case hipErrorUnknown: - case hipErrorTbd: - default: - return cudaErrorUnknown; // Note - translated error. 
- } -} - -inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { - switch (kind) { - case hipMemcpyHostToHost: - return cudaMemcpyHostToHost; - case hipMemcpyHostToDevice: - return cudaMemcpyHostToDevice; - case hipMemcpyDeviceToHost: - return cudaMemcpyDeviceToHost; - case hipMemcpyDeviceToDevice: - return cudaMemcpyDeviceToDevice; - case hipMemcpyDefault: - return cudaMemcpyDefault; - default: - return (hipMemcpyKind)-1; - } -} - -inline static enum cudaTextureAddressMode hipTextureAddressModeToCudaTextureAddressMode( - hipTextureAddressMode kind) { - switch (kind) { - case hipAddressModeWrap: - return cudaAddressModeWrap; - case hipAddressModeClamp: - return cudaAddressModeClamp; - case hipAddressModeMirror: - return cudaAddressModeMirror; - case hipAddressModeBorder: - return cudaAddressModeBorder; - default: - return (hipTextureAddressMode)-1; - } -} - -inline static enum cudaMemRangeAttribute hipMemRangeAttributeToCudaMemRangeAttribute( - hipMemRangeAttribute kind) { - switch (kind) { - case hipMemRangeAttributeReadMostly: - return cudaMemRangeAttributeReadMostly; - case hipMemRangeAttributePreferredLocation: - return cudaMemRangeAttributePreferredLocation; - case hipMemRangeAttributeAccessedBy: - return cudaMemRangeAttributeAccessedBy; - case hipMemRangeAttributeLastPrefetchLocation: - return cudaMemRangeAttributeLastPrefetchLocation; - default: - return (hipMemRangeAttribute)-1; - } -} - -inline static enum cudaMemoryAdvise hipMemoryAdviseTocudaMemoryAdvise( - hipMemoryAdvise kind) { - switch (kind) { - case hipMemAdviseSetReadMostly: - return cudaMemAdviseSetReadMostly; - case hipMemAdviseUnsetReadMostly : - return cudaMemAdviseUnsetReadMostly ; - case hipMemAdviseSetPreferredLocation: - return cudaMemAdviseSetPreferredLocation; - case hipMemAdviseUnsetPreferredLocation: - return cudaMemAdviseUnsetPreferredLocation; - case hipMemAdviseSetAccessedBy: - return cudaMemAdviseSetAccessedBy; - case hipMemAdviseUnsetAccessedBy: - 
return cudaMemAdviseUnsetAccessedBy; - default: - return (enum cudaMemoryAdvise)-1; - } -} - -inline static enum cudaTextureFilterMode hipTextureFilterModeToCudaTextureFilterMode( - hipTextureFilterMode kind) { - switch (kind) { - case hipFilterModePoint: - return cudaFilterModePoint; - case hipFilterModeLinear: - return cudaFilterModeLinear; - default: - return (hipTextureFilterMode)-1; - } -} - -inline static enum cudaTextureReadMode hipTextureReadModeToCudaTextureReadMode(hipTextureReadMode kind) { - switch (kind) { - case hipReadModeElementType: - return cudaReadModeElementType; - case hipReadModeNormalizedFloat: - return cudaReadModeNormalizedFloat; - default: - return (hipTextureReadMode)-1; - } -} - -inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormatKind( - hipChannelFormatKind kind) { - switch (kind) { - case hipChannelFormatKindSigned: - return cudaChannelFormatKindSigned; - case hipChannelFormatKindUnsigned: - return cudaChannelFormatKindUnsigned; - case hipChannelFormatKindFloat: - return cudaChannelFormatKindFloat; - case hipChannelFormatKindNone: - return cudaChannelFormatKindNone; - default: - return (hipChannelFormatKind)-1; - } -} - -typedef enum cudaExternalMemoryHandleType hipExternalMemoryHandleType; -#define hipExternalMemoryHandleTypeOpaqueFd cudaExternalMemoryHandleTypeOpaqueFd -#define hipExternalMemoryHandleTypeOpaqueWin32 cudaExternalMemoryHandleTypeOpaqueWin32 -#define hipExternalMemoryHandleTypeOpaqueWin32Kmt cudaExternalMemoryHandleTypeOpaqueWin32Kmt -#define hipExternalMemoryHandleTypeD3D12Heap cudaExternalMemoryHandleTypeD3D12Heap -#define hipExternalMemoryHandleTypeD3D12Resource cudaExternalMemoryHandleTypeD3D12Resource -#if CUDA_VERSION >= CUDA_10020 -#define hipExternalMemoryHandleTypeD3D11Resource cudaExternalMemoryHandleTypeD3D11Resource -#define hipExternalMemoryHandleTypeD3D11ResourceKmt cudaExternalMemoryHandleTypeD3D11ResourceKmt -#define hipExternalMemoryHandleTypeNvSciBuf 
cudaExternalMemoryHandleTypeNvSciBuf -#endif - -typedef struct cudaExternalMemoryHandleDesc hipExternalMemoryHandleDesc; -typedef struct cudaExternalMemoryBufferDesc hipExternalMemoryBufferDesc; -typedef cudaExternalMemory_t hipExternalMemory_t; -typedef cudaExternalMemoryMipmappedArrayDesc hipExternalMemoryMipmappedArrayDesc; - -typedef enum cudaExternalSemaphoreHandleType hipExternalSemaphoreHandleType; -#define hipExternalSemaphoreHandleTypeOpaqueFd cudaExternalSemaphoreHandleTypeOpaqueFd -#define hipExternalSemaphoreHandleTypeOpaqueWin32 cudaExternalSemaphoreHandleTypeOpaqueWin32 -#define hipExternalSemaphoreHandleTypeOpaqueWin32Kmt cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt -#define hipExternalSemaphoreHandleTypeD3D12Fence cudaExternalSemaphoreHandleTypeD3D12Fence -#if CUDA_VERSION >= CUDA_10020 -#define hipExternalSemaphoreHandleTypeD3D11Fence cudaExternalSemaphoreHandleTypeD3D11Fence -#define hipExternalSemaphoreHandleTypeNvSciSync cudaExternalSemaphoreHandleTypeNvSciSync -#define hipExternalSemaphoreHandleTypeKeyedMutex cudaExternalSemaphoreHandleTypeKeyedMutex -#define hipExternalSemaphoreHandleTypeKeyedMutexKmt cudaExternalSemaphoreHandleTypeKeyedMutexKmt -#endif -#if CUDA_VERSION >= CUDA_11020 -#define hipExternalSemaphoreHandleTypeTimelineSemaphoreFd cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd -#define hipExternalSemaphoreHandleTypeTimelineSemaphoreWin32 cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 -#endif - -typedef struct cudaExternalSemaphoreHandleDesc hipExternalSemaphoreHandleDesc; -typedef cudaExternalSemaphore_t hipExternalSemaphore_t; -typedef struct cudaExternalSemaphoreSignalParams hipExternalSemaphoreSignalParams; -typedef struct cudaExternalSemaphoreWaitParams hipExternalSemaphoreWaitParams; - -typedef struct cudaGraphicsResource hipGraphicsResource; -typedef cudaGraphicsResource_t hipGraphicsResource_t; - -typedef enum cudaGraphicsRegisterFlags hipGraphicsRegisterFlags; -#define hipGraphicsRegisterFlagsNone 
cudaGraphicsRegisterFlagsNone -#define hipGraphicsRegisterFlagsReadOnly cudaGraphicsRegisterFlagsReadOnly -#define hipGraphicsRegisterFlagsWriteDiscard cudaGraphicsRegisterFlagsWriteDiscard -#define hipGraphicsRegisterFlagsSurfaceLoadStore cudaGraphicsRegisterFlagsSurfaceLoadStore -#define hipGraphicsRegisterFlagsTextureGather cudaGraphicsRegisterFlagsTextureGather - -/** - * graph types - * - */ -typedef cudaGraph_t hipGraph_t; -typedef cudaGraphNode_t hipGraphNode_t; -typedef cudaGraphExec_t hipGraphExec_t; -typedef cudaUserObject_t hipUserObject_t; - -typedef enum cudaGraphNodeType hipGraphNodeType; -#define hipGraphNodeTypeKernel cudaGraphNodeTypeKernel -#define hipGraphNodeTypeMemcpy cudaGraphNodeTypeMemcpy -#define hipGraphNodeTypeMemset cudaGraphNodeTypeMemset -#define hipGraphNodeTypeHost cudaGraphNodeTypeHost -#define hipGraphNodeTypeGraph cudaGraphNodeTypeGraph -#define hipGraphNodeTypeEmpty cudaGraphNodeTypeEmpty -#define hipGraphNodeTypeWaitEvent cudaGraphNodeTypeWaitEvent -#define hipGraphNodeTypeEventRecord cudaGraphNodeTypeEventRecord -#define hipGraphNodeTypeExtSemaphoreSignal cudaGraphNodeTypeExtSemaphoreSignal -#define hipGraphNodeTypeExtSemaphoreWait cudaGraphNodeTypeExtSemaphoreWait -#define hipGraphNodeTypeMemcpyFromSymbol cudaGraphNodeTypeMemcpyFromSymbol -#define hipGraphNodeTypeMemcpyToSymbol cudaGraphNodeTypeMemcpyToSymbol -#define hipGraphNodeTypeCount cudaGraphNodeTypeCount - -typedef cudaHostFn_t hipHostFn_t; -typedef struct cudaHostNodeParams hipHostNodeParams; -typedef struct cudaKernelNodeParams hipKernelNodeParams; -typedef struct cudaMemsetParams hipMemsetParams; - -#if CUDA_VERSION >= CUDA_11040 -typedef struct cudaMemAllocNodeParams hipMemAllocNodeParams; -#endif - -typedef enum cudaGraphExecUpdateResult hipGraphExecUpdateResult; -#define hipGraphExecUpdateSuccess cudaGraphExecUpdateSuccess -#define hipGraphExecUpdateError cudaGraphExecUpdateError -#define hipGraphExecUpdateErrorTopologyChanged 
cudaGraphExecUpdateErrorTopologyChanged -#define hipGraphExecUpdateErrorNodeTypeChanged cudaGraphExecUpdateErrorNodeTypeChanged -#define hipGraphExecUpdateErrorFunctionChanged cudaGraphExecUpdateErrorFunctionChanged -#define hipGraphExecUpdateErrorParametersChanged cudaGraphExecUpdateErrorParametersChanged -#define hipGraphExecUpdateErrorNotSupported cudaGraphExecUpdateErrorNotSupported -#define hipGraphExecUpdateErrorUnsupportedFunctionChange \ - cudaGraphExecUpdateErrorUnsupportedFunctionChange - -typedef enum cudaStreamCaptureMode hipStreamCaptureMode; -#define hipStreamCaptureModeGlobal cudaStreamCaptureModeGlobal -#define hipStreamCaptureModeThreadLocal cudaStreamCaptureModeThreadLocal -#define hipStreamCaptureModeRelaxed cudaStreamCaptureModeRelaxed - -typedef enum cudaStreamCaptureStatus hipStreamCaptureStatus; -#define hipStreamCaptureStatusNone cudaStreamCaptureStatusNone -#define hipStreamCaptureStatusActive cudaStreamCaptureStatusActive -#define hipStreamCaptureStatusInvalidated cudaStreamCaptureStatusInvalidated - -typedef union cudaKernelNodeAttrValue hipKernelNodeAttrValue; -typedef enum cudaKernelNodeAttrID hipKernelNodeAttrID; -#define hipKernelNodeAttributeAccessPolicyWindow cudaKernelNodeAttributeAccessPolicyWindow -#define hipKernelNodeAttributeCooperative cudaKernelNodeAttributeCooperative -typedef enum cudaAccessProperty hipAccessProperty; -#define hipAccessPropertyNormal cudaAccessPropertyNormal -#define hipAccessPropertyStreaming cudaAccessPropertyStreaming -#define hipAccessPropertyPersisting cudaAccessPropertyPersisting -typedef struct cudaAccessPolicyWindow hipAccessPolicyWindow; - -typedef enum cudaGraphMemAttributeType hipGraphMemAttributeType; -#define hipGraphMemAttrUsedMemCurrent cudaGraphMemAttrUsedMemCurrent -#define hipGraphMemAttrUsedMemHigh cudaGraphMemAttrUsedMemHigh -#define hipGraphMemAttrReservedMemCurrent cudaGraphMemAttrReservedMemCurrent -#define hipGraphMemAttrReservedMemHigh cudaGraphMemAttrReservedMemHigh - -typedef 
enum cudaUserObjectFlags hipUserObjectFlags; -#define hipUserObjectNoDestructorSync cudaUserObjectNoDestructorSync - -typedef enum cudaUserObjectRetainFlags hipUserObjectRetainFlags; -#define hipGraphUserObjectMove cudaGraphUserObjectMove - -#if CUDA_VERSION >= CUDA_11030 -typedef enum cudaStreamUpdateCaptureDependenciesFlags hipStreamUpdateCaptureDependenciesFlags; -#define hipStreamAddCaptureDependencies cudaStreamAddCaptureDependencies -#define hipStreamSetCaptureDependencies cudaStreamSetCaptureDependencies -#endif - -#if CUDA_VERSION >= CUDA_11030 -typedef enum cudaGraphDebugDotFlags hipGraphDebugDotFlags; -#define hipGraphDebugDotFlagsVerbose cudaGraphDebugDotFlagsVerbose -#define hipGraphDebugDotFlagsKernelNodeParams cudaGraphDebugDotFlagsKernelNodeParams -#define hipGraphDebugDotFlagsMemcpyNodeParams cudaGraphDebugDotFlagsMemcpyNodeParams -#define hipGraphDebugDotFlagsMemsetNodeParams cudaGraphDebugDotFlagsMemsetNodeParams -#define hipGraphDebugDotFlagsHostNodeParams cudaGraphDebugDotFlagsHostNodeParams -#define hipGraphDebugDotFlagsEventNodeParams cudaGraphDebugDotFlagsEventNodeParams -#define hipGraphDebugDotFlagsExtSemasSignalNodeParams cudaGraphDebugDotFlagsExtSemasSignalNodeParams -#define hipGraphDebugDotFlagsExtSemasWaitNodeParams cudaGraphDebugDotFlagsExtSemasWaitNodeParams -#define hipGraphDebugDotFlagsKernelNodeAttributes cudaGraphDebugDotFlagsKernelNodeAttributes -#define hipGraphDebugDotFlagsHandles cudaGraphDebugDotFlagsHandles -#endif - -#if CUDA_VERSION >= CUDA_10020 -#define hipMemAllocationGranularityMinimum CU_MEM_ALLOC_GRANULARITY_MINIMUM -#define hipMemAllocationGranularityRecommended CU_MEM_ALLOC_GRANULARITY_RECOMMENDED -typedef enum CUmemAllocationGranularity_flags_enum hipMemAllocationGranularity_flags; -typedef enum cudaMemLocationType hipMemLocationType; -#define hipMemLocationTypeInvalid cudaMemLocationTypeInvalid -#define hipMemLocationTypeDevice cudaMemLocationTypeDevice -#define hipMemHandleTypeNone cudaMemHandleTypeNone 
-#define hipMemHandleTypePosixFileDescriptor cudaMemHandleTypePosixFileDescriptor -#define hipMemHandleTypeWin32 cudaMemHandleTypeWin32 -#define hipMemHandleTypeWin32Kmt cudaMemHandleTypeWin32Kmt -typedef enum cudaMemAllocationType hipMemAllocationType; -#define hipMemAllocationTypeInvalid cudaMemAllocationTypeInvalid -#define hipMemAllocationTypePinned cudaMemAllocationTypePinned -#define hipMemAllocationTypeMax cudaMemAllocationTypeMax -#define hipMemGenericAllocationHandle_t CUmemGenericAllocationHandle -//CUarrayMapInfo mappings -typedef CUarrayMapInfo hipArrayMapInfo; -typedef CUarraySparseSubresourceType hipArraySparseSubresourceType; -#define hipArraySparseSubresourceTypeSparseLevel CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL -#define hipArraySparseSubresourceTypeMiptail CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL -typedef CUmemOperationType hipMemOperationType; -#define hipMemOperationTypeMap CU_MEM_OPERATION_TYPE_MAP -#define hipMemOperationTypeUnmap CU_MEM_OPERATION_TYPE_UNMAP -typedef CUmemHandleType hipMemHandleType; -#define hipMemHandleTypeGeneric CU_MEM_HANDLE_TYPE_GENERIC -// Explicitely declaring hipMemAllocationProp based on CUmemAllocationProp but using CUDA runtime members instead -// Because hipMemAllocationType, hipMemAllocationHandleType & hipMemLocation are defined using CUDA runtime data types & also used by hipMemPoolProps -// Currently there doesn't exist CUDA inbuilt runtime structure corresponding to CUmemAllocationProp -// Need to update this structure accordingly if CUDA updates CUmemAllocationProp -typedef struct hipMemAllocationProp { - /** Memory allocation type */ - hipMemAllocationType type; - /** Requested handle type */ - hipMemAllocationHandleType requestedHandleTypes; - /** Location of allocation */ - hipMemLocation location; - /** - * Windows-specific POBJECT_ATTRIBUTES required when - * ::CU_MEM_HANDLE_TYPE_WIN32 is specified. 
This object atributes structure - * includes security attributes that define - * the scope of which exported allocations may be tranferred to other - * processes. In all other cases, this field is required to be zero. - */ - void *win32HandleMetaData; - struct { - /** - * Allocation hint for requesting compressible memory. - * On devices that support Compute Data Compression, compressible - * memory can be used to accelerate accesses to data with unstructured - * sparsity and other compressible data patterns. Applications are - * expected to query allocation property of the handle obtained with - * ::cuMemCreate using ::cuMemGetAllocationPropertiesFromHandle to - * validate if the obtained allocation is compressible or not. Note that - * compressed memory may not be mappable on all devices. - */ - unsigned char compressionType; - /** RDMA capable */ - unsigned char gpuDirectRDMACapable; - /** Bitmask indicating intended usage for this allocation */ - unsigned short usage; - unsigned char reserved[4]; - } allocFlags; -} hipMemAllocationProp; -#endif -/** - * Stream CallBack struct - */ -#define HIPRT_CB CUDART_CB -typedef void(HIPRT_CB* hipStreamCallback_t)(hipStream_t stream, hipError_t status, void* userData); -inline static hipError_t hipInit(unsigned int flags) { - return hipCUResultTohipError(cuInit(flags)); -} - -inline static hipError_t hipDeviceReset() { return hipCUDAErrorTohipError(cudaDeviceReset()); } - -inline static hipError_t hipGetLastError() { return hipCUDAErrorTohipError(cudaGetLastError()); } - -inline static hipError_t hipPeekAtLastError() { - return hipCUDAErrorTohipError(cudaPeekAtLastError()); -} - -inline static hipError_t hipMalloc(void** ptr, size_t size) { - return hipCUDAErrorTohipError(cudaMalloc(ptr, size)); -} - -inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) { - return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height)); -} - -inline static hipError_t 
hipMemAllocPitch(hipDeviceptr_t* dptr,size_t* pitch,size_t widthInBytes,size_t height,unsigned int elementSizeBytes){ - return hipCUResultTohipError(cuMemAllocPitch(dptr,pitch,widthInBytes,height,elementSizeBytes)); -} - -inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) { - return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent)); -} - -inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); } - -__HIP_DEPRECATED_MSG("use hipHostMalloc instead") -inline static hipError_t hipMallocHost(void** ptr, size_t size) { - return hipCUDAErrorTohipError(cudaMallocHost(ptr, size)); -} - -__HIP_DEPRECATED_MSG("use hipHostMalloc instead") -inline static hipError_t hipMemAllocHost(void** ptr, size_t size) { - return hipCUResultTohipError(cuMemAllocHost(ptr, size)); -} - -__HIP_DEPRECATED_MSG("use hipHostMalloc instead") -inline static hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); -} - -inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); -} - -inline static hipError_t hipMemAdvise(const void* dev_ptr, size_t count, hipMemoryAdvise advice, - int device) { - return hipCUDAErrorTohipError(cudaMemAdvise(dev_ptr, count, - hipMemoryAdviseTocudaMemoryAdvise(advice), device)); -} - -inline static hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemPrefetchAsync(dev_ptr, count, device, stream)); -} - -inline static hipError_t hipMemRangeGetAttribute(void* data, size_t data_size, - hipMemRangeAttribute attribute, - const void* dev_ptr, size_t count) { - return hipCUDAErrorTohipError(cudaMemRangeGetAttribute(data, data_size, - hipMemRangeAttributeToCudaMemRangeAttribute(attribute), dev_ptr, count)); -} - -inline static 
hipError_t hipMemRangeGetAttributes(void** data, size_t* data_sizes, - hipMemRangeAttribute* attributes, - size_t num_attributes, const void* dev_ptr, - size_t count) { - return hipCUDAErrorTohipError(cudaMemRangeGetAttributes(data, data_sizes, attributes, - num_attributes, dev_ptr, count)); -} - -inline static hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr, - size_t length __dparm(0), - unsigned int flags __dparm(hipMemAttachSingle)) { - return hipCUDAErrorTohipError(cudaStreamAttachMemAsync(stream, dev_ptr, length, flags)); -} - -inline static hipError_t hipMallocManaged(void** ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaMallocManaged(ptr, size, flags)); -} - -inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, - size_t width, size_t height __dparm(0), - unsigned int flags __dparm(hipArrayDefault)) { - return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags)); -} - -inline static hipError_t hipMalloc3DArray(hipArray** array, const hipChannelFormatDesc* desc, - hipExtent extent, unsigned int flags) { - return hipCUDAErrorTohipError(cudaMalloc3DArray(array, desc, extent, flags)); -} - -inline static hipError_t hipFreeArray(hipArray* array) { - return hipCUDAErrorTohipError(cudaFreeArray(array)); -} - -inline static hipError_t hipMipmappedArrayCreate(hipmipmappedArray* pHandle, - HIP_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, - unsigned int numMipmapLevels) { - return hipCUResultTohipError(cuMipmappedArrayCreate(pHandle, pMipmappedArrayDesc, numMipmapLevels)); -} - -inline static hipError_t hipMipmappedArrayDestroy(hipmipmappedArray hMipmappedArray) { - return hipCUResultTohipError(cuMipmappedArrayDestroy(hMipmappedArray)); -} - -inline static hipError_t hipMipmappedArrayGetLevel(hiparray* pLevelArray, - hipmipmappedArray hMipMappedArray, - unsigned int level) { - return hipCUResultTohipError(cuMipmappedArrayGetLevel((CUarray*)pLevelArray, 
hMipMappedArray, level)); -} - -inline static hipError_t hipMallocMipmappedArray(hipMipmappedArray_t* pHandle, - const hipChannelFormatDesc* desc, hipExtent extent, - unsigned int numLevels, unsigned int flags __dparm(0)) { - return hipCUDAErrorTohipError(cudaMallocMipmappedArray(pHandle, desc, extent, numLevels, flags)); -} - -inline static hipError_t hipFreeMipmappedArray(hipMipmappedArray_t hMipmappedArray) { - return hipCUDAErrorTohipError(cudaFreeMipmappedArray(hMipmappedArray)); -} - -inline static hipError_t hipGetMipmappedArrayLevel(hipArray_t* pLevelArray, - hipMipmappedArray_t hMipMappedArray, - unsigned int level) { - return hipCUDAErrorTohipError(cudaGetMipmappedArrayLevel(pLevelArray, hMipMappedArray, level)); -} - -inline static hipError_t hipHostGetDevicePointer(void** devPtr, void* hostPtr, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostGetDevicePointer(devPtr, hostPtr, flags)); -} - -inline static hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { - return hipCUDAErrorTohipError(cudaHostGetFlags(flagsPtr, hostPtr)); -} - -inline static hipError_t hipHostRegister(void* ptr, size_t size, unsigned int flags) { - return hipCUDAErrorTohipError(cudaHostRegister(ptr, size, flags)); -} - -inline static hipError_t hipHostUnregister(void* ptr) { - return hipCUDAErrorTohipError(cudaHostUnregister(ptr)); -} - -__HIP_DEPRECATED_MSG("use hipHostFree instead") -inline static hipError_t hipFreeHost(void* ptr) { - return hipCUDAErrorTohipError(cudaFreeHost(ptr)); -} - -inline static hipError_t hipHostFree(void* ptr) { - return hipCUDAErrorTohipError(cudaFreeHost(ptr)); -} - -inline static hipError_t hipSetDevice(int device) { - return hipCUDAErrorTohipError(cudaSetDevice(device)); -} - -inline static hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) { - - if (prop == NULL) { - return hipErrorInvalidValue; - } - - struct cudaDeviceProp cdprop; - memset(&cdprop, 0x0, sizeof(struct cudaDeviceProp)); - cdprop.major = 
prop->major; - cdprop.minor = prop->minor; - cdprop.totalGlobalMem = prop->totalGlobalMem; - cdprop.sharedMemPerBlock = prop->sharedMemPerBlock; - cdprop.regsPerBlock = prop->regsPerBlock; - cdprop.warpSize = prop->warpSize; - cdprop.maxThreadsPerBlock = prop->maxThreadsPerBlock; - cdprop.clockRate = prop->clockRate; - cdprop.totalConstMem = prop->totalConstMem; - cdprop.multiProcessorCount = prop->multiProcessorCount; - cdprop.l2CacheSize = prop->l2CacheSize; - cdprop.maxThreadsPerMultiProcessor = prop->maxThreadsPerMultiProcessor; - cdprop.computeMode = prop->computeMode; - cdprop.canMapHostMemory = prop->canMapHostMemory; - cdprop.memoryClockRate = prop->memoryClockRate; - cdprop.memoryBusWidth = prop->memoryBusWidth; - return hipCUDAErrorTohipError(cudaChooseDevice(device, &cdprop)); -} - -inline static hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t size) { - return hipCUResultTohipError(cuMemcpyHtoD(dst, src, size)); -} - -inline static hipError_t hipMemcpyDtoH(void* dst, hipDeviceptr_t src, size_t size) { - return hipCUResultTohipError(cuMemcpyDtoH(dst, src, size)); -} - -inline static hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size) { - return hipCUResultTohipError(cuMemcpyDtoD(dst, src, size)); -} - -inline static hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyHtoDAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyDtoHAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dst, hipDeviceptr_t src, size_t size, - hipStream_t stream) { - return hipCUResultTohipError(cuMemcpyDtoDAsync(dst, src, size, stream)); -} - -inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind copyKind) { - return 
hipCUDAErrorTohipError( - cudaMemcpy(dst, src, sizeBytes, copyKind)); -} - - -inline static hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind copyKind, hipStream_t stream) { - cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, copyKind, stream); - - if (error != cudaSuccess) return hipCUDAErrorTohipError(error); - - return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); -} - -inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError( - cudaMemcpyAsync(dst, src, sizeBytes, copyKind, stream)); -} - -inline static hipError_t hipMemcpyToSymbol( - const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), - hipMemcpyKind copyType __dparm(hipMemcpyKindToCudaMemcpyKind(hipMemcpyHostToDevice))) { - return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, copyType)); -} - -inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, - size_t sizeBytes, size_t offset, - hipMemcpyKind copyType, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync( - symbol, src, sizeBytes, offset, copyType, stream)); -} - -inline static hipError_t hipMemcpyFromSymbol( - void* dst, const void* symbolName, size_t sizeBytes, size_t offset __dparm(0), - hipMemcpyKind kind __dparm(hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost))) { - return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, kind)); -} - -inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, - size_t sizeBytes, size_t offset, - hipMemcpyKind kind, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync( - dst, symbolName, sizeBytes, offset, kind, stream)); -} - -inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { - 
return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName)); -} - -inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) { - return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName)); -} - -inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, - size_t width, size_t height, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind)); -} - -inline static hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) { - return hipCUResultTohipError(cuMemcpy2D(pCopy)); -} - -inline static hipError_t hipMemcpyParam2DAsync(const hip_Memcpy2D* pCopy, hipStream_t stream __dparm(0)) { - return hipCUResultTohipError(cuMemcpy2DAsync(pCopy, stream)); -} - -inline static hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) { - return hipCUDAErrorTohipError(cudaMemcpy3D(p)); -} - -inline static hipError_t hipMemcpy3DAsync(const struct hipMemcpy3DParms *p, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMemcpy3DAsync(p, stream)); -} - -inline static hipError_t hipDrvMemcpy3D(const HIP_MEMCPY3D* pCopy) { - return hipCUResultTohipError(cuMemcpy3D(pCopy)); -} - -inline static hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { - return hipCUResultTohipError(cuMemcpy3DAsync(pCopy, stream)); -} - -inline static hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, - size_t width, size_t height, hipMemcpyKind kind, - hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, - kind, stream)); -} - -inline static hipError_t hipMemcpy2DFromArray(void* dst, size_t dpitch, hipArray* src, - size_t wOffset, size_t hOffset, size_t width, - size_t height, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaMemcpy2DFromArray(dst, dpitch, src, wOffset, hOffset, width, - height, - kind)); -} - -inline static hipError_t 
hipMemcpy2DFromArrayAsync(void* dst, size_t dpitch, hipArray* src, - size_t wOffset, size_t hOffset, size_t width, - size_t height, hipMemcpyKind kind, - hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset, - width, height, - kind, - stream)); -} - -inline static hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, - const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, - height, kind)); -} - -inline static hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, - const void* src, size_t spitch, size_t width, - size_t height, hipMemcpyKind kind, - hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch, - width, height, - kind, - stream)); -} - -__HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, - size_t hOffset, const void* src, - size_t count, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaMemcpyToArray(dst, wOffset, hOffset, src, count, kind)); -} - -__HIP_DEPRECATED inline static hipError_t hipMemcpyFromArray(void* dst, hipArray_const_t srcArray, - size_t wOffset, size_t hOffset, - size_t count, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaMemcpyFromArray(dst, srcArray, wOffset, hOffset, count, - kind)); -} - -inline static hipError_t hipMemcpyAtoH(void* dst, hipArray* srcArray, size_t srcOffset, - size_t count) { - return hipCUResultTohipError(cuMemcpyAtoH(dst, (CUarray)srcArray, srcOffset, count)); -} - -inline static hipError_t hipMemcpyHtoA(hipArray* dstArray, size_t dstOffset, const void* srcHost, - size_t count) { - return hipCUResultTohipError(cuMemcpyHtoA((CUarray)dstArray, dstOffset, srcHost, count)); -} - -inline static hipError_t hipDeviceSynchronize() { - return 
hipCUDAErrorTohipError(cudaDeviceSynchronize()); -} - -inline static hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* pCacheConfig) { - return hipCUDAErrorTohipError(cudaDeviceGetCacheConfig(pCacheConfig)); -} - -inline static hipError_t hipFuncSetAttribute(const void* func, hipFuncAttribute attr, int value) { - return hipCUDAErrorTohipError(cudaFuncSetAttribute(func, attr, value)); -} - -inline static hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { - return hipCUDAErrorTohipError(cudaDeviceSetCacheConfig(cacheConfig)); -} - -inline static hipError_t hipFuncSetSharedMemConfig(const void* func, hipSharedMemConfig config) { - return hipCUDAErrorTohipError(cudaFuncSetSharedMemConfig(func, config)); -} - -inline static const char* hipGetErrorString(hipError_t error) { - return cudaGetErrorString(hipErrorToCudaError(error)); -} - -inline static const char* hipGetErrorName(hipError_t error) { - return cudaGetErrorName(hipErrorToCudaError(error)); -} - -inline static hipError_t hipDrvGetErrorString(hipError_t error, const char** errorString) { - CUresult err = hipErrorToCUResult(error); - if( err == CUDA_ERROR_UNKNOWN ) { - return hipCUResultTohipError(cuGetErrorString((CUresult)error, errorString)); - } else { - return hipCUResultTohipError(cuGetErrorString(err, errorString)); - } -} - -inline static hipError_t hipDrvGetErrorName(hipError_t error, const char** errorString) { - CUresult err = hipErrorToCUResult(error); - if( err == CUDA_ERROR_UNKNOWN ) { - return hipCUResultTohipError(cuGetErrorName((CUresult)error, errorString)); - } else { - return hipCUResultTohipError(cuGetErrorName(err, errorString)); - } -} - -inline static hipError_t hipGetDeviceCount(int* count) { - return hipCUDAErrorTohipError(cudaGetDeviceCount(count)); -} - -inline static hipError_t hipGetDevice(int* device) { - return hipCUDAErrorTohipError(cudaGetDevice(device)); -} - -inline static hipError_t hipIpcCloseMemHandle(void* devPtr) { - return 
hipCUDAErrorTohipError(cudaIpcCloseMemHandle(devPtr)); -} - -inline static hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event) { - return hipCUDAErrorTohipError(cudaIpcGetEventHandle(handle, event)); -} - -inline static hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr) { - return hipCUDAErrorTohipError(cudaIpcGetMemHandle(handle, devPtr)); -} - -inline static hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle) { - return hipCUDAErrorTohipError(cudaIpcOpenEventHandle(event, handle)); -} - -inline static hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaIpcOpenMemHandle(devPtr, handle, flags)); -} - -inline static hipError_t hipMemset(void* devPtr, int value, size_t count) { - return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); -} - -inline static hipError_t hipMemsetD32(hipDeviceptr_t devPtr, int value, size_t count) { - return hipCUResultTohipError(cuMemsetD32(devPtr, value, count)); -} - -inline static hipError_t hipMemsetAsync(void* devPtr, int value, size_t count, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); -} - -inline static hipError_t hipMemsetD32Async(hipDeviceptr_t devPtr, int value, size_t count, - hipStream_t stream __dparm(0)) { - return hipCUResultTohipError(cuMemsetD32Async(devPtr, value, count, stream)); -} - -inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes) { - return hipCUResultTohipError(cuMemsetD8(dest, value, sizeBytes)); -} - -inline static hipError_t hipMemsetD8Async(hipDeviceptr_t dest, unsigned char value, size_t sizeBytes, - hipStream_t stream __dparm(0)) { - return hipCUResultTohipError(cuMemsetD8Async(dest, value, sizeBytes, stream)); -} - -inline static hipError_t hipMemsetD16(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes) { - return 
hipCUResultTohipError(cuMemsetD16(dest, value, sizeBytes)); -} - -inline static hipError_t hipMemsetD16Async(hipDeviceptr_t dest, unsigned short value, size_t sizeBytes, - hipStream_t stream __dparm(0)) { - return hipCUResultTohipError(cuMemsetD16Async(dest, value, sizeBytes, stream)); -} - -inline static hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { - return hipCUDAErrorTohipError(cudaMemset2D(dst, pitch, value, width, height)); -} - -inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream)); -} - -inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ){ - return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent)); -} - -inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent, hipStream_t stream __dparm(0) ){ - return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream)); -} - -inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) { - - if (p_prop == NULL) { - return hipErrorInvalidValue; - } - - struct cudaDeviceProp cdprop; - cudaError_t cerror; - cerror = cudaGetDeviceProperties(&cdprop, device); - - strncpy(p_prop->name, cdprop.name, 256); - p_prop->totalGlobalMem = cdprop.totalGlobalMem; - p_prop->sharedMemPerBlock = cdprop.sharedMemPerBlock; - p_prop->regsPerBlock = cdprop.regsPerBlock; - p_prop->warpSize = cdprop.warpSize; - p_prop->maxThreadsPerBlock = cdprop.maxThreadsPerBlock; - for (int i = 0; i < 3; i++) { - p_prop->maxThreadsDim[i] = cdprop.maxThreadsDim[i]; - p_prop->maxGridSize[i] = cdprop.maxGridSize[i]; - } - p_prop->clockRate = cdprop.clockRate; - p_prop->memoryClockRate = cdprop.memoryClockRate; - p_prop->memoryBusWidth = cdprop.memoryBusWidth; - p_prop->totalConstMem = 
cdprop.totalConstMem; - p_prop->major = cdprop.major; - p_prop->minor = cdprop.minor; - p_prop->multiProcessorCount = cdprop.multiProcessorCount; - p_prop->l2CacheSize = cdprop.l2CacheSize; - p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor; - p_prop->computeMode = cdprop.computeMode; - p_prop->clockInstructionRate = cdprop.clockRate; // Same as clock-rate: - - int ccVers = p_prop->major * 100 + p_prop->minor * 10; - p_prop->arch.hasGlobalInt32Atomics = (ccVers >= 110); - p_prop->arch.hasGlobalFloatAtomicExch = (ccVers >= 110); - p_prop->arch.hasSharedInt32Atomics = (ccVers >= 120); - p_prop->arch.hasSharedFloatAtomicExch = (ccVers >= 120); - p_prop->arch.hasFloatAtomicAdd = (ccVers >= 200); - p_prop->arch.hasGlobalInt64Atomics = (ccVers >= 120); - p_prop->arch.hasSharedInt64Atomics = (ccVers >= 110); - p_prop->arch.hasDoubles = (ccVers >= 130); - p_prop->arch.hasWarpVote = (ccVers >= 120); - p_prop->arch.hasWarpBallot = (ccVers >= 200); - p_prop->arch.hasWarpShuffle = (ccVers >= 300); - p_prop->arch.hasFunnelShift = (ccVers >= 350); - p_prop->arch.hasThreadFenceSystem = (ccVers >= 200); - p_prop->arch.hasSyncThreadsExt = (ccVers >= 200); - p_prop->arch.hasSurfaceFuncs = (ccVers >= 200); - p_prop->arch.has3dGrid = (ccVers >= 200); - p_prop->arch.hasDynamicParallelism = (ccVers >= 350); - - p_prop->concurrentKernels = cdprop.concurrentKernels; - p_prop->pciDomainID = cdprop.pciDomainID; - p_prop->pciBusID = cdprop.pciBusID; - p_prop->pciDeviceID = cdprop.pciDeviceID; - p_prop->maxSharedMemoryPerMultiProcessor = cdprop.sharedMemPerMultiprocessor; - p_prop->isMultiGpuBoard = cdprop.isMultiGpuBoard; - p_prop->canMapHostMemory = cdprop.canMapHostMemory; - p_prop->gcnArch = 0; // Not a GCN arch - p_prop->integrated = cdprop.integrated; - p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; - p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; - p_prop->cooperativeMultiDeviceUnmatchedFunc = 0; - 
p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0; - p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0; - p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0; - - p_prop->maxTexture1D = cdprop.maxTexture1D; - p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0]; - p_prop->maxTexture2D[1] = cdprop.maxTexture2D[1]; - p_prop->maxTexture3D[0] = cdprop.maxTexture3D[0]; - p_prop->maxTexture3D[1] = cdprop.maxTexture3D[1]; - p_prop->maxTexture3D[2] = cdprop.maxTexture3D[2]; - - p_prop->memPitch = cdprop.memPitch; - p_prop->textureAlignment = cdprop.textureAlignment; - p_prop->texturePitchAlignment = cdprop.texturePitchAlignment; - p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled; - p_prop->ECCEnabled = cdprop.ECCEnabled; - p_prop->tccDriver = cdprop.tccDriver; - - return hipCUDAErrorTohipError(cerror); -} - -inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - enum cudaDeviceAttr cdattr; - cudaError_t cerror; - - switch (attr) { - case hipDeviceAttributeMaxThreadsPerBlock: - cdattr = cudaDevAttrMaxThreadsPerBlock; - break; - case hipDeviceAttributeMaxBlockDimX: - cdattr = cudaDevAttrMaxBlockDimX; - break; - case hipDeviceAttributeMaxBlockDimY: - cdattr = cudaDevAttrMaxBlockDimY; - break; - case hipDeviceAttributeMaxBlockDimZ: - cdattr = cudaDevAttrMaxBlockDimZ; - break; - case hipDeviceAttributeMaxGridDimX: - cdattr = cudaDevAttrMaxGridDimX; - break; - case hipDeviceAttributeMaxGridDimY: - cdattr = cudaDevAttrMaxGridDimY; - break; - case hipDeviceAttributeMaxGridDimZ: - cdattr = cudaDevAttrMaxGridDimZ; - break; - case hipDeviceAttributeMaxSharedMemoryPerBlock: - cdattr = cudaDevAttrMaxSharedMemoryPerBlock; - break; - case hipDeviceAttributeTotalConstantMemory: - cdattr = cudaDevAttrTotalConstantMemory; - break; - case hipDeviceAttributeWarpSize: - cdattr = cudaDevAttrWarpSize; - break; - case hipDeviceAttributeMaxRegistersPerBlock: - cdattr = cudaDevAttrMaxRegistersPerBlock; - break; - case 
hipDeviceAttributeClockRate: - cdattr = cudaDevAttrClockRate; - break; - case hipDeviceAttributeMemoryClockRate: - cdattr = cudaDevAttrMemoryClockRate; - break; - case hipDeviceAttributeMemoryBusWidth: - cdattr = cudaDevAttrGlobalMemoryBusWidth; - break; - case hipDeviceAttributeMultiprocessorCount: - cdattr = cudaDevAttrMultiProcessorCount; - break; - case hipDeviceAttributeComputeMode: - cdattr = cudaDevAttrComputeMode; - break; - case hipDeviceAttributeL2CacheSize: - cdattr = cudaDevAttrL2CacheSize; - break; - case hipDeviceAttributeMaxThreadsPerMultiProcessor: - cdattr = cudaDevAttrMaxThreadsPerMultiProcessor; - break; - case hipDeviceAttributeComputeCapabilityMajor: - cdattr = cudaDevAttrComputeCapabilityMajor; - break; - case hipDeviceAttributeComputeCapabilityMinor: - cdattr = cudaDevAttrComputeCapabilityMinor; - break; - case hipDeviceAttributeConcurrentKernels: - cdattr = cudaDevAttrConcurrentKernels; - break; - case hipDeviceAttributePciBusId: - cdattr = cudaDevAttrPciBusId; - break; - case hipDeviceAttributePciDeviceId: - cdattr = cudaDevAttrPciDeviceId; - break; - case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - cdattr = cudaDevAttrMaxSharedMemoryPerMultiprocessor; - break; - case hipDeviceAttributeIsMultiGpuBoard: - cdattr = cudaDevAttrIsMultiGpuBoard; - break; - case hipDeviceAttributeIntegrated: - cdattr = cudaDevAttrIntegrated; - break; - case hipDeviceAttributeMaxTexture1DWidth: - cdattr = cudaDevAttrMaxTexture1DWidth; - break; - case hipDeviceAttributeMaxTexture2DWidth: - cdattr = cudaDevAttrMaxTexture2DWidth; - break; - case hipDeviceAttributeMaxTexture2DHeight: - cdattr = cudaDevAttrMaxTexture2DHeight; - break; - case hipDeviceAttributeMaxTexture3DWidth: - cdattr = cudaDevAttrMaxTexture3DWidth; - break; - case hipDeviceAttributeMaxTexture3DHeight: - cdattr = cudaDevAttrMaxTexture3DHeight; - break; - case hipDeviceAttributeMaxTexture3DDepth: - cdattr = cudaDevAttrMaxTexture3DDepth; - break; - case hipDeviceAttributeMaxPitch: - cdattr = 
cudaDevAttrMaxPitch; - break; - case hipDeviceAttributeTextureAlignment: - cdattr = cudaDevAttrTextureAlignment; - break; - case hipDeviceAttributeTexturePitchAlignment: - cdattr = cudaDevAttrTexturePitchAlignment; - break; - case hipDeviceAttributeKernelExecTimeout: - cdattr = cudaDevAttrKernelExecTimeout; - break; - case hipDeviceAttributeCanMapHostMemory: - cdattr = cudaDevAttrCanMapHostMemory; - break; - case hipDeviceAttributeEccEnabled: - cdattr = cudaDevAttrEccEnabled; - break; - case hipDeviceAttributeCooperativeLaunch: - cdattr = cudaDevAttrCooperativeLaunch; - break; - case hipDeviceAttributeCooperativeMultiDeviceLaunch: - cdattr = cudaDevAttrCooperativeMultiDeviceLaunch; - break; - case hipDeviceAttributeConcurrentManagedAccess: - cdattr = cudaDevAttrConcurrentManagedAccess; - break; - case hipDeviceAttributeManagedMemory: - cdattr = cudaDevAttrManagedMemory; - break; - case hipDeviceAttributePageableMemoryAccessUsesHostPageTables: - cdattr = cudaDevAttrPageableMemoryAccessUsesHostPageTables; - break; - case hipDeviceAttributePageableMemoryAccess: - cdattr = cudaDevAttrPageableMemoryAccess; - break; - case hipDeviceAttributeDirectManagedMemAccessFromHost: - cdattr = cudaDevAttrDirectManagedMemAccessFromHost; - break; - case hipDeviceAttributeGlobalL1CacheSupported: - cdattr = cudaDevAttrGlobalL1CacheSupported; - break; - case hipDeviceAttributeMaxBlocksPerMultiProcessor: - cdattr = cudaDevAttrMaxBlocksPerMultiprocessor; - break; - case hipDeviceAttributeMultiGpuBoardGroupID: - cdattr = cudaDevAttrMultiGpuBoardGroupID; - break; - case hipDeviceAttributeReservedSharedMemPerBlock: - cdattr = cudaDevAttrReservedSharedMemoryPerBlock; - break; - case hipDeviceAttributeSingleToDoublePrecisionPerfRatio: - cdattr = cudaDevAttrSingleToDoublePrecisionPerfRatio; - break; - case hipDeviceAttributeStreamPrioritiesSupported: - cdattr = cudaDevAttrStreamPrioritiesSupported; - break; - case hipDeviceAttributeSurfaceAlignment: - cdattr = cudaDevAttrSurfaceAlignment; - 
break; - case hipDeviceAttributeTccDriver: - cdattr = cudaDevAttrTccDriver; - break; - case hipDeviceAttributeUnifiedAddressing: - cdattr = cudaDevAttrUnifiedAddressing; - break; -#if CUDA_VERSION >= CUDA_11020 - case hipDeviceAttributeMemoryPoolsSupported: - cdattr = cudaDevAttrMemoryPoolsSupported; - break; -#endif // CUDA_VERSION >= CUDA_11020 - case hipDeviceAttributeVirtualMemoryManagementSupported: - return hipCUResultTohipError(cuDeviceGetAttribute(pi, - CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED, - device)); - case hipDeviceAttributeAccessPolicyMaxWindowSize: - cdattr = cudaDevAttrMaxAccessPolicyWindowSize; - break; - case hipDeviceAttributeAsyncEngineCount: - cdattr = cudaDevAttrAsyncEngineCount; - break; - case hipDeviceAttributeCanUseHostPointerForRegisteredMem: - cdattr = cudaDevAttrCanUseHostPointerForRegisteredMem; - break; - case hipDeviceAttributeComputePreemptionSupported: - cdattr = cudaDevAttrComputePreemptionSupported; - break; - case hipDeviceAttributeHostNativeAtomicSupported: - cdattr = cudaDevAttrHostNativeAtomicSupported; - break; - default: - return hipCUDAErrorTohipError(cudaErrorInvalidValue); - } - cerror = cudaDeviceGetAttribute(pi, cdattr, device); - return hipCUDAErrorTohipError(cerror); -} -#if CUDA_VERSION >= CUDA_10020 -inline static CUmemAllocationProp hipMemAllocationPropToCUmemAllocationProp(const hipMemAllocationProp* prop) { - CUmemAllocationProp cuProp; - cuProp.type = (CUmemAllocationType)prop->type; - cuProp.requestedHandleTypes = (CUmemAllocationHandleType)prop->requestedHandleTypes; - cuProp.location.type = (CUmemLocationType)prop->location.type; - cuProp.location.id = prop->location.id; - cuProp.win32HandleMetaData = prop->win32HandleMetaData; - cuProp.allocFlags.compressionType = prop->allocFlags.compressionType; - cuProp.allocFlags.gpuDirectRDMACapable = prop->allocFlags.gpuDirectRDMACapable; - cuProp.allocFlags.usage = prop->allocFlags.usage; - cuProp.allocFlags.reserved[0] = 
prop->allocFlags.reserved[0]; - cuProp.allocFlags.reserved[1] = prop->allocFlags.reserved[1]; - cuProp.allocFlags.reserved[2] = prop->allocFlags.reserved[2]; - cuProp.allocFlags.reserved[3] = prop->allocFlags.reserved[3]; - return cuProp; -} -inline static CUmemLocation hipMemLocationToCUmemLocation(const hipMemLocation* loc) { - CUmemLocation cuLoc; - cuLoc.id = loc->id; - cuLoc.type = (CUmemLocationType)loc->type; - return cuLoc; -} -inline static CUmemAccessDesc hipMemAccessDescToCUmemAccessDesc(const hipMemAccessDesc* desc) { - CUmemAccessDesc cuDesc; - cuDesc.flags = (CUmemAccess_flags)desc->flags; - cuDesc.location.id = (desc->location).id; - cuDesc.location.type = (CUmemLocationType)((desc->location).type); - return cuDesc; -} -inline static hipError_t hipMemGetAllocationGranularity(size_t* granularity, - const hipMemAllocationProp* prop, - hipMemAllocationGranularity_flags option) { - CUmemAllocationProp cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); - return hipCUResultTohipError(cuMemGetAllocationGranularity(granularity, &cuProp, option)); -} -inline static hipError_t hipMemCreate(hipMemGenericAllocationHandle_t* handle, - size_t size, - const hipMemAllocationProp* prop, - unsigned long long flags) { - CUmemAllocationProp cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); - return hipCUResultTohipError(cuMemCreate(handle, size, &cuProp, flags)); -} -inline static hipError_t hipMemRelease(hipMemGenericAllocationHandle_t handle) { - return hipCUResultTohipError(cuMemRelease(handle)); -} -inline static hipError_t hipMemAddressFree(hipDeviceptr_t ptr, size_t size) { - return hipCUResultTohipError(cuMemAddressFree(ptr, size)); -} -inline static hipError_t hipMemAddressReserve(hipDeviceptr_t* ptr, - size_t size, - size_t alignment, - hipDeviceptr_t addr, - unsigned long long flags) { - return hipCUResultTohipError(cuMemAddressReserve(ptr, size, alignment, addr, flags)); -} -inline static hipError_t hipMemExportToShareableHandle(void* 
shareableHandle, - hipMemGenericAllocationHandle_t handle, - hipMemAllocationHandleType handleType, - unsigned long long flags) { - return hipCUResultTohipError(cuMemExportToShareableHandle(shareableHandle, handle, (CUmemAllocationHandleType)handleType, flags)); -} -inline static hipError_t hipMemGetAccess(unsigned long long* flags, - const hipMemLocation* location, - hipDeviceptr_t ptr) { - CUmemLocation loc = hipMemLocationToCUmemLocation(location); - return hipCUResultTohipError(cuMemGetAccess(flags, &loc, ptr)); -} -inline static hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, - hipMemGenericAllocationHandle_t handle) { - CUmemAllocationProp cuProp = hipMemAllocationPropToCUmemAllocationProp(prop); - return hipCUResultTohipError(cuMemGetAllocationPropertiesFromHandle(&cuProp, handle)); -} -inline static hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t* handle, - void* osHandle, - hipMemAllocationHandleType shHandleType) { - return hipCUResultTohipError(cuMemImportFromShareableHandle(handle, osHandle, (CUmemAllocationHandleType)shHandleType)); -} -inline static hipError_t hipMemMap(hipDeviceptr_t ptr, size_t size, size_t offset, - hipMemGenericAllocationHandle_t handle, - unsigned long long flags) { - return hipCUResultTohipError(cuMemMap(ptr, size, offset, handle, flags)); -} -inline static hipError_t hipMemMapArrayAsync(hipArrayMapInfo* mapInfoList, - unsigned int count, - hipStream_t stream) { - return hipCUResultTohipError(cuMemMapArrayAsync(mapInfoList, count, stream)); -} -inline static hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, - void* addr) { - return hipCUResultTohipError(cuMemRetainAllocationHandle(handle, addr)); -} -inline static hipError_t hipMemSetAccess(hipDeviceptr_t ptr, size_t size, - const hipMemAccessDesc* desc, - size_t count) { - CUmemAccessDesc cuDesc = hipMemAccessDescToCUmemAccessDesc(desc); - return hipCUResultTohipError(cuMemSetAccess(ptr, 
size, &cuDesc, count)); -} -inline static hipError_t hipMemUnmap(hipDeviceptr_t ptr, size_t size) { - return hipCUResultTohipError(cuMemUnmap(ptr, size)); -} -#endif // CUDA_VERSION >= CUDA_10020 - -inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - const void* func, - int blockSize, - size_t dynamicSMemSize) { - return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, - blockSize, dynamicSMemSize)); -} - -inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, - const void* func, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, - blockSize, dynamicSMemSize, flags)); -} - -inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - hipFunction_t f, - int blockSize, - size_t dynamicSMemSize ){ - return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, - blockSize, dynamicSMemSize)); -} - -inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, - hipFunction_t f, - int blockSize, - size_t dynamicSMemSize, - unsigned int flags ) { - return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f, - blockSize, dynamicSMemSize, flags)); -} - -//TODO - Match CUoccupancyB2DSize -inline static hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit){ - return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL, - dynSharedMemPerBlk, blockSizeLimit)); -} - -//TODO - Match CUoccupancyB2DSize -inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - int blockSizeLimit, unsigned int flags){ - 
return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL, - dynSharedMemPerBlk, blockSizeLimit, flags)); -} - -inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { - struct cudaPointerAttributes cPA; - hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); - if (err == hipSuccess) { -#if (CUDART_VERSION >= 11000) - auto memType = cPA.type; -#else - unsigned memType = cPA.memoryType; // No auto because cuda 10.2 doesnt force c++11 -#endif - switch (memType) { - case cudaMemoryTypeDevice: - attributes->type = hipMemoryTypeDevice; - break; - case cudaMemoryTypeHost: - attributes->type = hipMemoryTypeHost; - break; - case cudaMemoryTypeManaged: - attributes->type = hipMemoryTypeManaged; - break; - default: - return hipErrorInvalidValue; - } - attributes->device = cPA.device; - attributes->devicePointer = cPA.devicePointer; - attributes->hostPointer = cPA.hostPointer; - attributes->isManaged = 0; - attributes->allocationFlags = 0; - } - return err; -} - -inline static hipError_t hipPointerGetAttribute(void* data, hipPointer_attribute attribute, - hipDeviceptr_t ptr) { - return hipCUResultTohipError(cuPointerGetAttribute(data, attribute, ptr)); -} - -inline static hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, - hipPointer_attribute* attributes, - void** data, hipDeviceptr_t ptr) { - return hipCUResultTohipError(cuPointerGetAttributes(numAttributes, attributes, data, ptr)); -} - -inline static hipError_t hipMemGetInfo(size_t* free, size_t* total) { - return hipCUDAErrorTohipError(cudaMemGetInfo(free, total)); -} - -inline static hipError_t hipEventCreate(hipEvent_t* event) { - return hipCUDAErrorTohipError(cudaEventCreate(event)); -} - -inline static hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream __dparm(NULL)) { - return hipCUDAErrorTohipError(cudaEventRecord(event, stream)); -} - -inline static hipError_t 
hipEventSynchronize(hipEvent_t event) { - return hipCUDAErrorTohipError(cudaEventSynchronize(event)); -} - -inline static hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) { - return hipCUDAErrorTohipError(cudaEventElapsedTime(ms, start, stop)); -} - -inline static hipError_t hipEventDestroy(hipEvent_t event) { - return hipCUDAErrorTohipError(cudaEventDestroy(event)); -} - -inline static hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) { - return hipCUDAErrorTohipError(cudaStreamCreateWithFlags(stream, flags)); -} - -inline static hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { - return hipCUDAErrorTohipError(cudaStreamCreateWithPriority(stream, flags, priority)); -} - -inline static hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { - return hipCUDAErrorTohipError(cudaDeviceGetStreamPriorityRange(leastPriority, greatestPriority)); -} - -inline static hipError_t hipStreamCreate(hipStream_t* stream) { - return hipCUDAErrorTohipError(cudaStreamCreate(stream)); -} - -inline static hipError_t hipStreamSynchronize(hipStream_t stream) { - return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); -} - -inline static hipError_t hipStreamDestroy(hipStream_t stream) { - return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); -} - -inline static hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { - return hipCUDAErrorTohipError(cudaStreamGetFlags(stream, flags)); -} - -inline static hipError_t hipStreamGetPriority(hipStream_t stream, int *priority) { - return hipCUDAErrorTohipError(cudaStreamGetPriority(stream, priority)); -} - -inline static hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaStreamWaitEvent(stream, event, flags)); -} - -inline static hipError_t hipStreamQuery(hipStream_t stream) { - return 
hipCUDAErrorTohipError(cudaStreamQuery(stream)); -} - -inline static hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, - void* userData, unsigned int flags) { - return hipCUDAErrorTohipError( - cudaStreamAddCallback(stream, (cudaStreamCallback_t)callback, userData, flags)); -} - -inline static hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) { - hipCtx_t context; - auto err = hipCUResultTohipError(cuStreamGetCtx(stream, &context)); - if (err != hipSuccess) return err; - - err = hipCUResultTohipError(cuCtxPushCurrent(context)); - if (err != hipSuccess) return err; - - err = hipCUResultTohipError(cuCtxGetDevice(device)); - if (err != hipSuccess) return err; - - return hipCUResultTohipError(cuCtxPopCurrent(&context)); -} - -inline static hipError_t hipDriverGetVersion(int* driverVersion) { - return hipCUDAErrorTohipError(cudaDriverGetVersion(driverVersion)); -} - -inline static hipError_t hipRuntimeGetVersion(int* runtimeVersion) { - return hipCUDAErrorTohipError(cudaRuntimeGetVersion(runtimeVersion)); -} - -inline static hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) { - return hipCUDAErrorTohipError(cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice)); -} - -inline static hipError_t hipDeviceDisablePeerAccess(int peerDevice) { - return hipCUDAErrorTohipError(cudaDeviceDisablePeerAccess(peerDevice)); -} - -inline static hipError_t hipDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { - return hipCUDAErrorTohipError(cudaDeviceEnablePeerAccess(peerDevice, flags)); -} - -inline static hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) { - return hipCUResultTohipError(cuCtxDisablePeerAccess(peerCtx)); -} - -inline static hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) { - return hipCUResultTohipError(cuCtxEnablePeerAccess(peerCtx, flags)); -} - -inline static hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, - int* 
active) { - return hipCUResultTohipError(cuDevicePrimaryCtxGetState(dev, flags, active)); -} - -inline static hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxRelease(dev)); -} - -inline static hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxRetain(pctx, dev)); -} - -inline static hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) { - return hipCUResultTohipError(cuDevicePrimaryCtxReset(dev)); -} - -inline static hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) { - return hipCUResultTohipError(cuDevicePrimaryCtxSetFlags(dev, flags)); -} - -inline static hipError_t hipMemGetAddressRange(hipDeviceptr_t* pbase, size_t* psize, - hipDeviceptr_t dptr) { - return hipCUResultTohipError(cuMemGetAddressRange(pbase, psize, dptr)); -} - -inline static hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, - size_t count) { - return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)); -} - -inline static hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, - int srcDevice, size_t count, - hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError( - cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)); -} - -// Profile APIs: -inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); } - -inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } - -inline static hipError_t hipGetDeviceFlags(unsigned int* flags) { - return hipCUDAErrorTohipError(cudaGetDeviceFlags(flags)); -} - -inline static hipError_t hipSetDeviceFlags(unsigned int flags) { - return hipCUDAErrorTohipError(cudaSetDeviceFlags(flags)); -} - -inline static hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned int flags) { - return 
hipCUDAErrorTohipError(cudaEventCreateWithFlags(event, flags)); -} - -inline static hipError_t hipEventQuery(hipEvent_t event) { - return hipCUDAErrorTohipError(cudaEventQuery(event)); -} - -inline static hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) { - return hipCUResultTohipError(cuCtxCreate(ctx, flags, device)); -} - -inline static hipError_t hipCtxDestroy(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxDestroy(ctx)); -} - -inline static hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { - return hipCUResultTohipError(cuCtxPopCurrent(ctx)); -} - -inline static hipError_t hipCtxPushCurrent(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxPushCurrent(ctx)); -} - -inline static hipError_t hipCtxSetCurrent(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxSetCurrent(ctx)); -} - -inline static hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { - return hipCUResultTohipError(cuCtxGetCurrent(ctx)); -} - -inline static hipError_t hipCtxGetDevice(hipDevice_t* device) { - return hipCUResultTohipError(cuCtxGetDevice(device)); -} - -inline static hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) { - return hipCUResultTohipError(cuCtxGetApiVersion(ctx, (unsigned int*)apiVersion)); -} - -inline static hipError_t hipCtxGetCacheConfig(hipFuncCache* cacheConfig) { - return hipCUResultTohipError(cuCtxGetCacheConfig(cacheConfig)); -} - -inline static hipError_t hipCtxSetCacheConfig(hipFuncCache cacheConfig) { - return hipCUResultTohipError(cuCtxSetCacheConfig(cacheConfig)); -} - -inline static hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) { - return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config)); -} - -inline static hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) { - return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig*)pConfig)); -} - -inline static hipError_t hipCtxSynchronize(void) { - return hipCUResultTohipError(cuCtxSynchronize()); -} - -inline static 
hipError_t hipCtxGetFlags(unsigned int* flags) { - return hipCUResultTohipError(cuCtxGetFlags(flags)); -} - -inline static hipError_t hipCtxDetach(hipCtx_t ctx) { - return hipCUResultTohipError(cuCtxDetach(ctx)); -} - -inline static hipError_t hipDeviceGet(hipDevice_t* device, int ordinal) { - return hipCUResultTohipError(cuDeviceGet(device, ordinal)); -} - -inline static hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceComputeCapability(major, minor, device)); -} - -inline static hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceGetName(name, len, device)); -} - -inline static hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) { - if (uuid == NULL) { - return hipErrorInvalidValue; - } - struct CUuuid_st CUuid; - hipError_t err = hipCUResultTohipError(cuDeviceGetUuid(&CUuid, device)); - if (err == hipSuccess) { - strncpy(uuid->bytes, CUuid.bytes, 16); - } - return err; -} - -inline static hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, - int srcDevice, int dstDevice) { - return hipCUDAErrorTohipError(cudaDeviceGetP2PAttribute(value, attr, srcDevice, dstDevice)); -} - -inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t device) { - return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId, len, device)); -} - -inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) { - return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId)); -} - -inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* config) { - return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config)); -} - -inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { - return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config)); -} - -inline static hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { - 
return hipCUDAErrorTohipError(cudaDeviceGetLimit(pValue, limit)); -} - -inline static hipError_t hipDeviceSetLimit(hipLimit_t limit, size_t value) { - return hipCUDAErrorTohipError(cudaDeviceSetLimit(limit, value)); -} - -inline static hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) { - return hipCUResultTohipError(cuDeviceTotalMem(bytes, device)); -} - -inline static hipError_t hipModuleLoad(hipModule_t* module, const char* fname) { - return hipCUResultTohipError(cuModuleLoad(module, fname)); -} - -inline static hipError_t hipModuleUnload(hipModule_t hmod) { - return hipCUResultTohipError(cuModuleUnload(hmod)); -} - -inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, - const char* kname) { - return hipCUResultTohipError(cuModuleGetFunction(function, module, kname)); -} - -inline static hipError_t hipModuleGetTexRef(hipTexRef* pTexRef, hipModule_t hmod, const char* name){ - return hipCUResultTohipError(cuModuleGetTexRef(pTexRef, hmod, name)); -} - -inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { - return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func)); -} - -inline static hipError_t hipFuncGetAttribute (int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { - return hipCUResultTohipError(cuFuncGetAttribute(value, attrib, hfunc)); -} - -inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod, - const char* name) { - return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name)); -} - -inline static hipError_t hipModuleLoadData(hipModule_t* module, const void* image) { - return hipCUResultTohipError(cuModuleLoadData(module, image)); -} - -inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* image, - unsigned int numOptions, hipJitOption* options, - void** optionValues) { - return hipCUResultTohipError( - cuModuleLoadDataEx(module, image, numOptions, options, optionValues)); 
-} - -inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks, - dim3 dimBlocks, void** args, size_t sharedMemBytes, - hipStream_t stream) { - return hipCUDAErrorTohipError( - cudaLaunchKernel(function_address, numBlocks, dimBlocks, args, sharedMemBytes, stream)); -} - -inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, - hipStream_t stream, void** kernelParams, - void** extra) { - return hipCUResultTohipError(cuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, - blockDimY, blockDimZ, sharedMemBytes, stream, - kernelParams, extra)); -} - -inline static hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { - return hipCUDAErrorTohipError(cudaFuncSetCacheConfig(func, cacheConfig)); -} - -#if CUDA_VERSION < CUDA_12000 -__HIP_DEPRECATED inline static hipError_t hipBindTexture(size_t* offset, - struct textureReference* tex, - const void* devPtr, - const hipChannelFormatDesc* desc, - size_t size __dparm(UINT_MAX)) { - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); -} - -__HIP_DEPRECATED inline static hipError_t hipBindTexture2D( - size_t* offset, struct textureReference* tex, const void* devPtr, - const hipChannelFormatDesc* desc, size_t width, size_t height, size_t pitch) { - return hipCUDAErrorTohipError(cudaBindTexture2D(offset, tex, devPtr, desc, width, height, pitch)); -} -#endif // CUDA_VERSION < CUDA_12000 - - -inline static hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, - hipChannelFormatKind f) { - return cudaCreateChannelDesc(x, y, z, w, hipChannelFormatKindToCudaChannelFormatKind(f)); -} - -inline static hipChannelFormatDesc hipCreateChannelDescHalf() { - int e = (int)sizeof(unsigned short) * 8; - return cudaCreateChannelDesc(e, 0, 0, 0, 
cudaChannelFormatKindFloat); -} - -inline static hipChannelFormatDesc hipCreateChannelDescHalf1() { - int e = (int)sizeof(unsigned short) * 8; - return cudaCreateChannelDesc(e, 0, 0, 0, cudaChannelFormatKindFloat); -} - -inline static hipChannelFormatDesc hipCreateChannelDescHalf2() { - int e = (int)sizeof(unsigned short) * 8; - return cudaCreateChannelDesc(e, e, 0, 0, cudaChannelFormatKindFloat); -} - -inline static hipChannelFormatDesc hipCreateChannelDescHalf4() { - int e = (int)sizeof(unsigned short) * 8; - return cudaCreateChannelDesc(e, e, e, e, cudaChannelFormatKindFloat); -} - -inline static hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, - const hipResourceDesc* pResDesc, - const hipTextureDesc* pTexDesc, - const hipResourceViewDesc* pResViewDesc) { - return hipCUDAErrorTohipError( - cudaCreateTextureObject(pTexObject, pResDesc, pTexDesc, pResViewDesc)); -} - -inline static hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) { - return hipCUDAErrorTohipError(cudaDestroyTextureObject(textureObject)); -} - -inline static hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject, - const hipResourceDesc* pResDesc) { - return hipCUDAErrorTohipError(cudaCreateSurfaceObject(pSurfObject, pResDesc)); -} - -inline static hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) { - return hipCUDAErrorTohipError(cudaDestroySurfaceObject(surfaceObject)); -} - -inline static hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, - hipTextureObject_t textureObject) { - return hipCUDAErrorTohipError(cudaGetTextureObjectResourceDesc( pResDesc, textureObject)); -} - -#if CUDA_VERSION < CUDA_12000 -__HIP_DEPRECATED inline static hipError_t hipGetTextureAlignmentOffset( - size_t* offset, const struct textureReference* texref) { - return hipCUDAErrorTohipError(cudaGetTextureAlignmentOffset(offset,texref)); -} -#endif - -inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t 
array) -{ - return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); -} - -inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, - void** kernelParams, unsigned int sharedMemBytes, - hipStream_t stream) { - return hipCUDAErrorTohipError( - cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); -} - -inline static hipError_t hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, - unsigned int gridDimY, unsigned int gridDimZ, - unsigned int blockDimX, unsigned int blockDimY, - unsigned int blockDimZ, unsigned int sharedMemBytes, - hipStream_t stream, void** kernelParams) { - return hipCUResultTohipError(cuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, - blockDimX, blockDimY, blockDimZ, - sharedMemBytes, stream,kernelParams)); -} - -inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, - int numDevices, unsigned int flags) { - return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)); -} - -inline static hipError_t hipModuleLaunchCooperativeKernelMultiDevice( - hipFunctionLaunchParams* launchParamsList, - unsigned int numDevices, - unsigned int flags) { - return hipCUResultTohipError(cuLaunchCooperativeKernelMultiDevice(launchParamsList, - numDevices, flags)); -} - -inline static hipError_t hipImportExternalSemaphore(hipExternalSemaphore_t* extSem_out, - const hipExternalSemaphoreHandleDesc* semHandleDesc) { - return hipCUDAErrorTohipError(cudaImportExternalSemaphore(extSem_out,(const struct cudaExternalSemaphoreHandleDesc*)semHandleDesc)); -} - -inline static hipError_t hipSignalExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, - const hipExternalSemaphoreSignalParams* paramsArray, - unsigned int numExtSems, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaSignalExternalSemaphoresAsync(extSemArray, (const struct 
cudaExternalSemaphoreSignalParams*)paramsArray, numExtSems, stream)); -} -inline static hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemArray, - const hipExternalSemaphoreWaitParams* paramsArray, - unsigned int numExtSems, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaWaitExternalSemaphoresAsync(extSemArray, (const struct cudaExternalSemaphoreWaitParams*)paramsArray, numExtSems, stream)); -} - -inline static hipError_t hipDestroyExternalSemaphore(hipExternalSemaphore_t extSem) { - return hipCUDAErrorTohipError(cudaDestroyExternalSemaphore(extSem)); -} - -inline static hipError_t hipImportExternalMemory(hipExternalMemory_t* extMem_out, const hipExternalMemoryHandleDesc* memHandleDesc) { - return hipCUDAErrorTohipError(cudaImportExternalMemory(extMem_out, (const struct cudaExternalMemoryHandleDesc*)memHandleDesc)); -} - -inline static hipError_t hipExternalMemoryGetMappedBuffer(void **devPtr, hipExternalMemory_t extMem, const hipExternalMemoryBufferDesc *bufferDesc) { - return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedBuffer(devPtr, extMem, (const struct cudaExternalMemoryBufferDesc*)bufferDesc)); -} - -inline static hipError_t hipExternalMemoryGetMappedMipmappedArray( - hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem, - const hipExternalMemoryMipmappedArrayDesc* mipmapDesc) { - return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedMipmappedArray( - (cudaMipmappedArray_t*)mipmap, (cudaExternalMemory_t)extMem, - (const struct cudaExternalMemoryMipmappedArrayDesc*)mipmapDesc)); -} - -inline static hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem) { - return hipCUDAErrorTohipError(cudaDestroyExternalMemory(extMem)); -} - -inline static hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaGraphicsMapResources(count, resources, stream)); -} - -inline static hipError_t 
hipGraphicsSubResourceGetMappedArray(hipArray_t* array, hipGraphicsResource_t resource, unsigned int arrayIndex, - unsigned int mipLevel) { - return hipCUDAErrorTohipError(cudaGraphicsSubResourceGetMappedArray(array, resource, arrayIndex, mipLevel)); -} - -inline static hipError_t hipGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, hipGraphicsResource_t resource) { - return hipCUDAErrorTohipError(cudaGraphicsResourceGetMappedPointer(devPtr, size, resource)); -} - -inline static hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources, hipStream_t stream __dparm(0)) { - return hipCUDAErrorTohipError(cudaGraphicsUnmapResources(count, resources, stream)); -} - -inline static hipError_t hipGraphicsUnregisterResource(hipGraphicsResource_t resource) { - return hipCUDAErrorTohipError(cudaGraphicsUnregisterResource(resource)); -} - -#if CUDA_VERSION >= CUDA_11020 -// ========================== HIP Stream Ordered Memory Allocator ================================= -inline static hipError_t hipDeviceGetDefaultMemPool(hipMemPool_t* mem_pool, int device) { - return hipCUDAErrorTohipError(cudaDeviceGetDefaultMemPool(mem_pool, device)); -} - -inline static hipError_t hipDeviceSetMemPool(int device, hipMemPool_t mem_pool) { - return hipCUDAErrorTohipError(cudaDeviceSetMemPool(device, mem_pool)); -} - -inline static hipError_t hipDeviceGetMemPool(hipMemPool_t* mem_pool, int device) { - return hipCUDAErrorTohipError(cudaDeviceGetMemPool(mem_pool, device)); -} - -inline static hipError_t hipMallocAsync(void** dev_ptr, size_t size, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMallocAsync(dev_ptr, size, stream)); -} - -inline static hipError_t hipFreeAsync(void* dev_ptr, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaFreeAsync(dev_ptr, stream)); -} - -inline static hipError_t hipMemPoolTrimTo(hipMemPool_t mem_pool, size_t min_bytes_to_hold) { - return hipCUDAErrorTohipError(cudaMemPoolTrimTo(mem_pool, min_bytes_to_hold)); -} 
- -inline static hipError_t hipMemPoolSetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { - return hipCUDAErrorTohipError(cudaMemPoolSetAttribute(mem_pool, attr, value)); -} - -inline static hipError_t hipMemPoolGetAttribute(hipMemPool_t mem_pool, hipMemPoolAttr attr, void* value) { - return hipCUDAErrorTohipError(cudaMemPoolGetAttribute(mem_pool, attr, value)); -} - -inline static hipError_t hipMemPoolSetAccess( - hipMemPool_t mem_pool, - const hipMemAccessDesc* desc_list, - size_t count) { - return hipCUDAErrorTohipError(cudaMemPoolSetAccess(mem_pool, desc_list, count)); -} - -inline static hipError_t hipMemPoolGetAccess( - hipMemAccessFlags* flags, - hipMemPool_t mem_pool, - hipMemLocation* location) { - return hipCUDAErrorTohipError(cudaMemPoolGetAccess(flags, mem_pool, location)); -} - -inline static hipError_t hipMemPoolCreate(hipMemPool_t* mem_pool, const hipMemPoolProps* pool_props) { - return hipCUDAErrorTohipError(cudaMemPoolCreate(mem_pool, pool_props)); -} - -inline static hipError_t hipMemPoolDestroy(hipMemPool_t mem_pool) { - return hipCUDAErrorTohipError(cudaMemPoolDestroy(mem_pool)); -} - -inline static hipError_t hipMallocFromPoolAsync( - void** dev_ptr, - size_t size, - hipMemPool_t mem_pool, - hipStream_t stream) { - return hipCUDAErrorTohipError(cudaMallocFromPoolAsync(dev_ptr, size, mem_pool, stream)); -} - -inline static hipError_t hipMemPoolExportToShareableHandle( - void* shared_handle, - hipMemPool_t mem_pool, - hipMemAllocationHandleType handle_type, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaMemPoolExportToShareableHandle( - shared_handle, mem_pool, handle_type, flags)); -} - -inline static hipError_t hipMemPoolImportFromShareableHandle( - hipMemPool_t* mem_pool, - void* shared_handle, - hipMemAllocationHandleType handle_type, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaMemPoolImportFromShareableHandle( - mem_pool, shared_handle, handle_type, flags)); -} - -inline static hipError_t 
hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* ptr) { - return hipCUDAErrorTohipError(cudaMemPoolExportPointer(export_data, ptr)); -} - -inline static hipError_t hipMemPoolImportPointer( - void** ptr, - hipMemPool_t mem_pool, - hipMemPoolPtrExportData* export_data) { - return hipCUDAErrorTohipError(cudaMemPoolImportPointer(ptr, mem_pool, export_data)); -} -#endif // CUDA_VERSION >= CUDA_11020 - -#ifdef __cplusplus -} -#endif - -#ifdef __CUDACC__ - -template -inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, - T func, - int blockSize, - size_t dynamicSMemSize) { - return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, - blockSize, dynamicSMemSize)); -} - -template -inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, T func, - size_t dynamicSMemSize = 0, - int blockSizeLimit = 0) { - return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, - dynamicSMemSize, blockSizeLimit)); -} - -template -inline static hipError_t hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(int* min_grid_size, - int* block_size, - T func, - UnaryFunction block_size_to_dynamic_smem_size, - int block_size_limit = 0, - unsigned int flags = 0) { - return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(min_grid_size, block_size, func, - block_size_to_dynamic_smem_size, block_size_limit,flags)); -} - -template -inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func, - size_t dynamicSMemSize = 0, - int blockSizeLimit = 0, unsigned int flags = 0) { - return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, - dynamicSMemSize, blockSizeLimit, flags)); -} - -template -inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func, - int blockSize, size_t 
dynamicSMemSize,unsigned int flags) { - return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, - blockSize, dynamicSMemSize, flags)); -} - -#if CUDA_VERSION < CUDA_12000 -template -inline static hipError_t hipBindTexture(size_t* offset, const struct texture& tex, - const void* devPtr, size_t size = UINT_MAX) { - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, size)); -} - -template -inline static hipError_t hipBindTexture(size_t* offset, struct texture& tex, - const void* devPtr, const hipChannelFormatDesc& desc, - size_t size = UINT_MAX) { - return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); -} - -template -__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture* tex) { - return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); -} - -template -__HIP_DEPRECATED inline static hipError_t hipUnbindTexture(struct texture& tex) { - return hipCUDAErrorTohipError(cudaUnbindTexture(tex)); -} - -template -__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( - struct texture& tex, hipArray_const_t array, - const hipChannelFormatDesc& desc) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); -} - -template -__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( - struct texture* tex, hipArray_const_t array, - const hipChannelFormatDesc* desc) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array, desc)); -} - -template -__HIP_DEPRECATED inline static hipError_t hipBindTextureToArray( - struct texture& tex, hipArray_const_t array) { - return hipCUDAErrorTohipError(cudaBindTextureToArray(tex, array)); -} -#endif // CUDA_VERSION < CUDA_12000 - -template -inline static hipChannelFormatDesc hipCreateChannelDesc() { - return cudaCreateChannelDesc(); -} - -template -inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, - void** kernelParams, unsigned int sharedMemBytes, hipStream_t 
stream) { - return hipCUDAErrorTohipError( - cudaLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDim, kernelParams, sharedMemBytes, stream)); -} - -inline static hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, - const HIP_RESOURCE_DESC* pResDesc, - const HIP_TEXTURE_DESC* pTexDesc, - const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { - return hipCUResultTohipError(cuTexObjectCreate((CUtexObject*)pTexObject, pResDesc, pTexDesc, pResViewDesc)); -} - -inline static hipError_t hipTexObjectDestroy(hipTextureObject_t texObject) { - return hipCUResultTohipError(cuTexObjectDestroy((CUtexObject)texObject)); -} - -inline static hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject) { - return hipCUResultTohipError(cuTexObjectGetResourceDesc(pResDesc, (CUtexObject)texObject)); -} - -inline static hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, hipTextureObject_t texObject) { - return hipCUResultTohipError(cuTexObjectGetResourceViewDesc(pResViewDesc, (CUtexObject)texObject)); -} - -inline static hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t texObject) { - return hipCUResultTohipError(cuTexObjectGetTextureDesc(pTexDesc, (CUtexObject)texObject)); -} - -__HIP_DEPRECATED inline static hipError_t hipTexRefSetAddressMode(hipTexRef hTexRef, int dim, hipAddress_mode am){ - return hipCUResultTohipError(cuTexRefSetAddressMode(hTexRef,dim,am)); -} - -__HIP_DEPRECATED inline static hipError_t hipTexRefSetFilterMode(hipTexRef hTexRef, hipFilter_mode fm){ - return hipCUResultTohipError(cuTexRefSetFilterMode(hTexRef,fm)); -} - -inline static hipError_t hipTexRefSetAddress(size_t *ByteOffset, hipTexRef hTexRef, hipDeviceptr_t dptr, size_t bytes){ - return hipCUResultTohipError(cuTexRefSetAddress(ByteOffset,hTexRef,dptr,bytes)); -} - -inline static hipError_t hipTexRefSetAddress2D(hipTexRef hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, hipDeviceptr_t dptr, size_t 
Pitch){ - return hipCUResultTohipError(cuTexRefSetAddress2D(hTexRef,desc,dptr,Pitch)); -} - -__HIP_DEPRECATED inline static hipError_t hipTexRefSetFormat(hipTexRef hTexRef, hipArray_Format fmt, int NumPackedComponents){ - return hipCUResultTohipError(cuTexRefSetFormat(hTexRef,fmt,NumPackedComponents)); -} - -__HIP_DEPRECATED inline static hipError_t hipTexRefSetFlags(hipTexRef hTexRef, unsigned int Flags){ - return hipCUResultTohipError(cuTexRefSetFlags(hTexRef,Flags)); -} - -__HIP_DEPRECATED inline static hipError_t hipTexRefSetArray(hipTexRef hTexRef, hiparray hArray, unsigned int Flags){ - return hipCUResultTohipError(cuTexRefSetArray(hTexRef,hArray,Flags)); -} - -inline static hipError_t hipArrayCreate(hiparray* pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray){ - return hipCUResultTohipError(cuArrayCreate(pHandle, pAllocateArray)); -} - -inline static hipError_t hipArrayDestroy(hiparray hArray){ - return hipCUResultTohipError(cuArrayDestroy(hArray)); -} - -inline static hipError_t hipArray3DCreate(hiparray* pHandle, - const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray){ - return hipCUResultTohipError(cuArray3DCreate(pHandle, pAllocateArray)); -} - -inline static hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, - unsigned int* flags, hipArray* array) { - return hipCUDAErrorTohipError(cudaArrayGetInfo(desc, extent, flags, array)); -} - -inline static hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, - hipArray* array) { - return hipCUResultTohipError(cuArrayGetDescriptor(pArrayDescriptor, (CUarray)array)); -} - -inline static hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, - hipArray* array) { - return hipCUResultTohipError(cuArray3DGetDescriptor(pArrayDescriptor, (CUarray)array)); -} - -inline static hipError_t hipStreamBeginCapture(hipStream_t stream, hipStreamCaptureMode mode) { - return hipCUDAErrorTohipError(cudaStreamBeginCapture(stream, mode)); -} - -inline static hipError_t 
hipStreamEndCapture(hipStream_t stream, hipGraph_t* pGraph) { - return hipCUDAErrorTohipError(cudaStreamEndCapture(stream, pGraph)); -} - -inline static hipError_t hipGraphCreate(hipGraph_t* pGraph, unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphCreate(pGraph, flags)); -} - -inline static hipError_t hipGraphDestroy(hipGraph_t graph) { - return hipCUDAErrorTohipError(cudaGraphDestroy(graph)); -} - -inline static hipError_t hipGraphExecDestroy(hipGraphExec_t pGraphExec) { - return hipCUDAErrorTohipError(cudaGraphExecDestroy(pGraphExec)); -} - -inline static hipError_t hipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph, - hipGraphNode_t* pErrorNode, char* pLogBuffer, - size_t bufferSize) { - return hipCUDAErrorTohipError( - cudaGraphInstantiate(pGraphExec, graph, pErrorNode, pLogBuffer, bufferSize)); -} - -#if CUDA_VERSION >= CUDA_11040 -inline static hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t* pGraphExec, hipGraph_t graph, - unsigned long long flags) { - return hipCUDAErrorTohipError(cudaGraphInstantiateWithFlags(pGraphExec, graph, flags)); -} - -inline hipError_t hipGraphAddMemAllocNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, - hipMemAllocNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphAddMemAllocNode( - pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); -} - -inline hipError_t hipGraphMemAllocNodeGetParams(hipGraphNode_t node, - hipMemAllocNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphMemAllocNodeGetParams(node, pNodeParams)); -} - -inline hipError_t hipGraphAddMemFreeNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, void* dev_ptr) { - return hipCUDAErrorTohipError(cudaGraphAddMemFreeNode( - pGraphNode, graph, pDependencies, numDependencies, dev_ptr)); -} - -inline hipError_t hipGraphMemFreeNodeGetParams(hipGraphNode_t node, void* 
dev_ptr) { - return hipCUDAErrorTohipError(cudaGraphMemFreeNodeGetParams(node, dev_ptr)); -} -#endif -inline static hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaGraphLaunch(graphExec, stream)); -} - -inline static hipError_t hipGraphAddKernelNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, - const hipKernelNodeParams* pNodeParams) { - return hipCUDAErrorTohipError( - cudaGraphAddKernelNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); -} - -inline static hipError_t hipGraphAddMemcpyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, - const hipMemcpy3DParms* pCopyParams) { - return hipCUDAErrorTohipError( - cudaGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, pCopyParams)); -} - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, size_t numDependencies, - void* dst, const void* src, size_t count, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaGraphAddMemcpyNode1D(pGraphNode, graph, pDependencies, numDependencies, dst, src, count, kind)); -} -#endif - -inline static hipError_t hipGraphAddMemsetNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, - const hipMemsetParams* pMemsetParams) { - return hipCUDAErrorTohipError( - cudaGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, pMemsetParams)); -} - -inline static hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t* nodes, - size_t* numNodes) { - return hipCUDAErrorTohipError(cudaGraphGetNodes(graph, nodes, numNodes)); -} - -inline static hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t* pRootNodes, - size_t* pNumRootNodes) { - return 
hipCUDAErrorTohipError(cudaGraphGetRootNodes(graph, pRootNodes, pNumRootNodes)); -} - -inline static hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, - hipKernelNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphKernelNodeGetParams(node, pNodeParams)); -} - -inline static hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, - const hipKernelNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphKernelNodeSetParams(node, pNodeParams)); -} - -inline static hipError_t hipGraphKernelNodeSetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, - const hipKernelNodeAttrValue* value) { - return hipCUDAErrorTohipError(cudaGraphKernelNodeSetAttribute(hNode, attr, value)); -} - -inline static hipError_t hipGraphKernelNodeGetAttribute(hipGraphNode_t hNode, hipKernelNodeAttrID attr, - hipKernelNodeAttrValue* value) { - return hipCUDAErrorTohipError(cudaGraphKernelNodeGetAttribute(hNode, attr, value)); -} - -inline static hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, - hipMemcpy3DParms* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphMemcpyNodeGetParams(node, pNodeParams)); -} - -inline static hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, - const hipMemcpy3DParms* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphMemcpyNodeSetParams(node, pNodeParams)); -} - -inline static hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, - hipMemsetParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphMemsetNodeGetParams(node, pNodeParams)); -} - -inline static hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, - const hipMemsetParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphMemsetNodeSetParams(node, pNodeParams)); -} - -inline static hipError_t hipThreadExchangeStreamCaptureMode(hipStreamCaptureMode* mode) { - return hipCUDAErrorTohipError(cudaThreadExchangeStreamCaptureMode(mode)); -} - -inline static hipError_t 
hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, - hipGraphNode_t node, - const hipKernelNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams)); -} - -inline static hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t* from, - const hipGraphNode_t* to, size_t numDependencies) { - return hipCUDAErrorTohipError(cudaGraphAddDependencies(graph, from, to, numDependencies)); -} - -inline static hipError_t hipGraphAddEmptyNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies) { - return hipCUDAErrorTohipError( - cudaGraphAddEmptyNode(pGraphNode, graph, pDependencies, numDependencies)); -} - -inline static hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, - unsigned int flags) { - if (value < 0) { - printf("Warning! value is negative, CUDA accept positive values\n"); - } - return hipCUResultTohipError(cuStreamWriteValue32(stream, reinterpret_cast(ptr), - static_cast(value), flags)); -} - -inline static hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, - unsigned int flags) { - if (value < 0) { - printf("Warning! value is negative, CUDA accept positive values\n"); - } - return hipCUResultTohipError(cuStreamWriteValue64(stream, reinterpret_cast(ptr), - static_cast(value), flags)); -} - -inline static hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, - unsigned int flags, - uint32_t mask __dparm(0xFFFFFFFF)) { - if (value < 0) { - printf("Warning! value is negative, CUDA accept positive values\n"); - } - if (mask != STREAM_OPS_WAIT_MASK_32) { - printf("Warning! 
mask will not have impact as CUDA ignores it.\n"); - } - return hipCUResultTohipError(cuStreamWaitValue32(stream, reinterpret_cast(ptr), - static_cast(value), flags)); -} - -inline static hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, - unsigned int flags, - uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)) { - if (value < 0) { - printf("Warning! value is negative, CUDA accept positive values\n"); - } - if (mask != STREAM_OPS_WAIT_MASK_64) { - printf("Warning! mask will not have impact as CUDA ignores it.\n"); - } - return hipCUResultTohipError(cuStreamWaitValue64(stream, reinterpret_cast(ptr), - static_cast(value), flags)); -} - -inline static hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t* from, - const hipGraphNode_t* to, - size_t numDependencies) { - return hipCUDAErrorTohipError(cudaGraphRemoveDependencies(graph, from, to, numDependencies)); -} - - -inline static hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t* from, - hipGraphNode_t* to, size_t* numEdges) { - return hipCUDAErrorTohipError(cudaGraphGetEdges(graph, from, to, numEdges)); -} - -inline static hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, - hipGraphNode_t* pDependencies, - size_t* pNumDependencies) { - return hipCUDAErrorTohipError( - cudaGraphNodeGetDependencies(node, pDependencies, pNumDependencies)); -} - -inline static hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, - hipGraphNode_t* pDependentNodes, - size_t* pNumDependentNodes) { - return hipCUDAErrorTohipError( - cudaGraphNodeGetDependentNodes(node, pDependentNodes, pNumDependentNodes)); -} - -inline static hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType* pType) { - return hipCUDAErrorTohipError(cudaGraphNodeGetType(node, pType)); -} - -inline static hipError_t hipGraphDestroyNode(hipGraphNode_t node) { - return hipCUDAErrorTohipError(cudaGraphDestroyNode(node)); -} - -inline static hipError_t hipGraphClone(hipGraph_t* 
pGraphClone, hipGraph_t originalGraph) { - return hipCUDAErrorTohipError(cudaGraphClone(pGraphClone, originalGraph)); -} - -inline static hipError_t hipGraphNodeFindInClone(hipGraphNode_t* pNode, hipGraphNode_t originalNode, - hipGraph_t clonedGraph) { - return hipCUDAErrorTohipError(cudaGraphNodeFindInClone(pNode, originalNode, clonedGraph)); -} - -inline static hipError_t hipGraphAddChildGraphNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, hipGraph_t childGraph) { - return hipCUDAErrorTohipError( - cudaGraphAddChildGraphNode(pGraphNode, graph, pDependencies, numDependencies, childGraph)); -} - -inline static hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t* pGraph) { - return hipCUDAErrorTohipError(cudaGraphChildGraphNodeGetGraph(node, pGraph)); -} - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, - hipGraphNode_t node, - hipGraph_t childGraph) { - return hipCUDAErrorTohipError( - cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph)); -} -#endif - -inline static hipError_t hipStreamGetCaptureInfo(hipStream_t stream, - hipStreamCaptureStatus* pCaptureStatus, - unsigned long long* pId) { - return hipCUDAErrorTohipError(cudaStreamGetCaptureInfo(stream, pCaptureStatus, pId)); -} - -#if CUDA_VERSION >= CUDA_11030 || defined(__CUDA_API_VERSION_INTERNAL) -inline static hipError_t hipStreamGetCaptureInfo_v2( - hipStream_t stream, hipStreamCaptureStatus* captureStatus_out, - unsigned long long* id_out __dparm(0), hipGraph_t* graph_out __dparm(0), - const hipGraphNode_t** dependencies_out __dparm(0), size_t* numDependencies_out __dparm(0)) { - return hipCUResultTohipError(cuStreamGetCaptureInfo_v2( - stream, reinterpret_cast(captureStatus_out), - reinterpret_cast(id_out), graph_out, - dependencies_out, numDependencies_out)); -} -#endif - -inline static hipError_t hipStreamIsCapturing(hipStream_t 
stream, - hipStreamCaptureStatus* pCaptureStatus) { - return hipCUDAErrorTohipError(cudaStreamIsCapturing(stream, pCaptureStatus)); -} - -#if CUDA_VERSION >= CUDA_11030 -inline static hipError_t hipStreamUpdateCaptureDependencies(hipStream_t stream, - hipGraphNode_t* dependencies, - size_t numDependencies, - unsigned int flags __dparm(0)) { - return hipCUDAErrorTohipError(cudaStreamUpdateCaptureDependencies(stream, dependencies, - numDependencies, flags)); -} -#endif - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphAddEventRecordNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, hipEvent_t event) { - return hipCUDAErrorTohipError( - cudaGraphAddEventRecordNode(pGraphNode, graph, pDependencies, numDependencies, event)); -} - -inline static hipError_t hipGraphAddEventWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, hipEvent_t event) { - return hipCUDAErrorTohipError( - cudaGraphAddEventWaitNode(pGraphNode, graph, pDependencies, numDependencies, event)); -} -#endif - -inline static hipError_t hipGraphAddHostNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, - const hipHostNodeParams* pNodeParams) { - return hipCUDAErrorTohipError( - cudaGraphAddHostNode(pGraphNode, graph, pDependencies, numDependencies, pNodeParams)); -} - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t* pGraphNode, - hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, void* dst, - const void* symbol, size_t count, - size_t offset, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaGraphAddMemcpyNodeFromSymbol( - pGraphNode, graph, pDependencies, numDependencies, dst, symbol, count, offset, kind)); -} - -inline static hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t* pGraphNode, 
hipGraph_t graph, - const hipGraphNode_t* pDependencies, - size_t numDependencies, const void* symbol, - const void* src, size_t count, size_t offset, - hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaGraphAddMemcpyNodeToSymbol( - pGraphNode, graph, pDependencies, numDependencies, symbol, src, count, offset, kind)); -} - -inline static hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { - return hipCUDAErrorTohipError(cudaGraphEventRecordNodeSetEvent(node, event)); -} - -inline static hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t* event_out) { - return hipCUDAErrorTohipError(cudaGraphEventWaitNodeGetEvent(node, event_out)); -} - -inline static hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { - return hipCUDAErrorTohipError(cudaGraphEventWaitNodeSetEvent(node, event)); -} -#endif - -inline static hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, - hipGraphNode_t node, - const hipHostNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams)); -} - -inline static hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, - hipGraphNode_t node, - hipMemcpy3DParms* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams)); -} - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, - hipGraphNode_t node, void* dst, - const void* src, size_t count, - hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, count, kind)); -} - -inline static hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol(hipGraphExec_t hGraphExec, - hipGraphNode_t node, void* dst, - const void* symbol, size_t count, - size_t offset, - hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParamsFromSymbol( 
- hGraphExec, node, dst, symbol, count, offset, kind)); -} - -inline static hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( - hipGraphExec_t hGraphExec, hipGraphNode_t node, const void* symbol, const void* src, - size_t count, size_t offset, hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaGraphExecMemcpyNodeSetParamsToSymbol( - hGraphExec, node, symbol, src, count, offset, kind)); -} -#endif - -inline static hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, - hipGraphNode_t node, - const hipMemsetParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams)); -} - -inline static hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, - hipGraphNode_t* hErrorNode_out, - hipGraphExecUpdateResult* updateResult_out) { - return hipCUDAErrorTohipError( - cudaGraphExecUpdate(hGraphExec, hGraph, hErrorNode_out, updateResult_out)); -} - -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void* dst, - const void* symbol, size_t count, - size_t offset, hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaGraphMemcpyNodeSetParamsFromSymbol(node, dst, symbol, count, offset, kind)); -} - -inline static hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, - const void* symbol, const void* src, - size_t count, size_t offset, - hipMemcpyKind kind) { - return hipCUDAErrorTohipError( - cudaGraphMemcpyNodeSetParamsToSymbol(node, symbol, src, count, offset, kind)); -} - -inline static hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, - hipEvent_t* event_out) { - return hipCUDAErrorTohipError(cudaGraphEventRecordNodeGetEvent(node, event_out)); -} -#endif - -inline static hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, - hipHostNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphHostNodeGetParams(node, pNodeParams)); -} - -#if CUDA_VERSION >= CUDA_11010 
-inline static hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void* dst, - const void* src, size_t count, - hipMemcpyKind kind) { - return hipCUDAErrorTohipError(cudaGraphMemcpyNodeSetParams1D(node, dst, src, count, kind)); -} - -inline static hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, - hipGraphNode_t hNode, - hipEvent_t event) { - return hipCUDAErrorTohipError(cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event)); -} - -inline static hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, - hipGraphNode_t hNode, hipEvent_t event) { - return hipCUDAErrorTohipError(cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event)); -} - -inline static hipError_t hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { - return hipCUDAErrorTohipError(cudaDeviceGetGraphMemAttribute(device, attr, value)); -} - -inline static hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value) { - return hipCUDAErrorTohipError(cudaDeviceSetGraphMemAttribute(device, attr, value)); -} - -inline static hipError_t hipDeviceGraphMemTrim(int device) { - return hipCUDAErrorTohipError(cudaDeviceGraphMemTrim(device)); -} - -inline static hipError_t hipLaunchHostFunc(hipStream_t stream, hipHostFn_t fn, void* userData) { - return hipCUDAErrorTohipError(cudaLaunchHostFunc(stream, fn, userData)); -} - -inline static hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, - unsigned int initialRefcount, unsigned int flags) { - return hipCUDAErrorTohipError(cudaUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags)); -} - - -inline static hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)) { - return hipCUDAErrorTohipError(cudaUserObjectRelease(object, count)); -} - - -inline static hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)) { - return 
hipCUDAErrorTohipError(cudaUserObjectRelease(object, count)); -} - -inline static hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1), unsigned int flags __dparm(0)) { - return hipCUDAErrorTohipError(cudaGraphRetainUserObject(graph, object, count, flags)); -} - -inline static hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1)) { - return hipCUDAErrorTohipError(cudaGraphReleaseUserObject(graph, object, count)); -} -#endif - -inline static hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, - const hipHostNodeParams* pNodeParams) { - return hipCUDAErrorTohipError(cudaGraphHostNodeSetParams(node, pNodeParams)); -} -#if CUDA_VERSION >= CUDA_11030 -inline static hipError_t hipGraphDebugDotPrint(hipGraph_t graph, const char* path, - unsigned int flags) { - return hipCUDAErrorTohipError(cudaGraphDebugDotPrint(graph, path, flags)); -} -#endif -#if CUDA_VERSION >= CUDA_11000 -inline static hipError_t hipGraphKernelNodeCopyAttributes(hipGraphNode_t hSrc, - hipGraphNode_t hDst) { - return hipCUDAErrorTohipError(cudaGraphKernelNodeCopyAttributes(hSrc, hDst)); -} -#endif -#if CUDA_VERSION >= CUDA_11060 -inline static hipError_t hipGraphNodeSetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, - unsigned int isEnabled) { - return hipCUDAErrorTohipError(cudaGraphNodeSetEnabled(hGraphExec, hNode, isEnabled)); -} - -inline static hipError_t hipGraphNodeGetEnabled(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, - unsigned int* isEnabled) { - return hipCUDAErrorTohipError(cudaGraphNodeGetEnabled(hGraphExec, hNode, isEnabled)); -} -#endif -#if CUDA_VERSION >= CUDA_11010 -inline static hipError_t hipGraphUpload(hipGraphExec_t graphExec, hipStream_t stream) { - return hipCUDAErrorTohipError(cudaGraphUpload(graphExec, stream)); -} -#endif -#endif //__CUDACC__ - -#endif // HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_RUNTIME_API_H diff --git 
a/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h deleted file mode 100644 index df374d705..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_texture_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_TEXTURE_TYPES_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_TEXTURE_TYPES_H - -#include - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h deleted file mode 100644 index 993f17507..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_unsafe_atomics.h +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_UNSAFE_ATOMICS_H -#define HIP_INCLUDE_HIP_NVIDIA_DETAIL_HIP_UNSAFE_ATOMICS_H - -__device__ inline float unsafeAtomicAdd(float* addr, float value) { - return atomicAdd(addr, value); -} - -__device__ inline double unsafeAtomicAdd(double* addr, double value) { -#if __CUDA_ARCH__ < 600 - unsigned long long *addr_cast = (unsigned long long*)addr; - unsigned long long old_val = *addr_cast; - unsigned long long expected; - do { - expected = old_val; - old_val = atomicCAS(addr_cast, expected, - __double_as_longlong(value + - __longlong_as_double(expected))); - } while (__double_as_longlong(expected) != __double_as_longlong(old_val)); - return old_val; -#else - return atomicAdd(addr, value); -#endif -} - -__device__ inline float unsafeAtomicMax(float* addr, float value) { - return atomicMax(addr, value); -} - -__device__ inline double unsafeAtomicMax(double* addr, double val) { - return atomicMax(addr, val); -} - -__device__ inline float unsafeAtomicMin(float* addr, float value) { - return atomicMin(addr, value); -} - -__device__ inline double unsafeAtomicMin(double* addr, double val) { - return atomicMin(addr, val); -} - -__device__ inline float safeAtomicAdd(float* addr, float value) { - return atomicAdd(addr, value); -} - -__device__ inline double safeAtomicAdd(double* addr, double value) { -#if __CUDA_ARCH__ < 600 - unsigned long long *addr_cast = (unsigned long long*)addr; - unsigned long long old_val = *addr_cast; - unsigned long long expected; - do { - expected = old_val; - old_val = atomicCAS(addr_cast, expected, - __double_as_longlong(value + - __longlong_as_double(expected))); - } while (__double_as_longlong(expected) != __double_as_longlong(old_val)); - return old_val; -#else - return atomicAdd(addr, value); -#endif -} - -__device__ inline float safeAtomicMax(float* addr, float value) { - return atomicMax(addr, value); -} - -__device__ inline double safeAtomicMax(double* addr, double val) { - return atomicMax(addr, 
val); -} - -__device__ inline float safeAtomicMin(float* addr, float value) { - return atomicMin(addr, value); -} - -__device__ inline double safeAtomicMin(double* addr, double val) { - return atomicMin(addr, val); -} - -#endif diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h b/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h deleted file mode 100644 index 68864e75c..000000000 --- a/hipamd/include/hip/nvidia_detail/nvidia_hiprtc.h +++ /dev/null @@ -1,172 +0,0 @@ -/* -Copyright (c) 2021 - 2022 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#ifndef HIPRTC_H -#define HIPRTC_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - -#include - -#if !defined(_WIN32) -#pragma GCC visibility push(default) -#endif - -typedef enum hiprtcResult { - HIPRTC_SUCCESS = 0, - HIPRTC_ERROR_OUT_OF_MEMORY = 1, - HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2, - HIPRTC_ERROR_INVALID_INPUT = 3, - HIPRTC_ERROR_INVALID_PROGRAM = 4, - HIPRTC_ERROR_INVALID_OPTION = 5, - HIPRTC_ERROR_COMPILATION = 6, - HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7, - HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8, - HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9, - HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10, - HIPRTC_ERROR_INTERNAL_ERROR = 11 -} hiprtcResult; - -inline static nvrtcResult hiprtcResultTonvrtcResult(hiprtcResult result) { - switch (result) { - case HIPRTC_SUCCESS: - return NVRTC_SUCCESS; - case HIPRTC_ERROR_OUT_OF_MEMORY: - return NVRTC_ERROR_OUT_OF_MEMORY; - case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE: - return NVRTC_ERROR_PROGRAM_CREATION_FAILURE; - case HIPRTC_ERROR_INVALID_INPUT: - return NVRTC_ERROR_INVALID_INPUT; - case HIPRTC_ERROR_INVALID_PROGRAM: - return NVRTC_ERROR_INVALID_PROGRAM; - case HIPRTC_ERROR_INVALID_OPTION: - return NVRTC_ERROR_INVALID_OPTION; - case HIPRTC_ERROR_COMPILATION: - return NVRTC_ERROR_COMPILATION; - case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE: - return NVRTC_ERROR_BUILTIN_OPERATION_FAILURE; - case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: - return NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION; - case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: - return NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION; - case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID: - return NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID; - case HIPRTC_ERROR_INTERNAL_ERROR: - return NVRTC_ERROR_INTERNAL_ERROR; - default: - return NVRTC_ERROR_INTERNAL_ERROR; - } -} - -inline static hiprtcResult nvrtcResultTohiprtcResult(nvrtcResult result) { - switch (result) { - case 
NVRTC_SUCCESS: - return HIPRTC_SUCCESS; - case NVRTC_ERROR_OUT_OF_MEMORY: - return HIPRTC_ERROR_OUT_OF_MEMORY; - case NVRTC_ERROR_PROGRAM_CREATION_FAILURE: - return HIPRTC_ERROR_PROGRAM_CREATION_FAILURE; - case NVRTC_ERROR_INVALID_INPUT: - return HIPRTC_ERROR_INVALID_INPUT; - case NVRTC_ERROR_INVALID_PROGRAM: - return HIPRTC_ERROR_INVALID_PROGRAM; - case NVRTC_ERROR_INVALID_OPTION: - return HIPRTC_ERROR_INVALID_OPTION; - case NVRTC_ERROR_COMPILATION: - return HIPRTC_ERROR_COMPILATION; - case NVRTC_ERROR_BUILTIN_OPERATION_FAILURE: - return HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE; - case NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION: - return HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION; - case NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION: - return HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION; - case NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID: - return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID; - case NVRTC_ERROR_INTERNAL_ERROR: - return HIPRTC_ERROR_INTERNAL_ERROR; - default: - return HIPRTC_ERROR_INTERNAL_ERROR; - } -} - -inline static const char* hiprtcGetErrorString(hiprtcResult result) { - return nvrtcGetErrorString(hiprtcResultTonvrtcResult(result)); -} - -inline static hiprtcResult hiprtcVersion(int* major, int* minor) { - return nvrtcResultTohiprtcResult(nvrtcVersion(major, minor)); -} - -typedef nvrtcProgram hiprtcProgram; - -inline static hiprtcResult hiprtcAddNameExpression(hiprtcProgram prog, const char* name_expression) { - return nvrtcResultTohiprtcResult(nvrtcAddNameExpression(prog, name_expression)); -} - -inline static hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char** options) { - return nvrtcResultTohiprtcResult(nvrtcCompileProgram(prog, numOptions, options)); -} - -inline static hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const char* name, - int numHeaders, const char** headers, const char** includeNames) { - return nvrtcResultTohiprtcResult( - nvrtcCreateProgram(prog, src, name, 
numHeaders, headers, includeNames)); -} - -inline static hiprtcResult hiprtcDestroyProgram(hiprtcProgram* prog) { - return nvrtcResultTohiprtcResult(nvrtcDestroyProgram(prog)); -} - -inline static hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expression, - const char** lowered_name) { - return nvrtcResultTohiprtcResult(nvrtcGetLoweredName(prog, name_expression, lowered_name)); -} - -inline static hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* log) { - return nvrtcResultTohiprtcResult(nvrtcGetProgramLog(prog, log)); -} - -inline static hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) { - return nvrtcResultTohiprtcResult(nvrtcGetProgramLogSize(prog, logSizeRet)); -} - -inline static hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code) { - return nvrtcResultTohiprtcResult(nvrtcGetPTX(prog, code)); -} - -inline static hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet) { - return nvrtcResultTohiprtcResult(nvrtcGetPTXSize(prog, codeSizeRet)); -} - -#if !defined(_WIN32) -#pragma GCC visibility pop -#endif - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif // HIPRTC_H diff --git a/hipamd/packaging/CMakeLists.txt b/hipamd/packaging/CMakeLists.txt index 1ff2b1d4e..7e21f025e 100644 --- a/hipamd/packaging/CMakeLists.txt +++ b/hipamd/packaging/CMakeLists.txt @@ -90,10 +90,14 @@ install(DIRECTORY ${hip_SOURCE_DIR}/bin DESTINATION . COMPONENT dev install(DIRECTORY ${HIP_COMMON_DIR}/include DESTINATION . 
COMPONENT dev) install(DIRECTORY ${hip_SOURCE_DIR}/include/hip/amd_detail DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) -install(DIRECTORY ${hip_SOURCE_DIR}/include/hip/nvidia_detail +if(DEFINED HIPNV_DIR) + install(DIRECTORY ${HIPNV_DIR}/include/hip/nvidia_detail DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) +endif() +if(HIP_PLATFORM STREQUAL "amd" ) install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/amd_detail/hip_prof_str.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip/amd_detail COMPONENT dev) +endif() install(FILES ${CMAKE_BINARY_DIR}/hipamd/include/hip/hip_version.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/hip COMPONENT dev) install(FILES ${CMAKE_BINARY_DIR}/hipamd/share/hip/version DESTINATION ${CMAKE_INSTALL_DATADIR}/hip COMPONENT dev) From 9a64e55680021b9844ae1e8812445db0cbb024cb Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary Date: Wed, 1 Nov 2023 10:07:31 +0000 Subject: [PATCH 11/27] SWDEV-422808 - Add entries for missing hipDataType Change-Id: I6c41f8497564e1d718a3b5bc09c45ec4b53fd25f --- hipamd/src/hip_formatting.hpp | 72 +++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/hipamd/src/hip_formatting.hpp b/hipamd/src/hip_formatting.hpp index 00a27476b..3239ea717 100644 --- a/hipamd/src/hip_formatting.hpp +++ b/hipamd/src/hip_formatting.hpp @@ -479,6 +479,78 @@ inline std::ostream& operator<<(std::ostream& os, const hipDataType& s) { case HIP_C_64F: os << "HIP_C_64F"; break; + case HIP_R_8I: + os << "HIP_R_8I"; + break; + case HIP_C_8I: + os << "HIP_C_8I"; + break; + case HIP_R_8U: + os << "HIP_R_8U"; + break; + case HIP_C_8U: + os << "HIP_C_8U"; + break; + case HIP_R_32I: + os << "HIP_R_32I"; + break; + case HIP_C_32I: + os << "HIP_C_32I"; + break; + case HIP_R_32U: + os << "HIP_R_32U"; + break; + case HIP_C_32U: + os << "HIP_C_32U"; + break; + case HIP_R_16BF: + os << "HIP_R_16BF"; + break; + case HIP_C_16BF: + os << "HIP_C_16BF"; + break; + case HIP_R_4I: + os << "HIP_R_4I"; + break; + case 
HIP_C_4I: + os << "HIP_C_4I"; + break; + case HIP_R_4U: + os << "HIP_R_4U"; + break; + case HIP_C_4U: + os << "HIP_C_4U"; + break; + case HIP_R_16I: + os << "HIP_R_16I"; + break; + case HIP_C_16I: + os << "HIP_C_16I"; + break; + case HIP_R_16U: + os << "HIP_R_16U"; + break; + case HIP_C_16U: + os << "HIP_C_16U"; + break; + case HIP_R_64I: + os << "HIP_R_64I"; + break; + case HIP_C_64I: + os << "HIP_C_64I"; + break; + case HIP_R_64U: + os << "HIP_R_64U"; + break; + case HIP_C_64U: + os << "HIP_C_64U"; + break; + case HIP_R_8F_E4M3_FNUZ: + os << "HIP_R_8F_E4M3_FNUZ"; + break; + case HIP_R_8F_E5M2_FNUZ: + os << "HIP_R_8F_E5M2_FNUZ"; + break; default: os << "HIP_R_16F"; }; From 348f1a19ab9c391d2f9d5dbc7ffa099d9cad1422 Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Thu, 12 Oct 2023 09:03:42 -0400 Subject: [PATCH 12/27] SWDEV-389530 - [ABI Break]Match error with cuda Signed-off-by: sdashmiz Change-Id: Id9de54b4fd1be7b6b887a4ebfcc6a3d9a70c7bc9 (cherry picked from commit 2d83abaca66b30d4823f5e6428fd27b1a26060bb) --- hipamd/src/hip_module.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/src/hip_module.cpp b/hipamd/src/hip_module.cpp index eea5efd41..ea57b2e26 100644 --- a/hipamd/src/hip_module.cpp +++ b/hipamd/src/hip_module.cpp @@ -220,7 +220,7 @@ hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX, } if (globalWorkSizeX == 0 || globalWorkSizeY == 0 || globalWorkSizeZ == 0 || blockDimX == 0 || blockDimY == 0 || blockDimZ == 0) { - return hipErrorInvalidValue; + return hipErrorInvalidConfiguration; } const amd::Device* device = g_devices[deviceId]->devices()[0]; From 0bf2a9834c56f4f1bb3641f5558045225b2c95bf Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 9 Nov 2023 04:30:39 +0000 Subject: [PATCH 13/27] SWDEV-388256 - Add support of hipExtGetLastError Change-Id: Idc9e07e9422f5cbc71eb697f11c89051abcefa29 --- hipamd/include/hip/amd_detail/hip_prof_str.h | 15 ++++++++++++++- hipamd/src/amdhip.def | 1 + 
hipamd/src/hip_error.cpp | 8 ++++++++ hipamd/src/hip_hcc.map.in | 9 ++++++++- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 27c8b26a1..0b56c6c23 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -377,7 +377,8 @@ enum hip_api_id_t { HIP_API_ID_hipArrayGetInfo = 362, HIP_API_ID_hipStreamGetDevice = 363, HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray = 364, - HIP_API_ID_LAST = 364, + HIP_API_ID_hipExtGetLastError = 365, + HIP_API_ID_LAST = 365, HIP_API_ID_hipBindTexture = HIP_API_ID_NONE, HIP_API_ID_hipBindTexture2D = HIP_API_ID_NONE, @@ -781,6 +782,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync"; case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: return "hipExternalMemoryGetMappedMipmappedArray"; + case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; }; return "unknown"; }; @@ -1148,6 +1150,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain; if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; if (strcmp("hipExternalMemoryGetMappedMipmappedArray", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray; + if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError; return HIP_API_ID_NONE; } @@ -5448,6 +5451,9 @@ typedef struct hip_api_data_s { cb_data.args.hipWaitExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ cb_data.args.hipWaitExternalSemaphoresAsync.stream = (hipStream_t)stream; \ }; +// hipExtGetLastError[] +#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \ +}; #define 
INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) // Macros for non-public API primitives @@ -5918,6 +5924,9 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipGetLastError[] case HIP_API_ID_hipGetLastError: break; +// hipExtGetLastError[] + case HIP_API_ID_hipExtGetLastError: + break; // hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] case HIP_API_ID_hipGetMipmappedArrayLevel: if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray); @@ -7641,6 +7650,10 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << "hipGetLastError("; oss << ")"; break; + case HIP_API_ID_hipExtGetLastError: + oss << "hipExtGetLastError("; + oss << ")"; + break; case HIP_API_ID_hipGetMipmappedArrayLevel: oss << "hipGetMipmappedArrayLevel("; if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL"; diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index 7bac33593..975181e4f 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -444,3 +444,4 @@ hipGraphAddMemAllocNode hipGraphMemAllocNodeGetParams hipGraphAddMemFreeNode hipGraphMemFreeNodeGetParams +hipExtGetLastError diff --git a/hipamd/src/hip_error.cpp b/hipamd/src/hip_error.cpp index 7ffc1ce98..0089c27af 100644 --- a/hipamd/src/hip_error.cpp +++ b/hipamd/src/hip_error.cpp @@ -22,6 +22,14 @@ #include "hip_internal.hpp" +hipError_t hipExtGetLastError() +{ + HIP_INIT_API(hipExtGetLastError); + hipError_t err = hip::tls.last_error_; + hip::tls.last_error_ = hipSuccess; + return err; +} + hipError_t hipGetLastError() { HIP_INIT_API(hipGetLastError); diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index 9494a3f44..5674d3b37 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -525,4 
+525,11 @@ global: hipArray3DGetDescriptor; local: *; -} hip_5.5; \ No newline at end of file +} hip_5.5; + +hip_6.0 { +global: + hipExtGetLastError; +local: + *; +} hip_5.6; From 2c7230d92bcab63deae3504ff6629ed53334f62a Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Tue, 25 Jul 2023 14:29:18 -0400 Subject: [PATCH 14/27] SWDEV-332969 - [ABI Break]Substitute hipArray* with hipArray_t - hipArray will be an internal struct from rocm6.0 Signed-off-by: sdashmiz Change-Id: Icf97fe96b87be8532098cd7f9ceaad099f99c9b9 --- .../hip/amd_detail/amd_hip_runtime_pt_api.h | 4 +- hipamd/include/hip/amd_detail/hip_prof_str.h | 159 +++++++----------- hipamd/src/hip_formatting.hpp | 35 ---- hipamd/src/hip_graph.cpp | 6 +- hipamd/src/hip_graph_capture.hpp | 6 +- hipamd/src/hip_graph_helper.hpp | 10 +- hipamd/src/hip_internal.hpp | 14 ++ hipamd/src/hip_memory.cpp | 80 ++++----- 8 files changed, 132 insertions(+), 182 deletions(-) diff --git a/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h b/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h index a597dbd03..070c58bd3 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +++ b/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h @@ -136,14 +136,14 @@ hipError_t hipMemcpyToSymbolAsync_spt(const void* symbol, const void* src, size_ hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffsetSrc, size_t hOffset, size_t count, hipMemcpyKind kind); -hipError_t hipMemcpy2DToArray_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, +hipError_t hipMemcpy2DToArray_spt(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind); hipError_t hipMemcpy2DFromArrayAsync_spt(void* dst, size_t dpitch, hipArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream); -hipError_t hipMemcpy2DToArrayAsync_spt(hipArray* dst, size_t wOffset, size_t hOffset, const 
void* src, +hipError_t hipMemcpy2DToArrayAsync_spt(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream); diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 0b56c6c23..cbf974e21 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -1176,32 +1176,29 @@ typedef struct hip_api_data_s { hipStream_t stream; } __hipPushCallConfiguration; struct { - hipArray** array; - hipArray* array__val; + hipArray_t* array; + hipArray_t array__val; const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray; HIP_ARRAY3D_DESCRIPTOR pAllocateArray__val; } hipArray3DCreate; struct { HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor; HIP_ARRAY3D_DESCRIPTOR pArrayDescriptor__val; - hipArray* array; - hipArray array__val; + hipArray_t array; } hipArray3DGetDescriptor; struct { - hipArray** pHandle; - hipArray* pHandle__val; + hipArray_t* pHandle; + hipArray_t pHandle__val; const HIP_ARRAY_DESCRIPTOR* pAllocateArray; HIP_ARRAY_DESCRIPTOR pAllocateArray__val; } hipArrayCreate; struct { - hipArray* array; - hipArray array__val; + hipArray_t array; } hipArrayDestroy; struct { HIP_ARRAY_DESCRIPTOR* pArrayDescriptor; HIP_ARRAY_DESCRIPTOR pArrayDescriptor__val; - hipArray* array; - hipArray array__val; + hipArray_t array; } hipArrayGetDescriptor; struct { hipChannelFormatDesc* desc; @@ -1210,8 +1207,7 @@ typedef struct hip_api_data_s { hipExtent extent__val; unsigned int* flags; unsigned int flags__val; - hipArray* array; - hipArray array__val; + hipArray_t array; } hipArrayGetInfo; struct { int* device; @@ -1568,8 +1564,7 @@ typedef struct hip_api_data_s { void* ptr; } hipFree; struct { - hipArray* array; - hipArray array__val; + hipArray_t array; } hipFreeArray; struct { void* dev_ptr; @@ -2285,8 +2280,8 @@ typedef struct hip_api_data_s { unsigned int flags; } hipMalloc3DArray; struct { - hipArray** array; - 
hipArray* array__val; + hipArray_t* array; + hipArray_t array__val; const hipChannelFormatDesc* desc; hipChannelFormatDesc desc__val; size_t width; @@ -2587,8 +2582,7 @@ typedef struct hip_api_data_s { hipStream_t stream; } hipMemcpy2DFromArrayAsync; struct { - hipArray* dst; - hipArray dst__val; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -2598,8 +2592,7 @@ typedef struct hip_api_data_s { hipMemcpyKind kind; } hipMemcpy2DToArray; struct { - hipArray* dst; - hipArray dst__val; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -2627,8 +2620,7 @@ typedef struct hip_api_data_s { } hipMemcpyAsync; struct { void* dst; - hipArray* srcArray; - hipArray srcArray__val; + hipArray_t srcArray; size_t srcOffset; size_t count; } hipMemcpyAtoH; @@ -2678,8 +2670,7 @@ typedef struct hip_api_data_s { hipStream_t stream; } hipMemcpyFromSymbolAsync; struct { - hipArray* dstArray; - hipArray dstArray__val; + hipArray_t dstArray; size_t dstOffset; const void* srcHost; size_t count; @@ -2720,8 +2711,7 @@ typedef struct hip_api_data_s { hipStream_t stream; } hipMemcpyPeerAsync; struct { - hipArray* dst; - hipArray dst__val; + hipArray_t dst; size_t wOffset; size_t hOffset; const void* src; @@ -3297,36 +3287,36 @@ typedef struct hip_api_data_s { cb_data.args.__hipPushCallConfiguration.sharedMem = (size_t)sharedMem; \ cb_data.args.__hipPushCallConfiguration.stream = (hipStream_t)stream; \ }; -// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] +// hipArray3DCreate[('hipArray_t*', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] #define INIT_hipArray3DCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipArray3DCreate.array = (hipArray**)array; \ + cb_data.args.hipArray3DCreate.array = (hipArray_t*)array; \ cb_data.args.hipArray3DCreate.pAllocateArray = (const HIP_ARRAY3D_DESCRIPTOR*)pAllocateArray; \ }; -// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), 
('hipArray*', 'array')] +// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] #define INIT_hipArray3DGetDescriptor_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipArray3DGetDescriptor.pArrayDescriptor = (HIP_ARRAY3D_DESCRIPTOR*)pArrayDescriptor; \ - cb_data.args.hipArray3DGetDescriptor.array = (hipArray*)array; \ + cb_data.args.hipArray3DGetDescriptor.array = (hipArray_t)array; \ }; -// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] +// hipArrayCreate[('hipArray_t*', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] #define INIT_hipArrayCreate_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipArrayCreate.pHandle = (hipArray**)array; \ + cb_data.args.hipArrayCreate.pHandle = (hipArray_t*)array; \ cb_data.args.hipArrayCreate.pAllocateArray = (const HIP_ARRAY_DESCRIPTOR*)pAllocateArray; \ }; -// hipArrayDestroy[('hipArray*', 'array')] +// hipArrayDestroy[('hipArray_t', 'array')] #define INIT_hipArrayDestroy_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipArrayDestroy.array = (hipArray*)array; \ + cb_data.args.hipArrayDestroy.array = (hipArray_t)array; \ }; -// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] +// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] #define INIT_hipArrayGetDescriptor_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipArrayGetDescriptor.pArrayDescriptor = (HIP_ARRAY_DESCRIPTOR*)pArrayDescriptor; \ - cb_data.args.hipArrayGetDescriptor.array = (hipArray*)array; \ + cb_data.args.hipArrayGetDescriptor.array = (hipArray_t)array; \ }; -// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray*', 'array')] +// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray_t', 'array')] #define INIT_hipArrayGetInfo_CB_ARGS_DATA(cb_data) { \ 
cb_data.args.hipArrayGetInfo.desc = (hipChannelFormatDesc*)desc; \ cb_data.args.hipArrayGetInfo.extent = (hipExtent*)extent; \ cb_data.args.hipArrayGetInfo.flags = (unsigned int*)flags; \ - cb_data.args.hipArrayGetInfo.array = (hipArray*)array; \ + cb_data.args.hipArrayGetInfo.array = (hipArray_t)array; \ }; // hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] #define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \ @@ -3714,9 +3704,9 @@ typedef struct hip_api_data_s { #define INIT_hipFree_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipFree.ptr = (void*)ptr; \ }; -// hipFreeArray[('hipArray*', 'array')] +// hipFreeArray[('hipArray_t', 'array')] #define INIT_hipFreeArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipFreeArray.array = (hipArray*)array; \ + cb_data.args.hipFreeArray.array = (hipArray_t)array; \ }; // hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')] #define INIT_hipFreeAsync_CB_ARGS_DATA(cb_data) { \ @@ -4437,9 +4427,9 @@ typedef struct hip_api_data_s { cb_data.args.hipMalloc3DArray.extent = (hipExtent)extent; \ cb_data.args.hipMalloc3DArray.flags = (unsigned int)flags; \ }; -// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] +// hipMallocArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] #define INIT_hipMallocArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMallocArray.array = (hipArray**)array; \ + cb_data.args.hipMallocArray.array = (hipArray_t*)array; \ cb_data.args.hipMallocArray.desc = (const hipChannelFormatDesc*)desc; \ cb_data.args.hipMallocArray.width = (size_t)width; \ cb_data.args.hipMallocArray.height = (size_t)height; \ @@ -4743,9 +4733,9 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpy2DFromArrayAsync.kind = (hipMemcpyKind)kind; \ cb_data.args.hipMemcpy2DFromArrayAsync.stream = (hipStream_t)stream; \ }; -// 
hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +// hipMemcpy2DToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] #define INIT_hipMemcpy2DToArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy2DToArray.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpy2DToArray.dst = (hipArray_t)dst; \ cb_data.args.hipMemcpy2DToArray.wOffset = (size_t)wOffset; \ cb_data.args.hipMemcpy2DToArray.hOffset = (size_t)hOffset; \ cb_data.args.hipMemcpy2DToArray.src = (const void*)src; \ @@ -4754,9 +4744,9 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpy2DToArray.height = (size_t)height; \ cb_data.args.hipMemcpy2DToArray.kind = (hipMemcpyKind)kind; \ }; -// hipMemcpy2DToArrayAsync[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +// hipMemcpy2DToArrayAsync[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] #define INIT_hipMemcpy2DToArrayAsync_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpy2DToArrayAsync.dst = (hipArray*)dst; \ + cb_data.args.hipMemcpy2DToArrayAsync.dst = (hipArray_t)dst; \ cb_data.args.hipMemcpy2DToArrayAsync.wOffset = (size_t)wOffset; \ cb_data.args.hipMemcpy2DToArrayAsync.hOffset = (size_t)hOffset; \ cb_data.args.hipMemcpy2DToArrayAsync.src = (const void*)src; \ @@ -4783,10 +4773,10 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpyAsync.kind = (hipMemcpyKind)kind; \ cb_data.args.hipMemcpyAsync.stream = (hipStream_t)stream; \ }; -// 
hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] #define INIT_hipMemcpyAtoH_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipMemcpyAtoH.dst = (void*)dstHost; \ - cb_data.args.hipMemcpyAtoH.srcArray = (hipArray*)srcArray; \ + cb_data.args.hipMemcpyAtoH.srcArray = (hipArray_t)srcArray; \ cb_data.args.hipMemcpyAtoH.srcOffset = (size_t)srcOffset; \ cb_data.args.hipMemcpyAtoH.count = (size_t)ByteCount; \ }; @@ -4842,9 +4832,9 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpyFromSymbolAsync.kind = (hipMemcpyKind)kind; \ cb_data.args.hipMemcpyFromSymbolAsync.stream = (hipStream_t)stream; \ }; -// hipMemcpyHtoA[('hipArray*', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] +// hipMemcpyHtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] #define INIT_hipMemcpyHtoA_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyHtoA.dstArray = (hipArray*)dstArray; \ + cb_data.args.hipMemcpyHtoA.dstArray = (hipArray_t)dstArray; \ cb_data.args.hipMemcpyHtoA.dstOffset = (size_t)dstOffset; \ cb_data.args.hipMemcpyHtoA.srcHost = (const void*)srcHost; \ cb_data.args.hipMemcpyHtoA.count = (size_t)ByteCount; \ @@ -4888,9 +4878,9 @@ typedef struct hip_api_data_s { cb_data.args.hipMemcpyPeerAsync.sizeBytes = (size_t)sizeBytes; \ cb_data.args.hipMemcpyPeerAsync.stream = (hipStream_t)stream; \ }; -// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +// hipMemcpyToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] #define INIT_hipMemcpyToArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipMemcpyToArray.dst = (hipArray*)dst; \ + 
cb_data.args.hipMemcpyToArray.dst = (hipArray_t)dst; \ cb_data.args.hipMemcpyToArray.wOffset = (size_t)wOffset; \ cb_data.args.hipMemcpyToArray.hOffset = (size_t)hOffset; \ cb_data.args.hipMemcpyToArray.src = (const void*)src; \ @@ -5548,36 +5538,32 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // __hipPushCallConfiguration[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] case HIP_API_ID___hipPushCallConfiguration: break; -// hipArray3DCreate[('hipArray**', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] +// hipArray3DCreate[('hipArray_t*', 'array'), ('const HIP_ARRAY3D_DESCRIPTOR*', 'pAllocateArray')] case HIP_API_ID_hipArray3DCreate: if (data->args.hipArray3DCreate.array) data->args.hipArray3DCreate.array__val = *(data->args.hipArray3DCreate.array); if (data->args.hipArray3DCreate.pAllocateArray) data->args.hipArray3DCreate.pAllocateArray__val = *(data->args.hipArray3DCreate.pAllocateArray); break; -// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] +// hipArray3DGetDescriptor[('HIP_ARRAY3D_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] case HIP_API_ID_hipArray3DGetDescriptor: if (data->args.hipArray3DGetDescriptor.pArrayDescriptor) data->args.hipArray3DGetDescriptor.pArrayDescriptor__val = *(data->args.hipArray3DGetDescriptor.pArrayDescriptor); - if (data->args.hipArray3DGetDescriptor.array) data->args.hipArray3DGetDescriptor.array__val = *(data->args.hipArray3DGetDescriptor.array); break; -// hipArrayCreate[('hipArray**', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] +// hipArrayCreate[('hipArray_t*', 'pHandle'), ('const HIP_ARRAY_DESCRIPTOR*', 'pAllocateArray')] case HIP_API_ID_hipArrayCreate: if (data->args.hipArrayCreate.pHandle) data->args.hipArrayCreate.pHandle__val = *(data->args.hipArrayCreate.pHandle); if (data->args.hipArrayCreate.pAllocateArray) 
data->args.hipArrayCreate.pAllocateArray__val = *(data->args.hipArrayCreate.pAllocateArray); break; -// hipArrayDestroy[('hipArray*', 'array')] +// hipArrayDestroy[('hipArray_t', 'array')] case HIP_API_ID_hipArrayDestroy: - if (data->args.hipArrayDestroy.array) data->args.hipArrayDestroy.array__val = *(data->args.hipArrayDestroy.array); break; -// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray*', 'array')] +// hipArrayGetDescriptor[('HIP_ARRAY_DESCRIPTOR*', 'pArrayDescriptor'), ('hipArray_t', 'array')] case HIP_API_ID_hipArrayGetDescriptor: if (data->args.hipArrayGetDescriptor.pArrayDescriptor) data->args.hipArrayGetDescriptor.pArrayDescriptor__val = *(data->args.hipArrayGetDescriptor.pArrayDescriptor); - if (data->args.hipArrayGetDescriptor.array) data->args.hipArrayGetDescriptor.array__val = *(data->args.hipArrayGetDescriptor.array); break; -// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray*', 'array')] +// hipArrayGetInfo[('hipChannelFormatDesc*', 'desc'), ('hipExtent*', 'extent'), ('unsigned int*', 'flags'), ('hipArray_t', 'array')] case HIP_API_ID_hipArrayGetInfo: if (data->args.hipArrayGetInfo.desc) data->args.hipArrayGetInfo.desc__val = *(data->args.hipArrayGetInfo.desc); if (data->args.hipArrayGetInfo.extent) data->args.hipArrayGetInfo.extent__val = *(data->args.hipArrayGetInfo.extent); if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = *(data->args.hipArrayGetInfo.flags); - if (data->args.hipArrayGetInfo.array) data->args.hipArrayGetInfo.array__val = *(data->args.hipArrayGetInfo.array); break; // hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] case HIP_API_ID_hipChooseDevice: @@ -5863,9 +5849,8 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipFree[('void*', 'ptr')] case HIP_API_ID_hipFree: break; -// hipFreeArray[('hipArray*', 'array')] +// hipFreeArray[('hipArray_t', 
'array')] case HIP_API_ID_hipFreeArray: - if (data->args.hipFreeArray.array) data->args.hipFreeArray.array__val = *(data->args.hipFreeArray.array); break; // hipFreeAsync[('void*', 'dev_ptr'), ('hipStream_t', 'stream')] case HIP_API_ID_hipFreeAsync: @@ -6340,7 +6325,7 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { if (data->args.hipMalloc3DArray.array) data->args.hipMalloc3DArray.array__val = *(data->args.hipMalloc3DArray.array); if (data->args.hipMalloc3DArray.desc) data->args.hipMalloc3DArray.desc__val = *(data->args.hipMalloc3DArray.desc); break; -// hipMallocArray[('hipArray**', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] +// hipMallocArray[('hipArray_t*', 'array'), ('const hipChannelFormatDesc*', 'desc'), ('size_t', 'width'), ('size_t', 'height'), ('unsigned int', 'flags')] case HIP_API_ID_hipMallocArray: if (data->args.hipMallocArray.array) data->args.hipMallocArray.array__val = *(data->args.hipMallocArray.array); if (data->args.hipMallocArray.desc) data->args.hipMallocArray.desc__val = *(data->args.hipMallocArray.desc); @@ -6520,13 +6505,11 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipMemcpy2DFromArrayAsync[('void*', 'dst'), ('size_t', 'dpitch'), ('hipArray_const_t', 'src'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpy2DFromArrayAsync: break; -// hipMemcpy2DToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] +// hipMemcpy2DToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind')] case HIP_API_ID_hipMemcpy2DToArray: - if 
(data->args.hipMemcpy2DToArray.dst) data->args.hipMemcpy2DToArray.dst__val = *(data->args.hipMemcpy2DToArray.dst); break; -// hipMemcpy2DToArrayAsync[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] +// hipMemcpy2DToArrayAsync[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'spitch'), ('size_t', 'width'), ('size_t', 'height'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpy2DToArrayAsync: - if (data->args.hipMemcpy2DToArrayAsync.dst) data->args.hipMemcpy2DToArrayAsync.dst__val = *(data->args.hipMemcpy2DToArrayAsync.dst); break; // hipMemcpy3D[('const hipMemcpy3DParms*', 'p')] case HIP_API_ID_hipMemcpy3D: @@ -6539,9 +6522,8 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipMemcpyAsync[('void*', 'dst'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpyAsync: break; -// hipMemcpyAtoH[('void*', 'dst'), ('hipArray*', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] +// hipMemcpyAtoH[('void*', 'dst'), ('hipArray_t', 'srcArray'), ('size_t', 'srcOffset'), ('size_t', 'count')] case HIP_API_ID_hipMemcpyAtoH: - if (data->args.hipMemcpyAtoH.srcArray) data->args.hipMemcpyAtoH.srcArray__val = *(data->args.hipMemcpyAtoH.srcArray); break; // hipMemcpyDtoD[('hipDeviceptr_t', 'dst'), ('hipDeviceptr_t', 'src'), ('size_t', 'sizeBytes')] case HIP_API_ID_hipMemcpyDtoD: @@ -6564,9 +6546,8 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipMemcpyFromSymbolAsync[('void*', 'dst'), ('const void*', 'symbol'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpyFromSymbolAsync: break; -// hipMemcpyHtoA[('hipArray*', 
'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] +// hipMemcpyHtoA[('hipArray_t', 'dstArray'), ('size_t', 'dstOffset'), ('const void*', 'srcHost'), ('size_t', 'count')] case HIP_API_ID_hipMemcpyHtoA: - if (data->args.hipMemcpyHtoA.dstArray) data->args.hipMemcpyHtoA.dstArray__val = *(data->args.hipMemcpyHtoA.dstArray); break; // hipMemcpyHtoD[('hipDeviceptr_t', 'dst'), ('void*', 'src'), ('size_t', 'sizeBytes')] case HIP_API_ID_hipMemcpyHtoD: @@ -6588,9 +6569,8 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipMemcpyPeerAsync[('void*', 'dst'), ('int', 'dstDeviceId'), ('const void*', 'src'), ('int', 'srcDevice'), ('size_t', 'sizeBytes'), ('hipStream_t', 'stream')] case HIP_API_ID_hipMemcpyPeerAsync: break; -// hipMemcpyToArray[('hipArray*', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] +// hipMemcpyToArray[('hipArray_t', 'dst'), ('size_t', 'wOffset'), ('size_t', 'hOffset'), ('const void*', 'src'), ('size_t', 'count'), ('hipMemcpyKind', 'kind')] case HIP_API_ID_hipMemcpyToArray: - if (data->args.hipMemcpyToArray.dst) data->args.hipMemcpyToArray.dst__val = *(data->args.hipMemcpyToArray.dst); break; // hipMemcpyToSymbol[('const void*', 'symbol'), ('const void*', 'src'), ('size_t', 'sizeBytes'), ('size_t', 'offset'), ('hipMemcpyKind', 'kind')] case HIP_API_ID_hipMemcpyToSymbol: @@ -6988,8 +6968,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << "hipArray3DGetDescriptor("; if (data->args.hipArray3DGetDescriptor.pArrayDescriptor == NULL) oss << "pArrayDescriptor=NULL"; else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.pArrayDescriptor__val); } - if (data->args.hipArray3DGetDescriptor.array == NULL) oss << ", array=NULL"; - else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipArray3DGetDescriptor.array__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArray3DGetDescriptor.array); oss << ")"; break; case HIP_API_ID_hipArrayCreate: @@ -7002,16 +6981,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipArrayDestroy: oss << "hipArrayDestroy("; - if (data->args.hipArrayDestroy.array == NULL) oss << "array=NULL"; - else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayDestroy.array__val); } + oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayDestroy.array); oss << ")"; break; case HIP_API_ID_hipArrayGetDescriptor: oss << "hipArrayGetDescriptor("; if (data->args.hipArrayGetDescriptor.pArrayDescriptor == NULL) oss << "pArrayDescriptor=NULL"; else { oss << "pArrayDescriptor="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.pArrayDescriptor__val); } - if (data->args.hipArrayGetDescriptor.array == NULL) oss << ", array=NULL"; - else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.array__val); } + oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetDescriptor.array); oss << ")"; break; case HIP_API_ID_hipArrayGetInfo: @@ -7023,7 +7000,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da if (data->args.hipArrayGetInfo.flags == NULL) oss << ", flags=NULL"; else { oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.flags__val); } if (data->args.hipArrayGetInfo.array == NULL) oss << ", array=NULL"; - else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.array__val); } + else { oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.array); } oss << ")"; break; case HIP_API_ID_hipChooseDevice: @@
-7546,8 +7523,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipFreeArray: oss << "hipFreeArray("; - if (data->args.hipFreeArray.array == NULL) oss << "array=NULL"; - else { oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeArray.array__val); } + oss << "array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFreeArray.array); oss << ")"; break; case HIP_API_ID_hipFreeAsync: @@ -8903,8 +8879,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipMemcpy2DToArray: oss << "hipMemcpy2DToArray("; - if (data->args.hipMemcpy2DToArray.dst == NULL) oss << "dst=NULL"; - else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.dst__val); } + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.dst); oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.wOffset); oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.hOffset); oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArray.src); @@ -8916,8 +8891,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipMemcpy2DToArrayAsync: oss << "hipMemcpy2DToArrayAsync("; - if (data->args.hipMemcpy2DToArrayAsync.dst == NULL) oss << "dst=NULL"; - else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.dst__val); } + oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.dst); oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.wOffset); oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.hOffset); oss << ", src="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpy2DToArrayAsync.src); @@ -8953,8 +8927,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da case HIP_API_ID_hipMemcpyAtoH: oss << "hipMemcpyAtoH("; oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.dst); - if (data->args.hipMemcpyAtoH.srcArray == NULL) oss << ", srcArray=NULL"; - else { oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcArray__val); } + oss << ", srcArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcArray); oss << ", srcOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.srcOffset); oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyAtoH.count); oss << ")"; @@ -9020,8 +8993,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipMemcpyHtoA: oss << "hipMemcpyHtoA("; - if (data->args.hipMemcpyHtoA.dstArray == NULL) oss << "dstArray=NULL"; - else { oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstArray__val); } + oss << "dstArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstArray); oss << ", dstOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.dstOffset); oss << ", srcHost="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.srcHost); oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyHtoA.count); @@ -9076,8 +9048,7 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da break; case HIP_API_ID_hipMemcpyToArray: oss << "hipMemcpyToArray("; - if (data->args.hipMemcpyToArray.dst == NULL) oss << "dst=NULL"; - else { oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.dst__val); } + 
oss << "dst="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.dst); oss << ", wOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.wOffset); oss << ", hOffset="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.hOffset); oss << ", src="; roctracer::hip_support::detail::operator<<(oss, data->args.hipMemcpyToArray.src); diff --git a/hipamd/src/hip_formatting.hpp b/hipamd/src/hip_formatting.hpp index 3239ea717..3430f3c3f 100644 --- a/hipamd/src/hip_formatting.hpp +++ b/hipamd/src/hip_formatting.hpp @@ -755,41 +755,6 @@ inline std::ostream& operator<<(std::ostream& os, const hipResourceDesc* s) { return os; } - -inline std::ostream& operator<<(std::ostream& os, const hipArray& s) { - os << '{' - << s.data - << ',' - << s.desc - << ',' - << s.type - << ',' - << s.width - << ',' - << s.height - << ',' - << s.depth - << ',' - << s.Format - << ',' - << s.NumChannels - << ',' - << s.isDrv - << ',' - << s.textureType - << '}'; - return os; -} - -inline std::ostream& operator<<(std::ostream& os, const hipArray* s) { - if (s) { - os << *s; - } else { - os << "nullptr"; - } - return os; -} - inline std::ostream& operator<<(std::ostream& os, const textureReference& s) { os << '{' << s.normalized diff --git a/hipamd/src/hip_graph.cpp b/hipamd/src/hip_graph.cpp index 95aa925f6..d87b19e40 100644 --- a/hipamd/src/hip_graph.cpp +++ b/hipamd/src/hip_graph.cpp @@ -394,7 +394,7 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz return hipSuccess; } -hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray*& dst, size_t& wOffset, +hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset, size_t& hOffset, const void*& src, size_t& spitch, size_t& width, size_t& height, hipMemcpyKind& kind) { ClPrint(amd::LOG_INFO, amd::LOG_API, @@ -477,7 +477,7 @@ hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const 
hip_Memcpy2D* return hipSuccess; } -hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray*& srcArray, +hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray_t& srcArray, size_t& srcOffset, size_t& ByteCount) { ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream); @@ -505,7 +505,7 @@ hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArr return hipSuccess; } -hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray*& dstArray, size_t& dstOffset, +hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray, size_t& dstOffset, const void*& srcHost, size_t& ByteCount) { ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream); diff --git a/hipamd/src/hip_graph_capture.hpp b/hipamd/src/hip_graph_capture.hpp index 9506ae99c..77a27a5dc 100644 --- a/hipamd/src/hip_graph_capture.hpp +++ b/hipamd/src/hip_graph_capture.hpp @@ -52,14 +52,14 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz size_t& hOffsetSrc, size_t& width, size_t& height, hipMemcpyKind& kind); -hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray*& dst, size_t& wOffset, +hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset, size_t& hOffset, const void*& src, size_t& spitch, size_t& width, size_t& height, hipMemcpyKind& kind); -hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray*& srcArray, +hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray_t& srcArray, size_t& srcOffset, size_t& ByteCount); -hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray*& dstArray, size_t& dstOffset, +hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray, size_t& dstOffset, const void*& srcHost, size_t& ByteCount); 
hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*& p); diff --git a/hipamd/src/hip_graph_helper.hpp b/hipamd/src/hip_graph_helper.hpp index a1d82edb1..b19526739 100644 --- a/hipamd/src/hip_graph_helper.hpp +++ b/hipamd/src/hip_graph_helper.hpp @@ -65,13 +65,13 @@ hipError_t ihipMemset3DCommand(std::vector& commands, hipPitchedP hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset, size_t& sym_size, hipDeviceptr_t& device_ptr); -hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoDValidate(hipArray_t srcArray, void* dstDevice, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t dstRowPitch, size_t dstSlicePitch, amd::Memory*& dstMemory, amd::Image*& srcImage, amd::BufferRect& srcRect, amd::BufferRect& dstRect); -hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, amd::Memory*& srcMemory, amd::BufferRect& dstRect, @@ -98,17 +98,17 @@ hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coo amd::BufferRect& srcRect, amd::BufferRect& dstRect); -hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoAValidate(hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, amd::Image*& srcImage, amd::Image*& dstImage); -hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t srcRowPitch, size_t srcSlicePitch, 
amd::Image*& dstImage, amd::BufferRect& srcRect); -hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoHValidate(hipArray_t srcArray, void* dstHost, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t dstRowPitch, size_t dstSlicePitch, amd::Image*& srcImage, amd::BufferRect& dstRect); diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp index 08be24d7a..7cdf9a86c 100644 --- a/hipamd/src/hip_internal.hpp +++ b/hipamd/src/hip_internal.hpp @@ -67,6 +67,20 @@ typedef struct ihipIpcMemHandle_st { char reserved[IHIP_IPC_MEM_RESERVED_SIZE]; } ihipIpcMemHandle_t; +typedef struct hipArray { + void* data; // FIXME: generalize this + struct hipChannelFormatDesc desc; + unsigned int type; + unsigned int width; + unsigned int height; + unsigned int depth; + enum hipArray_Format Format; + unsigned int NumChannels; + bool isDrv; + unsigned int textureType; + unsigned int flags; +}hipArray; + #define IHIP_IPC_EVENT_HANDLE_SIZE 32 #define IHIP_IPC_EVENT_RESERVED_SIZE LP64_SWITCH(28,24) typedef struct ihipIpcEventHandle_st { diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 233fc1e0a..71445219c 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -711,7 +711,7 @@ hipError_t hipHostFree(void* ptr) { HIP_RETURN(ihipFree(ptr)); } -hipError_t ihipArrayDestroy(hipArray* array) { +hipError_t ihipArrayDestroy(hipArray_t array) { if (array == nullptr) { return hipErrorInvalidValue; } @@ -737,7 +737,7 @@ hipError_t ihipArrayDestroy(hipArray* array) { return hipSuccess; } -hipError_t hipFreeArray(hipArray* array) { +hipError_t hipFreeArray(hipArray_t array) { HIP_INIT_API(hipFreeArray, array); CHECK_STREAM_CAPTURE_SUPPORTED(); HIP_RETURN(ihipArrayDestroy(array)); @@ -1033,7 +1033,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, return image; } -hipError_t ihipArrayCreate(hipArray** array, +hipError_t 
ihipArrayCreate(hipArray_t* array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray, unsigned int numMipmapLevels) { if (array == nullptr) { @@ -1096,7 +1096,7 @@ hipError_t ihipArrayCreate(hipArray** array, return hipSuccess; } -hipError_t hipArrayCreate(hipArray** array, +hipError_t hipArrayCreate(hipArray_t* array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(hipArrayCreate, array, pAllocateArray); if (pAllocateArray == nullptr) { @@ -1114,7 +1114,7 @@ hipError_t hipArrayCreate(hipArray** array, } -hipError_t hipMallocArray(hipArray** array, +hipError_t hipMallocArray(hipArray_t* array, const hipChannelFormatDesc* desc, size_t width, size_t height, @@ -1136,7 +1136,7 @@ hipError_t hipMallocArray(hipArray** array, HIP_RETURN(ihipArrayCreate(array, &allocateArray, 0 /* numMipLevels */)); } -hipError_t hipArray3DCreate(hipArray** array, +hipError_t hipArray3DCreate(hipArray_t* array, const HIP_ARRAY3D_DESCRIPTOR* pAllocateArray) { HIP_INIT_API(hipArray3DCreate, array, pAllocateArray); CHECK_STREAM_CAPTURE_SUPPORTED(); @@ -1522,7 +1522,7 @@ hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t By ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true)); } -hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoDValidate(hipArray_t srcArray, void* dstDevice, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t dstRowPitch, size_t dstSlicePitch, amd::Memory*& dstMemory, amd::Image*& srcImage, @@ -1564,7 +1564,7 @@ hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coor return hipSuccess; } -hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, void* dstDevice, +hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray_t srcArray, void* dstDevice, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* 
stream) { @@ -1595,7 +1595,7 @@ hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, voi return hipSuccess; } -hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, amd::Memory*& srcMemory, amd::BufferRect& dstRect, @@ -1637,7 +1637,7 @@ hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coor return hipSuccess; } -hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray* dstArray, +hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hip::Stream* stream) { @@ -1906,7 +1906,7 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOr return hipSuccess; } -hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoAValidate(hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, amd::Image*& srcImage, amd::Image*& dstImage) { if (dstArray == nullptr || srcArray == nullptr) { @@ -1937,7 +1937,7 @@ hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::C return hipSuccess; } -hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hipArray* dstArray, +hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hip::Stream* stream) { amd::Image* srcImage; @@ -1965,7 +1965,7 @@ hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hip return hipSuccess; } -hipError_t 
ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, +hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray_t dstArray, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t srcRowPitch, size_t srcSlicePitch, amd::Image*& dstImage, @@ -1995,7 +1995,7 @@ hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, return hipSuccess; } -hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray* dstArray, +hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hip::Stream* stream, bool isAsync = false) { @@ -2026,7 +2026,7 @@ hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hi return hipSuccess; } -hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3D& srcOrigin, +hipError_t ihipMemcpyAtoHValidate(hipArray_t srcArray, void* dstHost, amd::Coord3D& srcOrigin, amd::Coord3D& dstOrigin, amd::Coord3D& copyRegion, size_t dstRowPitch, size_t dstSlicePitch, amd::Image*& srcImage, amd::BufferRect& dstRect) { @@ -2056,7 +2056,7 @@ hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3 return hipSuccess; } -hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, void* dstHost, +hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray_t srcArray, void* dstHost, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, bool isAsync = false) { @@ -2481,7 +2481,7 @@ hipError_t ihipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, c return ihipMemcpyParam2D(&desc, stream, isAsync); } -hipError_t hipMemcpy2DToArray_common(hipArray* dst, size_t wOffset, size_t hOffset, +hipError_t hipMemcpy2DToArray_common(hipArray_t dst, size_t wOffset, 
size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream=nullptr, bool isAsync = false) { @@ -2499,20 +2499,20 @@ hipError_t hipMemcpy2DToArray_common(hipArray* dst, size_t wOffset, size_t hOffs return ihipMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, isAsync); } -hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { +hipError_t hipMemcpy2DToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); CHECK_STREAM_CAPTURING(); HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind)); } -hipError_t hipMemcpy2DToArray_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { +hipError_t hipMemcpy2DToArray_spt(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpy2DToArray, dst, wOffset, hOffset, src, spitch, width, height, kind); CHECK_STREAM_CAPTURING(); HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind, getPerThreadDefaultStream())); } -hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { +hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpyToArray, dst, wOffset, hOffset, src, count, kind); CHECK_STREAM_CAPTURING(); if (dst == nullptr) { @@ -2582,7 +2582,7 @@ hipError_t hipMemcpyFromArray_spt(void* dst, hipArray_const_t src, size_t wOffse 
getPerThreadDefaultStream())); } -hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcOrigin, +hipError_t ihipMemcpyAtoD(hipArray_t srcArray, void* dstDevice, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; @@ -2596,7 +2596,7 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcO if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } -hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray* dstArray, amd::Coord3D srcOrigin, +hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; @@ -2655,7 +2655,7 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D src if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } -hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D srcOrigin, +hipError_t ihipMemcpyAtoA(hipArray_t srcArray, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream, bool isAsync = false) { amd::Command* command; @@ -2668,7 +2668,7 @@ hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D s if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } -hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray* dstArray, amd::Coord3D srcOrigin, +hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray_t dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; @@ -2682,7 +2682,7 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, 
hipArray* dstArray, amd::Coord3D if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } -hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOrigin, +hipError_t ihipMemcpyAtoH(hipArray_t srcArray, void* dstHost, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; @@ -2697,7 +2697,7 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOri return ihipMemcpyCmdEnqueue(command, isAsync); } -hipError_t hipMemcpyHtoA(hipArray* dstArray, +hipError_t hipMemcpyHtoA(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount) { @@ -2707,7 +2707,7 @@ hipError_t hipMemcpyHtoA(hipArray* dstArray, } hipError_t hipMemcpyAtoH(void* dstHost, - hipArray* srcArray, + hipArray_t srcArray, size_t srcOffset, size_t ByteCount) { HIP_INIT_API(hipMemcpyAtoH, dstHost, srcArray, srcOffset, ByteCount); @@ -3674,14 +3674,14 @@ hipError_t hipDrvPointerGetAttributes(unsigned int numAttributes, hipPointer_att } // ================================================================================================ -hipError_t hipArrayDestroy(hipArray* array) { +hipError_t hipArrayDestroy(hipArray_t array) { HIP_INIT_API(hipArrayDestroy, array); CHECK_STREAM_CAPTURE_SUPPORTED(); HIP_RETURN(ihipArrayDestroy(array)); } hipError_t ihipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* desc, - hipArray* array) { + hipArray_t array) { { amd::ScopedLock lock(hip::hipArraySetLock); if (hip::hipArraySet.find(array) == hip::hipArraySet.end()) { @@ -3702,7 +3702,7 @@ hipError_t ihipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* desc, hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, - hipArray* array) { + hipArray_t array) { HIP_INIT_API(hipArrayGetInfo, desc, extent, flags, array); CHECK_STREAM_CAPTURE_SUPPORTED(); @@ -3739,7 +3739,7 @@ 
hipError_t hipArrayGetInfo(hipChannelFormatDesc* desc, } hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, - hipArray* array) { + hipArray_t array) { HIP_INIT_API(hipArrayGetDescriptor, pArrayDescriptor, array); CHECK_STREAM_CAPTURE_SUPPORTED(); @@ -3766,7 +3766,7 @@ hipError_t hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, } hipError_t hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, - hipArray* array) { + hipArray_t array) { HIP_INIT_API(hipArray3DGetDescriptor, pArrayDescriptor, array); CHECK_STREAM_CAPTURE_SUPPORTED(); @@ -3875,14 +3875,14 @@ hipError_t hipMemcpy2DFromArrayAsync_spt(void* dst, size_t dpitch, hipArray_cons HIP_RETURN_DURATION(hipMemcpy2DFromArray_common(dst, dpitch, src, wOffsetSrc, hOffsetSrc, width, height, kind, stream, true)); } -hipError_t hipMemcpy2DToArrayAsync(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { +hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind, stream); STREAM_CAPTURE(hipMemcpy2DToArrayAsync, stream, dst, wOffset, hOffset, src, spitch, width, height, kind); HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); } -hipError_t hipMemcpy2DToArrayAsync_spt(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { +hipError_t hipMemcpy2DToArrayAsync_spt(hipArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(hipMemcpy2DToArrayAsync, dst, wOffset, hOffset, src, spitch, width, height, kind, 
stream); PER_THREAD_DEFAULT_STREAM(stream); STREAM_CAPTURE(hipMemcpy2DToArrayAsync, stream, dst, wOffset, hOffset, src, spitch, width, height, @@ -3890,9 +3890,9 @@ hipError_t hipMemcpy2DToArrayAsync_spt(hipArray* dst, size_t wOffset, size_t hOf HIP_RETURN_DURATION(hipMemcpy2DToArray_common(dst, wOffset, hOffset, src, spitch, width, height, kind, stream, true)); } -hipError_t hipMemcpyAtoA(hipArray* dstArray, +hipError_t hipMemcpyAtoA(hipArray_t dstArray, size_t dstOffset, - hipArray* srcArray, + hipArray_t srcArray, size_t srcOffset, size_t ByteCount) { HIP_INIT_API(hipMemcpyAtoA, dstArray, dstOffset, srcArray, srcOffset, ByteCount); @@ -3901,7 +3901,7 @@ hipError_t hipMemcpyAtoA(hipArray* dstArray, } hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, - hipArray* srcArray, + hipArray_t srcArray, size_t srcOffset, size_t ByteCount) { HIP_INIT_API(hipMemcpyAtoD, dstDevice, srcArray, srcOffset, ByteCount); @@ -3910,7 +3910,7 @@ hipError_t hipMemcpyAtoD(hipDeviceptr_t dstDevice, } hipError_t hipMemcpyAtoHAsync(void* dstHost, - hipArray* srcArray, + hipArray_t srcArray, size_t srcOffset, size_t ByteCount, hipStream_t stream) { @@ -3919,7 +3919,7 @@ hipError_t hipMemcpyAtoHAsync(void* dstHost, HIP_RETURN_DURATION(ihipMemcpyAtoH(srcArray, dstHost, {srcOffset, 0, 0}, {0, 0, 0}, {ByteCount, 1, 1}, 0, 0, stream, true)); } -hipError_t hipMemcpyDtoA(hipArray* dstArray, +hipError_t hipMemcpyDtoA(hipArray_t dstArray, size_t dstOffset, hipDeviceptr_t srcDevice, size_t ByteCount) { @@ -3928,7 +3928,7 @@ hipError_t hipMemcpyDtoA(hipArray* dstArray, HIP_RETURN_DURATION(ihipMemcpyDtoA(srcDevice, dstArray, {0, 0, 0}, {dstOffset, 0, 0}, {ByteCount, 1, 1}, 0, 0, nullptr)); } -hipError_t hipMemcpyHtoAAsync(hipArray* dstArray, +hipError_t hipMemcpyHtoAAsync(hipArray_t dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, From c4f3303925d5437e60d0a928b3f080abfcf4ab77 Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Wed, 2 Aug 2023 15:46:58 -0400 Subject: [PATCH 15/27] 
SWDEV-371332 - [ABI Break] Return success for unregistered ptr Signed-off-by: sdashmiz Change-Id: Ie58d16420578e7118997eb40a9fd6a6641b666f3 --- hipamd/src/hip_memory.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index 71445219c..0d7af8267 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -3411,10 +3411,16 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void attributes->type = hipMemoryTypeManaged; } HIP_RETURN(hipSuccess); + } else { + attributes->type = hipMemoryTypeUnregistered; + attributes->devicePointer = nullptr; + attributes->hostPointer = nullptr; + attributes->isManaged = false; + attributes->allocationFlags = 0; + attributes->device = hipInvalidDeviceId; + LogPrintfError("Cannot get amd_mem_obj for ptr: 0x%x \n", ptr); } - - LogPrintfError("Cannot get amd_mem_obj for ptr: %p \n", ptr); - HIP_RETURN(hipErrorInvalidValue); + HIP_RETURN(hipSuccess); } // ================================================================================================ From 037fae936b861f2c899acb6650d84649c2cf436c Mon Sep 17 00:00:00 2001 From: taosang2 Date: Wed, 2 Aug 2023 17:42:19 -0400 Subject: [PATCH 16/27] SWDEV-415029 - [ABI Break] Remove hcc symbols Change-Id: Ic0082d3960aadbb7ac559141b6e84bd29a75b5cd --- hipamd/hip-config-amd.cmake | 2 +- .../include/hip/amd_detail/amd_hip_runtime_pt_api.h | 4 ++-- hipamd/include/hip/amd_detail/host_defines.h | 4 ---- hipamd/packaging/hip-devel.postinst | 12 ------------ hipamd/packaging/hip-devel.prerm | 8 -------- 5 files changed, 3 insertions(+), 27 deletions(-) diff --git a/hipamd/hip-config-amd.cmake b/hipamd/hip-config-amd.cmake index 7c86f2586..601cc57fd 100755 --- a/hipamd/hip-config-amd.cmake +++ b/hipamd/hip-config-amd.cmake @@ -84,7 +84,7 @@ endif() set(_IMPORT_PREFIX ${HIP_PACKAGE_PREFIX_DIR}) # Right now this is only supported for amd platforms set_target_properties(hip::host 
PROPERTIES - INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1;__HIP_PLATFORM_AMD__=1" + INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_AMD__=1" ) set_target_properties(hip::amdhip64 PROPERTIES diff --git a/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h b/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h index 070c58bd3..19e767a0d 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h +++ b/hipamd/include/hip/amd_detail/amd_hip_runtime_pt_api.h @@ -25,7 +25,7 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H #define HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H -#if (defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) +#if defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) /// hipStreamPerThread implementation #if defined(HIP_API_PER_THREAD_DEFAULT_STREAM) @@ -192,5 +192,5 @@ hipError_t hipLaunchHostFunc_spt(hipStream_t stream, hipHostFn_t fn, void* userD } #endif // extern "C" -#endif //(defined(__HIP_PLATFORM_HCC__) || defined(__HIP_PLATFORM_AMD__)) && !(defined(__HIP_PLATFORM_NVCC__) || defined(__HIP_PLATFORM_NVIDIA__)) +#endif //defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) #endif //HIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H diff --git a/hipamd/include/hip/amd_detail/host_defines.h b/hipamd/include/hip/amd_detail/host_defines.h index 8caed1d18..0fad2b470 100644 --- a/hipamd/include/hip/amd_detail/host_defines.h +++ b/hipamd/include/hip/amd_detail/host_defines.h @@ -28,10 +28,6 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H -// The follow macro should be removed after upstream updation. -// It's defined here for workarround of rocThrust building failure. 
-#define HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H - // Add guard to Generic Grid Launch method #ifndef GENERIC_GRID_LAUNCH #define GENERIC_GRID_LAUNCH 1 diff --git a/hipamd/packaging/hip-devel.postinst b/hipamd/packaging/hip-devel.postinst index 9b32b0885..9887cb9a0 100755 --- a/hipamd/packaging/hip-devel.postinst +++ b/hipamd/packaging/hip-devel.postinst @@ -21,18 +21,6 @@ ROCMDIR=@ROCM_PATH@ HIPINCDIR=$ROCMDIR/@CMAKE_INSTALL_INCLUDEDIR@/hip CURRENTDIR=`pwd` -# The following will be removed after upstream updation -cd $HIPINCDIR -ln -r -s -f amd_detail hcc_detail -ln -r -s -f nvidia_detail nvcc_detail -cd $CURRENTDIR #FILE_REORG_BACKWARD_COMPATIBILITY HIPINCDIR=$ROCMDIR/hip/include/hip -if [ -d $HIPINCDIR ]; then - # The following will be removed after upstream updation - cd $HIPINCDIR - ln -r -s -f amd_detail hcc_detail - ln -r -s -f nvidia_detail nvcc_detail - cd $CURRENTDIR -fi diff --git a/hipamd/packaging/hip-devel.prerm b/hipamd/packaging/hip-devel.prerm index 9dabd4d45..c8b7114f3 100755 --- a/hipamd/packaging/hip-devel.prerm +++ b/hipamd/packaging/hip-devel.prerm @@ -23,19 +23,11 @@ CURRENTDIR=`pwd` HIPINCDIR=$ROCMDIR/@CMAKE_INSTALL_INCLUDEDIR@/hip ([ ! -d $HIPINCDIR ]) && exit 0 -cd $HIPINCDIR -rm hcc_detail -rm nvcc_detail -cd $CURRENTDIR #FILE_REORG_BACKWARD_COMPATIBILITY #backward copatibility code , to be removed later HIPDIR=$ROCMDIR/hip HIPINCDIR=$ROCMDIR/hip/include/hip ([ ! -d $HIPINCDIR ]) && exit 0 -cd $HIPINCDIR -rm -f hcc_detail -rm -f nvcc_detail -cd $CURRENTDIR ([ ! -d $HIPDIR ]) && exit 0 rmdir --ignore-fail-on-non-empty $HIPDIR From 50b6ef26ec8918ce16c55a71cfad2ec0cd08ac7d Mon Sep 17 00:00:00 2001 From: Satyanvesh Dittakavi Date: Tue, 7 Nov 2023 09:15:32 +0000 Subject: [PATCH 17/27] SWDEV-427522 - Address MIGraphX failures due to unsafe buffer access usage Compiler seem to be stricter in compiler staging builds related to safe buffer programming when compared to other component staging builds. 
This seem to result in additional errors when -Werror is enabled in MIGraphX tests. Removes all the clang pragmas to ignore several type of warnings in all the headers and adds a single pragma which ignores all warnings using Change-Id: I95f302bb285b2451b19dd5dfdb7df29164b0f750 (cherry picked from commit b5d286a6d3dca489777c505fa0a187a890110d6c) --- .../hip/amd_detail/amd_channel_descriptor.h | 9 ------- .../hip/amd_detail/amd_device_functions.h | 27 ------------------- .../include/hip/amd_detail/amd_hip_atomic.h | 12 --------- .../include/hip/amd_detail/amd_hip_complex.h | 16 ----------- .../amd_detail/amd_hip_cooperative_groups.h | 13 --------- hipamd/include/hip/amd_detail/amd_hip_fp16.h | 18 ------------- .../hip/amd_detail/amd_hip_unsafe_atomics.h | 9 ------- .../hip/amd_detail/amd_math_functions.h | 10 ------- .../hip/amd_detail/amd_surface_functions.h | 17 ------------ .../hip/amd_detail/amd_warp_functions.h | 15 ----------- .../hip/amd_detail/device_library_decls.h | 11 -------- .../hip_cooperative_groups_helper.h | 11 -------- hipamd/include/hip/amd_detail/math_fwd.h | 10 ------- hipamd/include/hip/amd_detail/ockl_image.h | 9 ------- .../hip/amd_detail/texture_fetch_functions.h | 16 ----------- .../amd_detail/texture_indirect_functions.h | 15 ----------- hipamd/src/hiprtc/cmake/HIPRTC.cmake | 10 ++++--- 17 files changed, 7 insertions(+), 221 deletions(-) diff --git a/hipamd/include/hip/amd_detail/amd_channel_descriptor.h b/hipamd/include/hip/amd_detail/amd_channel_descriptor.h index f5ba75ebe..c6b150d4b 100644 --- a/hipamd/include/hip/amd_detail/amd_channel_descriptor.h +++ b/hipamd/include/hip/amd_detail/amd_channel_descriptor.h @@ -23,11 +23,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H #define HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wold-style-cast" -#endif - #if !defined(__HIPCC_RTC__) #include #include @@ -360,8 +355,4 @@ struct hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, #endif /* __cplusplus */ -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif /* !HIP_INCLUDE_HIP_AMD_DETAIL_CHANNEL_DESCRIPTOR_H */ diff --git a/hipamd/include/hip/amd_detail/amd_device_functions.h b/hipamd/include/hip/amd_detail/amd_device_functions.h index 5c2945a1f..19f39e16e 100644 --- a/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -23,22 +23,6 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_FUNCTIONS_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wshorten-64-to-32" -#pragma clang diagnostic ignored "-Wimplicit-int-conversion" -#pragma clang diagnostic ignored "-Wimplicit-float-conversion" -#pragma clang diagnostic ignored "-Wmissing-noreturn" -#pragma clang diagnostic ignored "-Wimplicit-fallthrough" -#pragma clang diagnostic ignored "-Wunneeded-internal-declaration" -#pragma clang diagnostic ignored "-Wshift-count-overflow" -#endif - #if !defined(__HIPCC_RTC__) #include #include "host_defines.h" @@ -459,10 +443,6 @@ __device__ static inline unsigned long long int __double2ull_ru(double x) { __device__ static inline unsigned long long int __double2ull_rz(double x) { return (unsigned long long int)x; } -#if 
defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif __device__ static inline long long int __double_as_longlong(double x) { static_assert(sizeof(long long) == sizeof(double), ""); @@ -471,9 +451,6 @@ __device__ static inline long long int __double_as_longlong(double x) { return tmp; } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif /* __device__ unsigned short __float2half_rn(float x); @@ -1121,8 +1098,4 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) { } #endif // !__OPENMP_AMDGCN__ -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif diff --git a/hipamd/include/hip/amd_detail/amd_hip_atomic.h b/hipamd/include/hip/amd_detail/amd_hip_atomic.h index 3f55831f7..c02a57b07 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_atomic.h +++ b/hipamd/include/hip/amd_detail/amd_hip_atomic.h @@ -22,14 +22,6 @@ THE SOFTWARE. #pragma once -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wsign-conversion" -#endif - #if !defined(__HIPCC_RTC__) #include "amd_device_functions.h" #endif @@ -1635,8 +1627,4 @@ unsigned long long atomicXor( return __atomic_fetch_xor(address, val, __ATOMIC_RELAXED); } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif // __hip_atomic_compare_exchange_strong diff --git a/hipamd/include/hip/amd_detail/amd_hip_complex.h b/hipamd/include/hip/amd_detail/amd_hip_complex.h index 933fd4e16..fc8208cad 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_complex.h +++ b/hipamd/include/hip/amd_detail/amd_hip_complex.h @@ -23,18 +23,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" -#pragma clang diagnostic ignored "-Wimplicit-float-conversion" -#pragma clang diagnostic ignored "-Wdouble-promotion" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif - #if !defined(__HIPCC_RTC__) #include "hip/amd_detail/amd_hip_vector_types.h" #endif @@ -325,8 +313,4 @@ __HOST_DEVICE__ static inline hipDoubleComplex hipCfma(hipDoubleComplex p, hipDo return make_hipDoubleComplex(real, imag); } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif //HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COMPLEX_H diff --git a/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h index 04ecca70d..eeb67bd07 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h +++ b/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h @@ -32,15 +32,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wpadded" -#endif - #if __cplusplus #if !defined(__HIPCC_RTC__) #include @@ -842,9 +833,5 @@ __CG_QUALIFIER__ thread_block_tile tiled_partition(const Paren } } // namespace cooperative_groups -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif // __cplusplus #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_H diff --git a/hipamd/include/hip/amd_detail/amd_hip_fp16.h b/hipamd/include/hip/amd_detail/amd_hip_fp16.h index 1fa2ba0d4..62d88a375 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_fp16.h +++ b/hipamd/include/hip/amd_detail/amd_hip_fp16.h @@ -24,20 +24,6 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H #define HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wfloat-conversion" -#pragma clang diagnostic ignored "-Wdouble-promotion" -#pragma clang diagnostic ignored "-Wnested-anon-types" -#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#pragma clang diagnostic ignored "-Wfloat-equal" -#endif - #if defined(__HIPCC_RTC__) #define __HOST_DEVICE__ __device__ #else @@ -1820,8 +1806,4 @@ THE SOFTWARE. 
#endif #endif // !defined(__clang__) && defined(__GNUC__) -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_FP16_H diff --git a/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h b/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h index 836e60e9d..59841ab9b 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h +++ b/hipamd/include/hip/amd_detail/amd_hip_unsafe_atomics.h @@ -24,11 +24,6 @@ THE SOFTWARE. #ifdef __cplusplus -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wold-style-cast" -#endif - /** * @brief Unsafe floating point rmw atomic add. * @@ -567,8 +562,4 @@ __device__ inline double safeAtomicMin(double* addr, double val) { #endif } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif diff --git a/hipamd/include/hip/amd_detail/amd_math_functions.h b/hipamd/include/hip/amd_detail/amd_math_functions.h index f2e6b9a51..fca4d7e40 100644 --- a/hipamd/include/hip/amd_detail/amd_math_functions.h +++ b/hipamd/include/hip/amd_detail/amd_math_functions.h @@ -22,12 +22,6 @@ THE SOFTWARE. #pragma once -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wundef" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#endif - #if !defined(__HIPCC_RTC__) #include "hip_fp16_math_fwd.h" #include "amd_hip_vector_types.h" @@ -108,7 +102,3 @@ uint amd_mixed_dot(uint a, uint b, uint c, bool saturate) { #if !defined(__HIPCC_RTC__) #include #endif - -#if defined(__clang__) -#pragma clang diagnostic pop -#endif diff --git a/hipamd/include/hip/amd_detail/amd_surface_functions.h b/hipamd/include/hip/amd_detail/amd_surface_functions.h index 5974fb637..556988a8e 100644 --- a/hipamd/include/hip/amd_detail/amd_surface_functions.h +++ b/hipamd/include/hip/amd_detail/amd_surface_functions.h @@ -23,19 +23,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_SURFACE_FUNCTIONS_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wc++17-extensions" -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wextra-semi-stmt" -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wunused-template" -#endif - #if defined(__cplusplus) #if !defined(__HIPCC_RTC__) @@ -254,8 +241,4 @@ static __device__ __hip_img_chk__ void surfCubemapLayeredwrite(T* data, hipSurfa #endif -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif diff --git a/hipamd/include/hip/amd_detail/amd_warp_functions.h b/hipamd/include/hip/amd_detail/amd_warp_functions.h index c43b81f96..559ab20b3 100644 --- a/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -23,17 +23,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_WARP_FUNCTIONS_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif - __device__ static inline unsigned __hip_ds_bpermute(int index, unsigned src) { union { int i; unsigned u; float f; } tmp; tmp.u = src; tmp.i = __builtin_amdgcn_ds_bpermute(index, tmp.i); @@ -502,8 +491,4 @@ unsigned long long __shfl_xor(unsigned long long var, int lane_mask, int width = return tmp1; } -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif diff --git a/hipamd/include/hip/amd_detail/device_library_decls.h b/hipamd/include/hip/amd_detail/device_library_decls.h index c055ad1d8..edc4692c8 100644 --- a/hipamd/include/hip/amd_detail/device_library_decls.h +++ b/hipamd/include/hip/amd_detail/device_library_decls.h @@ -31,13 +31,6 @@ THE SOFTWARE. 
#ifndef HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H #define HIP_INCLUDE_HIP_AMD_DETAIL_DEVICE_LIBRARY_DECLS_H -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wold-style-cast" -#endif - #if !defined(__HIPCC_RTC__) #include "hip/amd_detail/host_defines.h" #endif @@ -137,8 +130,4 @@ __device__ inline static __local void* __to_local(unsigned x) { return (__local #define __CLK_LOCAL_MEM_FENCE 0x01 typedef unsigned __cl_mem_fence_flags; -#if defined(__clang__) -#pragma clang diagnostic pop -#endif - #endif diff --git a/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h b/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h index a451b144e..f62246a03 100644 --- a/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h +++ b/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h @@ -40,14 +40,6 @@ THE SOFTWARE. #define __align__(x) __attribute__((aligned(x))) #endif -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wshorten-64-to-32" -#endif - #if !defined(__CG_QUALIFIER__) #define __CG_QUALIFIER__ __device__ __forceinline__ #endif @@ -245,9 +237,6 @@ __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int /** * @} */ -#if defined(__clang__) -#pragma clang diagnostic pop -#endif #endif // __cplusplus #endif // HIP_INCLUDE_HIP_AMD_DETAIL_HIP_COOPERATIVE_GROUPS_HELPER_H diff --git a/hipamd/include/hip/amd_detail/math_fwd.h b/hipamd/include/hip/amd_detail/math_fwd.h index dbfd6e061..9951f8fc3 100644 --- a/hipamd/include/hip/amd_detail/math_fwd.h +++ b/hipamd/include/hip/amd_detail/math_fwd.h @@ -22,12 +22,6 @@ THE SOFTWARE. 
#pragma once -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wundef" -#pragma clang diagnostic ignored "-Wreserved-identifier" -#endif - #if !defined(__HIPCC_RTC__) #include "host_defines.h" #include "amd_hip_vector_types.h" // For Native_vec_ @@ -702,7 +696,3 @@ double __ocml_fma_rtz_f64(double, double, double); #if defined(__cplusplus) } // extern "C" #endif - -#if defined(__clang__) -#pragma clang diagnostic pop -#endif diff --git a/hipamd/include/hip/amd_detail/ockl_image.h b/hipamd/include/hip/amd_detail/ockl_image.h index 136c6e352..50223add4 100644 --- a/hipamd/include/hip/amd_detail/ockl_image.h +++ b/hipamd/include/hip/amd_detail/ockl_image.h @@ -22,11 +22,6 @@ THE SOFTWARE. #pragma once -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wreserved-identifier" -#endif - #if !defined(__HIPCC_RTC__) #include #endif @@ -180,7 +175,3 @@ __device__ int __ockl_image_channel_order_CM(unsigned int ADDRESS_SPACE_CONSTANT __device__ int __ockl_image_channel_order_CMa(unsigned int ADDRESS_SPACE_CONSTANT* i); } - -#if defined(__clang__) -#pragma clang diagnostic pop -#endif \ No newline at end of file diff --git a/hipamd/include/hip/amd_detail/texture_fetch_functions.h b/hipamd/include/hip/amd_detail/texture_fetch_functions.h index baf2b2803..c4dcbe78d 100644 --- a/hipamd/include/hip/amd_detail/texture_fetch_functions.h +++ b/hipamd/include/hip/amd_detail/texture_fetch_functions.h @@ -22,18 +22,6 @@ THE SOFTWARE. 
#pragma once -#if defined(__clang__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++17-extensions" -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wc++98-compat" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wextra-semi-stmt" -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wunused-template" -#endif - #if defined(__cplusplus) #if !defined(__HIPCC_RTC__) @@ -501,7 +489,3 @@ static __forceinline__ __device__ __hip_img_chk__ __hip_tex2dgather_ret_t Date: Wed, 8 Nov 2023 11:19:56 -0600 Subject: [PATCH 18/27] SWDEV-431560 - use XCC_ID in __smid() for gfx941 and gfx942 __smid() needs to use both HW_ID and XCC_ID for gfx940, gfx941, and gfx942. Previously, we only did this for gfx940 and thus XCC_ID was incorrectly not passed back on the other two architectures. Change-Id: I9fb13b6cef3280e15463443a180174629d03f8b2 --- hipamd/include/hip/amd_detail/amd_device_functions.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hipamd/include/hip/amd_detail/amd_device_functions.h b/hipamd/include/hip/amd_detail/amd_device_functions.h index 19f39e16e..37ef53ff2 100644 --- a/hipamd/include/hip/amd_detail/amd_device_functions.h +++ b/hipamd/include/hip/amd_detail/amd_device_functions.h @@ -926,7 +926,7 @@ int __syncthreads_or(int predicate) PIPE_ID 7:6 Pipeline from which the wave was dispatched. CU_ID 11:8 Compute Unit the wave is assigned to. SH_ID 12 Shader Array (within an SE) the wave is assigned to. - SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a, gfx940 + SE_ID 15:13 Shader Engine the wave is assigned to for gfx908, gfx90a, gfx940-942 14:13 Shader Engine the wave is assigned to for Vega. 
TG_ID 19:16 Thread-group ID VM_ID 23:20 Virtual Memory ID @@ -955,7 +955,7 @@ int __syncthreads_or(int predicate) #if (defined(__gfx908__) || defined(__gfx90a__) || \ defined(__GFX11__)) #define HW_ID_SE_ID_SIZE 3 -#else //4 SEs/XCC for gfx940 +#else //4 SEs/XCC for gfx940-942 #define HW_ID_SE_ID_SIZE 2 #endif #if (defined(__GFX10__) || defined(__GFX11__)) @@ -966,7 +966,7 @@ int __syncthreads_or(int predicate) #define HW_ID_SE_ID_OFFSET 13 #endif -#if (defined(__gfx940__)) +#if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) #define XCC_ID 20 #define XCC_ID_XCC_ID_SIZE 4 #define XCC_ID_XCC_ID_OFFSET 0 @@ -1004,7 +1004,7 @@ unsigned __smid(void) unsigned sa_id = __builtin_amdgcn_s_getreg( GETREG_IMMED(HW_ID_SA_ID_SIZE - 1, HW_ID_SA_ID_OFFSET, HW_ID)); #else - #if defined(__gfx940__) + #if (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) unsigned xcc_id = __builtin_amdgcn_s_getreg( GETREG_IMMED(XCC_ID_XCC_ID_SIZE - 1, XCC_ID_XCC_ID_OFFSET, XCC_ID)); #endif @@ -1017,7 +1017,7 @@ unsigned __smid(void) temp = (temp << HW_ID_WGP_ID_SIZE) | wgp_id; return temp; //TODO : CU Mode impl - #elif defined(__gfx940__) + #elif (defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) unsigned temp = xcc_id; temp = (temp << HW_ID_SE_ID_SIZE) | se_id; temp = (temp << HW_ID_CU_ID_SIZE) | cu_id; From 1a0330dea065fa8316d1df70b8ed769cdcd51466 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 2 Nov 2023 11:24:09 +0000 Subject: [PATCH 19/27] SWDEV-430415 - Bundle libamdhip64.so.5 in runtime package Change-Id: I4d2c8cc5a8d34d9f7d4a92ff11ff36d756d019d5 --- hipamd/CMakeLists.txt | 10 +++ hipamd/download_libamhip64_v5.sh | 117 +++++++++++++++++++++++++++++++ hipamd/packaging/CMakeLists.txt | 1 + 3 files changed, 128 insertions(+) create mode 100755 hipamd/download_libamhip64_v5.sh diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 9f68e5a70..7ad30019d 100755 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -297,6 
+297,16 @@ if(HIP_RUNTIME STREQUAL "rocclr") add_subdirectory(src) endif() +# Download libamdhip64.so.5 +if(HIP_PLATFORM STREQUAL "amd") + if(NOT WIN32) + execute_process(COMMAND sh -c "${CMAKE_CURRENT_SOURCE_DIR}/download_libamhip64_v5.sh" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_ECHO STDERR RESULT_VARIABLE DWLD_HIP_SO_RC) + if (DWLD_HIP_SO_RC AND NOT DWLD_HIP_SO_RC EQUAL 0) + message(FATAL_ERROR "Failed to download libamdhip64.so.5") + endif() + endif() +endif() + # Build doxygen documentation find_program(DOXYGEN_EXE doxygen) if(DOXYGEN_EXE) diff --git a/hipamd/download_libamhip64_v5.sh b/hipamd/download_libamhip64_v5.sh new file mode 100755 index 000000000..5b7517aa5 --- /dev/null +++ b/hipamd/download_libamhip64_v5.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+
+function download_and_extract_deb()
+{
+    # Download the 5.7.1 hip runtime deb package from the URL in $1; give up (non-zero) if the download fails
+    wget -O hip-runtime-amd.deb "$1" || { rm -f hip-runtime-amd.deb; return 1; }
+
+    # Extract only libamdhip64.so.5.7.50701 from the package's filesystem tarball
+    dpkg-deb --fsys-tarfile hip-runtime-amd.deb | tar xf - ./opt/rocm-5.7.1/lib/libamdhip64.so.5.7.50701
+
+    # Rename to libamdhip64.so.5 in the current directory; remember whether that worked
+    mv ./opt/rocm-5.7.1/lib/libamdhip64.so.5.7.50701 libamdhip64.so.5; local rc=$?
+
+    # Clean up, then report the rename status instead of the status of rm
+    rm -rf ./opt; rm -f hip-runtime-amd.deb; return "$rc"
+}
+
+function download_and_extract_rpm()
+{
+    # Download the 5.7.1 hip runtime rpm package from the URL in $1; give up (non-zero) if the download fails
+    wget -O hip-runtime-amd.rpm "$1" || { rm -f hip-runtime-amd.rpm; return 1; }
+
+    # Unpack the whole rpm payload into the current directory (the library is expected under ./opt)
+    rpm2cpio hip-runtime-amd.rpm | cpio -idm
+
+    # Rename to libamdhip64.so.5 in the current directory; remember whether that worked
+    mv ./opt/rocm-5.7.1/lib/libamdhip64.so.5.7.50701 libamdhip64.so.5; local rc=$?
+
+    # Clean up, then report the rename status instead of the status of rm
+    rm -rf ./opt; rm -f hip-runtime-amd.rpm; return "$rc"
+}
+
+# Detect OS (sed strips only the KEY= prefix, so values keep any quotes used in /etc/os-release)
+ID=$(sed -n 's/^ID=//p' /etc/os-release)
+VERSION_ID=$(sed -n 's/^VERSION_ID=//p' /etc/os-release)
+PLATFORM_ID=$(sed -n 's/^PLATFORM_ID=//p' /etc/os-release)
+echo "ID=$ID"
+echo "VERSION_ID=$VERSION_ID"
+echo "PLATFORM_ID=$PLATFORM_ID"
+
+# Download and extract libamdhip64.so.5.7.50701 based on OS (patterns include the literal quotes kept above)
+case "$ID" in
+    ubuntu)
+        # Ubuntu
+        if [ "$VERSION_ID" == "\"20.04\"" ]; then
+            # Ubuntu 20.04
+            download_link="https://repo.radeon.com/rocm/apt/5.7.1/pool/main/h/hip-runtime-amd/hip-runtime-amd_5.7.31921.50701-98~20.04_amd64.deb"
+            download_and_extract_deb "$download_link"
+        elif [ "$VERSION_ID" == "\"22.04\"" ]; then
+            # Ubuntu 22.04
+            download_link="https://repo.radeon.com/rocm/apt/5.7.1/pool/main/h/hip-runtime-amd/hip-runtime-amd_5.7.31921.50701-98~22.04_amd64.deb"
+            download_and_extract_deb "$download_link"
+        else
+            # Unknown Ubuntu version
+            echo "Unknown Ubuntu OS"
+        fi
+        ;;
+    \"centos\")
+        # CentOS
+        if [ "$VERSION_ID" == "\"7\"" ]; then
+            # CentOS 7
+            download_link="https://repo.radeon.com/rocm/yum/5.7.1/main/hip-runtime-amd-5.7.31921.50701-98.el7.x86_64.rpm"
+            download_and_extract_rpm "$download_link"
+        else
+            # Unknown CentOS version
+            echo "Unknown CentOS"
+        fi
+        ;;
+    \"rhel\")
+        # RHEL
+        if [ "$PLATFORM_ID" == "\"platform:el8\"" ]; then
+            # RHEL 8
+            download_link="https://repo.radeon.com/rocm/rhel8/5.7.1/main/hip-runtime-amd-5.7.31921.50701-98.el8.x86_64.rpm"
+            download_and_extract_rpm "$download_link"
+        elif [ "$PLATFORM_ID" == "\"platform:el9\"" ]; then
+            # RHEL 9
+            download_link="https://repo.radeon.com/rocm/rhel9/5.7.1/main/hip-runtime-amd-5.7.31921.50701-98.el9.x86_64.rpm"
+            download_and_extract_rpm "$download_link"
+        else
+            # Unknown RHEL version
+            echo "Unknown RHEL OS"
+        fi
+        ;;
+    \"sles\")
+        # SLES
+        if [ "$VERSION_ID" == "\"15.4\"" ]; then
+            # SLES 15SP4
+            download_link="https://repo.radeon.com/rocm/zyp/5.7.1/main/hip-runtime-amd-5.7.31921.50701-sles154.98.x86_64.rpm"
+            download_and_extract_rpm "$download_link"
+        else
+            # Unknown SLES version
+            echo "Unknown SLES OS"
+        fi
+        ;;
+    *)
+        echo "Unknown OS"
+        ;;
+esac
diff --git a/hipamd/packaging/CMakeLists.txt b/hipamd/packaging/CMakeLists.txt
index 7e21f025e..a877364da 100644
--- a/hipamd/packaging/CMakeLists.txt
+++ b/hipamd/packaging/CMakeLists.txt
@@ -42,6 +42,7 @@ set(CPACK_RPM_PACKAGE_LICENSE "MIT")
 #Begin binary files install
 if(HIP_PLATFORM STREQUAL "amd" )
    if(BUILD_SHARED_LIBS)
+      install(PROGRAMS ${PROJECT_BINARY_DIR}/libamdhip64.so.5 DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
       install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
       install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)
       install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary)

From 4d0aa65ce3c583614aaad81c0034eaf89c896da2 Mon Sep 17 00:00:00 2001
From: jiabaxie
Date: Mon, 13 Nov 2023 21:40:09 -0500
Subject: [PATCH 20/27] SWDEV-427855, SWDEV-306642 - CLR change for profiler, and
 Add texture/surface/device capabilities device struct entries

- alias hipGetDeviceProperties to
hipGetDevicePropertiesR0600 - alias hipDeviceProp_t to hipDeviceProp_tR0600 - remove gcnArch from new device property struct - add new requested struct members Change-Id: If3f5dbef3d608487d9f6f419285f4bf577ea9bf0 --- hipamd/include/hip/amd_detail/hip_prof_str.h | 492 +++++++++++-- hipamd/src/CMakeLists.txt | 25 +- hipamd/src/amdhip.def | 4 + hipamd/src/hip_device.cpp | 232 +++++- hipamd/src/hip_device_runtime.cpp | 735 +++++++++++-------- hipamd/src/hip_gl.cpp | 1 + hipamd/src/hip_hcc.map.in | 4 + hipamd/src/hip_peer.cpp | 2 +- hipamd/src/hip_prof_gen.py | 23 +- hipamd/src/hiprtc/hiprtcInternal.cpp | 7 +- rocclr/device/device.hpp | 7 +- rocclr/device/pal/paldevice.cpp | 3 + rocclr/device/rocm/rocdevice.cpp | 21 + 13 files changed, 1126 insertions(+), 430 deletions(-) diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index cbf974e21..562bd14c9 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -7,7 +7,12 @@ #define _HIP_PROF_STR_H #define HIP_PROF_VER 1 -#include +#include +#include +#include "amd_hip_gl_interop.h" + +#define HIP_API_ID_CONCAT_HELPER(a,b) a##b +#define HIP_API_ID_CONCAT(a,b) HIP_API_ID_CONCAT_HELPER(a,b) // HIP API callbacks ID enumeration enum hip_api_id_t { @@ -18,7 +23,7 @@ enum hip_api_id_t { HIP_API_ID_hipArray3DCreate = 3, HIP_API_ID_hipArrayCreate = 4, HIP_API_ID_hipArrayDestroy = 5, - HIP_API_ID_hipChooseDevice = 6, + HIP_API_ID_hipChooseDeviceR0000 = 6, HIP_API_ID_hipConfigureCall = 7, HIP_API_ID_hipCtxCreate = 8, HIP_API_ID_hipCtxDestroy = 9, @@ -93,7 +98,7 @@ enum hip_api_id_t { HIP_API_ID_hipGetDevice = 78, HIP_API_ID_hipGetDeviceCount = 79, HIP_API_ID_hipGetDeviceFlags = 80, - HIP_API_ID_hipGetDeviceProperties = 81, + HIP_API_ID_hipGetDevicePropertiesR0000 = 81, HIP_API_ID_RESERVED_82 = 82, HIP_API_ID_hipGetErrorString = 83, HIP_API_ID_hipGetLastError = 84, @@ -378,7 +383,23 @@ enum hip_api_id_t { 
HIP_API_ID_hipStreamGetDevice = 363, HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray = 364, HIP_API_ID_hipExtGetLastError = 365, - HIP_API_ID_LAST = 365, + HIP_API_ID_hipChooseDeviceR0600 = 366, + HIP_API_ID_hipDrvGraphAddMemcpyNode = 367, + HIP_API_ID_hipDrvGraphMemcpyNodeGetParams = 368, + HIP_API_ID_hipDrvGraphMemcpyNodeSetParams = 369, + HIP_API_ID_hipGetDevicePropertiesR0600 = 370, + HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode = 371, + HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode = 372, + HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams = 373, + HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams = 374, + HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams = 375, + HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams = 376, + HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams = 377, + HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams = 378, + HIP_API_ID_LAST = 378, + + HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), + HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), HIP_API_ID_hipBindTexture = HIP_API_ID_NONE, HIP_API_ID_hipBindTexture2D = HIP_API_ID_NONE, @@ -393,14 +414,11 @@ enum hip_api_id_t { HIP_API_ID_hipGetTextureObjectTextureDesc = HIP_API_ID_NONE, HIP_API_ID_hipGetTextureReference = HIP_API_ID_NONE, HIP_API_ID_hipMemcpy2DArrayToArray = HIP_API_ID_NONE, - HIP_API_ID_hipMemcpyArrayToArray = HIP_API_ID_NONE, HIP_API_ID_hipMemcpyAtoA = HIP_API_ID_NONE, HIP_API_ID_hipMemcpyAtoD = HIP_API_ID_NONE, HIP_API_ID_hipMemcpyAtoHAsync = HIP_API_ID_NONE, HIP_API_ID_hipMemcpyDtoA = HIP_API_ID_NONE, - HIP_API_ID_hipMemcpyFromArrayAsync = HIP_API_ID_NONE, HIP_API_ID_hipMemcpyHtoAAsync = HIP_API_ID_NONE, - HIP_API_ID_hipMemcpyToArrayAsync = HIP_API_ID_NONE, HIP_API_ID_hipSetValidDevices = HIP_API_ID_NONE, HIP_API_ID_hipTexObjectCreate = HIP_API_ID_NONE, HIP_API_ID_hipTexObjectDestroy = HIP_API_ID_NONE, @@ -419,6 +437,9 @@ enum hip_api_id_t { 
HIP_API_ID_hipUnbindTexture = HIP_API_ID_NONE, }; +#undef HIP_API_ID_CONCAT_HELPER +#undef HIP_API_ID_CONCAT + // Return the HIP API string for a given callback ID static inline const char* hip_api_name(const uint32_t id) { switch(id) { @@ -430,7 +451,8 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipArrayDestroy: return "hipArrayDestroy"; case HIP_API_ID_hipArrayGetDescriptor: return "hipArrayGetDescriptor"; case HIP_API_ID_hipArrayGetInfo: return "hipArrayGetInfo"; - case HIP_API_ID_hipChooseDevice: return "hipChooseDevice"; + case HIP_API_ID_hipChooseDeviceR0000: return "hipChooseDeviceR0000"; + case HIP_API_ID_hipChooseDeviceR0600: return "hipChooseDeviceR0600"; case HIP_API_ID_hipConfigureCall: return "hipConfigureCall"; case HIP_API_ID_hipCreateSurfaceObject: return "hipCreateSurfaceObject"; case HIP_API_ID_hipCtxCreate: return "hipCtxCreate"; @@ -485,6 +507,9 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipDeviceSynchronize: return "hipDeviceSynchronize"; case HIP_API_ID_hipDeviceTotalMem: return "hipDeviceTotalMem"; case HIP_API_ID_hipDriverGetVersion: return "hipDriverGetVersion"; + case HIP_API_ID_hipDrvGraphAddMemcpyNode: return "hipDrvGraphAddMemcpyNode"; + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: return "hipDrvGraphMemcpyNodeGetParams"; + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: return "hipDrvGraphMemcpyNodeSetParams"; case HIP_API_ID_hipDrvMemcpy2DUnaligned: return "hipDrvMemcpy2DUnaligned"; case HIP_API_ID_hipDrvMemcpy3D: return "hipDrvMemcpy3D"; case HIP_API_ID_hipDrvMemcpy3DAsync: return "hipDrvMemcpy3DAsync"; @@ -496,6 +521,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipEventQuery: return "hipEventQuery"; case HIP_API_ID_hipEventRecord: return "hipEventRecord"; case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; + case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; case 
HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount"; case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel"; case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice"; @@ -504,6 +530,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipExtStreamCreateWithCUMask: return "hipExtStreamCreateWithCUMask"; case HIP_API_ID_hipExtStreamGetCUMask: return "hipExtStreamGetCUMask"; case HIP_API_ID_hipExternalMemoryGetMappedBuffer: return "hipExternalMemoryGetMappedBuffer"; + case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: return "hipExternalMemoryGetMappedMipmappedArray"; case HIP_API_ID_hipFree: return "hipFree"; case HIP_API_ID_hipFreeArray: return "hipFreeArray"; case HIP_API_ID_hipFreeAsync: return "hipFreeAsync"; @@ -519,7 +546,8 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipGetDevice: return "hipGetDevice"; case HIP_API_ID_hipGetDeviceCount: return "hipGetDeviceCount"; case HIP_API_ID_hipGetDeviceFlags: return "hipGetDeviceFlags"; - case HIP_API_ID_hipGetDeviceProperties: return "hipGetDeviceProperties"; + case HIP_API_ID_hipGetDevicePropertiesR0000: return "hipGetDevicePropertiesR0000"; + case HIP_API_ID_hipGetDevicePropertiesR0600: return "hipGetDevicePropertiesR0600"; case HIP_API_ID_hipGetErrorString: return "hipGetErrorString"; case HIP_API_ID_hipGetLastError: return "hipGetLastError"; case HIP_API_ID_hipGetMipmappedArrayLevel: return "hipGetMipmappedArrayLevel"; @@ -530,6 +558,8 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipGraphAddEmptyNode: return "hipGraphAddEmptyNode"; case HIP_API_ID_hipGraphAddEventRecordNode: return "hipGraphAddEventRecordNode"; case HIP_API_ID_hipGraphAddEventWaitNode: return "hipGraphAddEventWaitNode"; + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: return "hipGraphAddExternalSemaphoresSignalNode"; + case 
HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: return "hipGraphAddExternalSemaphoresWaitNode"; case HIP_API_ID_hipGraphAddHostNode: return "hipGraphAddHostNode"; case HIP_API_ID_hipGraphAddKernelNode: return "hipGraphAddKernelNode"; case HIP_API_ID_hipGraphAddMemAllocNode: return "hipGraphAddMemAllocNode"; @@ -553,6 +583,8 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipGraphExecDestroy: return "hipGraphExecDestroy"; case HIP_API_ID_hipGraphExecEventRecordNodeSetEvent: return "hipGraphExecEventRecordNodeSetEvent"; case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: return "hipGraphExecEventWaitNodeSetEvent"; + case HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: return "hipGraphExecExternalSemaphoresSignalNodeSetParams"; + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: return "hipGraphExecExternalSemaphoresWaitNodeSetParams"; case HIP_API_ID_hipGraphExecHostNodeSetParams: return "hipGraphExecHostNodeSetParams"; case HIP_API_ID_hipGraphExecKernelNodeSetParams: return "hipGraphExecKernelNodeSetParams"; case HIP_API_ID_hipGraphExecMemcpyNodeSetParams: return "hipGraphExecMemcpyNodeSetParams"; @@ -561,6 +593,10 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol: return "hipGraphExecMemcpyNodeSetParamsToSymbol"; case HIP_API_ID_hipGraphExecMemsetNodeSetParams: return "hipGraphExecMemsetNodeSetParams"; case HIP_API_ID_hipGraphExecUpdate: return "hipGraphExecUpdate"; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: return "hipGraphExternalSemaphoresSignalNodeGetParams"; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: return "hipGraphExternalSemaphoresSignalNodeSetParams"; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: return "hipGraphExternalSemaphoresWaitNodeGetParams"; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: return "hipGraphExternalSemaphoresWaitNodeSetParams"; case 
HIP_API_ID_hipGraphGetEdges: return "hipGraphGetEdges"; case HIP_API_ID_hipGraphGetNodes: return "hipGraphGetNodes"; case HIP_API_ID_hipGraphGetRootNodes: return "hipGraphGetRootNodes"; @@ -781,8 +817,6 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease"; case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync"; - case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: return "hipExternalMemoryGetMappedMipmappedArray"; - case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; }; return "unknown"; }; @@ -798,7 +832,8 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipArrayDestroy", name) == 0) return HIP_API_ID_hipArrayDestroy; if (strcmp("hipArrayGetDescriptor", name) == 0) return HIP_API_ID_hipArrayGetDescriptor; if (strcmp("hipArrayGetInfo", name) == 0) return HIP_API_ID_hipArrayGetInfo; - if (strcmp("hipChooseDevice", name) == 0) return HIP_API_ID_hipChooseDevice; + if (strcmp("hipChooseDeviceR0000", name) == 0) return HIP_API_ID_hipChooseDeviceR0000; + if (strcmp("hipChooseDeviceR0600", name) == 0) return HIP_API_ID_hipChooseDeviceR0600; if (strcmp("hipConfigureCall", name) == 0) return HIP_API_ID_hipConfigureCall; if (strcmp("hipCreateSurfaceObject", name) == 0) return HIP_API_ID_hipCreateSurfaceObject; if (strcmp("hipCtxCreate", name) == 0) return HIP_API_ID_hipCtxCreate; @@ -853,6 +888,9 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipDeviceSynchronize", name) == 0) return HIP_API_ID_hipDeviceSynchronize; if (strcmp("hipDeviceTotalMem", name) == 0) return HIP_API_ID_hipDeviceTotalMem; if (strcmp("hipDriverGetVersion", name) == 0) return HIP_API_ID_hipDriverGetVersion; + if (strcmp("hipDrvGraphAddMemcpyNode", name) == 0) return HIP_API_ID_hipDrvGraphAddMemcpyNode; + if (strcmp("hipDrvGraphMemcpyNodeGetParams", name) == 0) 
return HIP_API_ID_hipDrvGraphMemcpyNodeGetParams; + if (strcmp("hipDrvGraphMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipDrvGraphMemcpyNodeSetParams; if (strcmp("hipDrvMemcpy2DUnaligned", name) == 0) return HIP_API_ID_hipDrvMemcpy2DUnaligned; if (strcmp("hipDrvMemcpy3D", name) == 0) return HIP_API_ID_hipDrvMemcpy3D; if (strcmp("hipDrvMemcpy3DAsync", name) == 0) return HIP_API_ID_hipDrvMemcpy3DAsync; @@ -864,6 +902,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipEventQuery", name) == 0) return HIP_API_ID_hipEventQuery; if (strcmp("hipEventRecord", name) == 0) return HIP_API_ID_hipEventRecord; if (strcmp("hipEventSynchronize", name) == 0) return HIP_API_ID_hipEventSynchronize; + if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError; if (strcmp("hipExtGetLinkTypeAndHopCount", name) == 0) return HIP_API_ID_hipExtGetLinkTypeAndHopCount; if (strcmp("hipExtLaunchKernel", name) == 0) return HIP_API_ID_hipExtLaunchKernel; if (strcmp("hipExtLaunchMultiKernelMultiDevice", name) == 0) return HIP_API_ID_hipExtLaunchMultiKernelMultiDevice; @@ -872,6 +911,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipExtStreamCreateWithCUMask", name) == 0) return HIP_API_ID_hipExtStreamCreateWithCUMask; if (strcmp("hipExtStreamGetCUMask", name) == 0) return HIP_API_ID_hipExtStreamGetCUMask; if (strcmp("hipExternalMemoryGetMappedBuffer", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedBuffer; + if (strcmp("hipExternalMemoryGetMappedMipmappedArray", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray; if (strcmp("hipFree", name) == 0) return HIP_API_ID_hipFree; if (strcmp("hipFreeArray", name) == 0) return HIP_API_ID_hipFreeArray; if (strcmp("hipFreeAsync", name) == 0) return HIP_API_ID_hipFreeAsync; @@ -887,7 +927,8 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipGetDevice", name) == 0) return HIP_API_ID_hipGetDevice; if 
(strcmp("hipGetDeviceCount", name) == 0) return HIP_API_ID_hipGetDeviceCount; if (strcmp("hipGetDeviceFlags", name) == 0) return HIP_API_ID_hipGetDeviceFlags; - if (strcmp("hipGetDeviceProperties", name) == 0) return HIP_API_ID_hipGetDeviceProperties; + if (strcmp("hipGetDevicePropertiesR0000", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0000; + if (strcmp("hipGetDevicePropertiesR0600", name) == 0) return HIP_API_ID_hipGetDevicePropertiesR0600; if (strcmp("hipGetErrorString", name) == 0) return HIP_API_ID_hipGetErrorString; if (strcmp("hipGetLastError", name) == 0) return HIP_API_ID_hipGetLastError; if (strcmp("hipGetMipmappedArrayLevel", name) == 0) return HIP_API_ID_hipGetMipmappedArrayLevel; @@ -898,6 +939,8 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipGraphAddEmptyNode", name) == 0) return HIP_API_ID_hipGraphAddEmptyNode; if (strcmp("hipGraphAddEventRecordNode", name) == 0) return HIP_API_ID_hipGraphAddEventRecordNode; if (strcmp("hipGraphAddEventWaitNode", name) == 0) return HIP_API_ID_hipGraphAddEventWaitNode; + if (strcmp("hipGraphAddExternalSemaphoresSignalNode", name) == 0) return HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode; + if (strcmp("hipGraphAddExternalSemaphoresWaitNode", name) == 0) return HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode; if (strcmp("hipGraphAddHostNode", name) == 0) return HIP_API_ID_hipGraphAddHostNode; if (strcmp("hipGraphAddKernelNode", name) == 0) return HIP_API_ID_hipGraphAddKernelNode; if (strcmp("hipGraphAddMemAllocNode", name) == 0) return HIP_API_ID_hipGraphAddMemAllocNode; @@ -921,6 +964,8 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipGraphExecDestroy", name) == 0) return HIP_API_ID_hipGraphExecDestroy; if (strcmp("hipGraphExecEventRecordNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventRecordNodeSetEvent; if (strcmp("hipGraphExecEventWaitNodeSetEvent", name) == 0) return HIP_API_ID_hipGraphExecEventWaitNodeSetEvent; + if 
(strcmp("hipGraphExecExternalSemaphoresSignalNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams; + if (strcmp("hipGraphExecExternalSemaphoresWaitNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams; if (strcmp("hipGraphExecHostNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecHostNodeSetParams; if (strcmp("hipGraphExecKernelNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecKernelNodeSetParams; if (strcmp("hipGraphExecMemcpyNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParams; @@ -929,6 +974,10 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipGraphExecMemcpyNodeSetParamsToSymbol", name) == 0) return HIP_API_ID_hipGraphExecMemcpyNodeSetParamsToSymbol; if (strcmp("hipGraphExecMemsetNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExecMemsetNodeSetParams; if (strcmp("hipGraphExecUpdate", name) == 0) return HIP_API_ID_hipGraphExecUpdate; + if (strcmp("hipGraphExternalSemaphoresSignalNodeGetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams; + if (strcmp("hipGraphExternalSemaphoresSignalNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams; + if (strcmp("hipGraphExternalSemaphoresWaitNodeGetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams; + if (strcmp("hipGraphExternalSemaphoresWaitNodeSetParams", name) == 0) return HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams; if (strcmp("hipGraphGetEdges", name) == 0) return HIP_API_ID_hipGraphGetEdges; if (strcmp("hipGraphGetNodes", name) == 0) return HIP_API_ID_hipGraphGetNodes; if (strcmp("hipGraphGetRootNodes", name) == 0) return HIP_API_ID_hipGraphGetRootNodes; @@ -1149,8 +1198,6 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease; if (strcmp("hipUserObjectRetain", 
name) == 0) return HIP_API_ID_hipUserObjectRetain; if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; - if (strcmp("hipExternalMemoryGetMappedMipmappedArray", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray; - if (strcmp("hipExtGetLastError", name) == 0) return HIP_API_ID_hipExtGetLastError; return HIP_API_ID_NONE; } @@ -1212,9 +1259,15 @@ typedef struct hip_api_data_s { struct { int* device; int device__val; - const hipDeviceProp_t* prop; - hipDeviceProp_t prop__val; - } hipChooseDevice; + const hipDeviceProp_tR0000* prop; + hipDeviceProp_tR0000 prop__val; + } hipChooseDeviceR0000; + struct { + int* device; + int device__val; + const hipDeviceProp_tR0600* prop; + hipDeviceProp_tR0600 prop__val; + } hipChooseDeviceR0600; struct { dim3 gridDim; dim3 blockDim; @@ -1441,6 +1494,27 @@ typedef struct hip_api_data_s { int* driverVersion; int driverVersion__val; } hipDriverGetVersion; + struct { + hipGraphNode_t* phGraphNode; + hipGraphNode_t phGraphNode__val; + hipGraph_t hGraph; + const hipGraphNode_t* dependencies; + hipGraphNode_t dependencies__val; + size_t numDependencies; + const HIP_MEMCPY3D* copyParams; + HIP_MEMCPY3D copyParams__val; + hipCtx_t ctx; + } hipDrvGraphAddMemcpyNode; + struct { + hipGraphNode_t hNode; + HIP_MEMCPY3D* nodeParams; + HIP_MEMCPY3D nodeParams__val; + } hipDrvGraphMemcpyNodeGetParams; + struct { + hipGraphNode_t hNode; + const HIP_MEMCPY3D* nodeParams; + HIP_MEMCPY3D nodeParams__val; + } hipDrvGraphMemcpyNodeSetParams; struct { const hip_Memcpy2D* pCopy; hip_Memcpy2D pCopy__val; @@ -1560,6 +1634,13 @@ typedef struct hip_api_data_s { const hipExternalMemoryBufferDesc* bufferDesc; hipExternalMemoryBufferDesc bufferDesc__val; } hipExternalMemoryGetMappedBuffer; + struct { + hipMipmappedArray_t* mipmap; + hipMipmappedArray_t mipmap__val; + hipExternalMemory_t extMem; + const hipExternalMemoryMipmappedArrayDesc* mipmapDesc; + hipExternalMemoryMipmappedArrayDesc 
mipmapDesc__val; + } hipExternalMemoryGetMappedMipmappedArray; struct { void* ptr; } hipFree; @@ -1626,10 +1707,15 @@ typedef struct hip_api_data_s { unsigned int flags__val; } hipGetDeviceFlags; struct { - hipDeviceProp_t* props; - hipDeviceProp_t props__val; - hipDevice_t device; - } hipGetDeviceProperties; + hipDeviceProp_tR0000* prop; + hipDeviceProp_tR0000 prop__val; + int device; + } hipGetDevicePropertiesR0000; + struct { + hipDeviceProp_tR0600* prop; + hipDeviceProp_tR0600 prop__val; + int deviceId; + } hipGetDevicePropertiesR0600; struct { hipArray_t* levelArray; hipArray_t levelArray__val; @@ -1689,6 +1775,26 @@ typedef struct hip_api_data_s { size_t numDependencies; hipEvent_t event; } hipGraphAddEventWaitNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphAddExternalSemaphoresSignalNode; + struct { + hipGraphNode_t* pGraphNode; + hipGraphNode_t pGraphNode__val; + hipGraph_t graph; + const hipGraphNode_t* pDependencies; + hipGraphNode_t pDependencies__val; + size_t numDependencies; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphAddExternalSemaphoresWaitNode; struct { hipGraphNode_t* pGraphNode; hipGraphNode_t pGraphNode__val; @@ -1849,6 +1955,18 @@ typedef struct hip_api_data_s { hipGraphNode_t hNode; hipEvent_t event; } hipGraphExecEventWaitNodeSetEvent; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphExecExternalSemaphoresSignalNodeSetParams; + struct { + hipGraphExec_t hGraphExec; + hipGraphNode_t hNode; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + 
hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphExecExternalSemaphoresWaitNodeSetParams; struct { hipGraphExec_t hGraphExec; hipGraphNode_t node; @@ -1907,6 +2025,26 @@ typedef struct hip_api_data_s { hipGraphExecUpdateResult* updateResult_out; hipGraphExecUpdateResult updateResult_out__val; } hipGraphExecUpdate; + struct { + hipGraphNode_t hNode; + hipExternalSemaphoreSignalNodeParams* params_out; + hipExternalSemaphoreSignalNodeParams params_out__val; + } hipGraphExternalSemaphoresSignalNodeGetParams; + struct { + hipGraphNode_t hNode; + const hipExternalSemaphoreSignalNodeParams* nodeParams; + hipExternalSemaphoreSignalNodeParams nodeParams__val; + } hipGraphExternalSemaphoresSignalNodeSetParams; + struct { + hipGraphNode_t hNode; + hipExternalSemaphoreWaitNodeParams* params_out; + hipExternalSemaphoreWaitNodeParams params_out__val; + } hipGraphExternalSemaphoresWaitNodeGetParams; + struct { + hipGraphNode_t hNode; + const hipExternalSemaphoreWaitNodeParams* nodeParams; + hipExternalSemaphoreWaitNodeParams nodeParams__val; + } hipGraphExternalSemaphoresWaitNodeSetParams; struct { hipGraph_t graph; hipGraphNode_t* from; @@ -3262,12 +3400,6 @@ typedef struct hip_api_data_s { unsigned int numExtSems; hipStream_t stream; } hipWaitExternalSemaphoresAsync; - struct { - hipMipmappedArray_t* mipmap; - hipExternalMemory_t extMem; - const hipExternalMemoryMipmappedArrayDesc* mipmapDesc; - hipExternalMemoryMipmappedArrayDesc mipmapDesc__val; - } hipExternalMemoryGetMappedMipmappedArray; } args; uint64_t *phase_data; } hip_api_data_t; @@ -3318,10 +3450,15 @@ typedef struct hip_api_data_s { cb_data.args.hipArrayGetInfo.flags = (unsigned int*)flags; \ cb_data.args.hipArrayGetInfo.array = (hipArray_t)array; \ }; -// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] -#define INIT_hipChooseDevice_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipChooseDevice.device = (int*)device; \ - cb_data.args.hipChooseDevice.prop = (const 
hipDeviceProp_t*)properties; \ +// hipChooseDeviceR0000[('int*', 'device'), ('const hipDeviceProp_tR0000*', 'prop')] +#define INIT_hipChooseDeviceR0000_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipChooseDeviceR0000.device = (int*)device; \ + cb_data.args.hipChooseDeviceR0000.prop = (const hipDeviceProp_tR0000*)properties; \ +}; +// hipChooseDeviceR0600[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')] +#define INIT_hipChooseDeviceR0600_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipChooseDeviceR0600.device = (int*)device; \ + cb_data.args.hipChooseDeviceR0600.prop = (const hipDeviceProp_tR0600*)properties; \ }; // hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] #define INIT_hipConfigureCall_CB_ARGS_DATA(cb_data) { \ @@ -3576,6 +3713,15 @@ typedef struct hip_api_data_s { #define INIT_hipDriverGetVersion_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipDriverGetVersion.driverVersion = (int*)driverVersion; \ }; +// hipDrvGraphAddMemcpyNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')] +#define INIT_hipDrvGraphAddMemcpyNode_CB_ARGS_DATA(cb_data) { \ +}; +// hipDrvGraphMemcpyNodeGetParams[('hipGraphNode_t', 'hNode'), ('HIP_MEMCPY3D*', 'nodeParams')] +#define INIT_hipDrvGraphMemcpyNodeGetParams_CB_ARGS_DATA(cb_data) { \ +}; +// hipDrvGraphMemcpyNodeSetParams[('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'nodeParams')] +#define INIT_hipDrvGraphMemcpyNodeSetParams_CB_ARGS_DATA(cb_data) { \ +}; // hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')] #define INIT_hipDrvMemcpy2DUnaligned_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipDrvMemcpy2DUnaligned.pCopy = (const hip_Memcpy2D*)pCopy; \ @@ -3628,6 +3774,9 @@ typedef struct hip_api_data_s { #define INIT_hipEventSynchronize_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipEventSynchronize.event = 
(hipEvent_t)event; \ }; +// hipExtGetLastError[] +#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \ +}; // hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')] #define INIT_hipExtGetLinkTypeAndHopCount_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipExtGetLinkTypeAndHopCount.device1 = (int)device1; \ @@ -3696,10 +3845,10 @@ typedef struct hip_api_data_s { }; // hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')] #define INIT_hipExternalMemoryGetMappedMipmappedArray_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmap = (hipMipmappedArray_t*)mipmap; \ - cb_data.args.hipExternalMemoryGetMappedMipmappedArray.extMem = (hipExternalMemory_t)extMem; \ - cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc = (const hipExternalMemoryMipmappedArrayDesc*)mipmapDesc; \ - }; + cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmap = (hipMipmappedArray_t*)mipmap; \ + cb_data.args.hipExternalMemoryGetMappedMipmappedArray.extMem = (hipExternalMemory_t)extMem; \ + cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc = (const hipExternalMemoryMipmappedArrayDesc*)mipmapDesc; \ +}; // hipFree[('void*', 'ptr')] #define INIT_hipFree_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipFree.ptr = (void*)ptr; \ @@ -3772,10 +3921,15 @@ typedef struct hip_api_data_s { #define INIT_hipGetDeviceFlags_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipGetDeviceFlags.flags = (unsigned int*)flags; \ }; -// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')] -#define INIT_hipGetDeviceProperties_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipGetDeviceProperties.props = (hipDeviceProp_t*)props; \ - cb_data.args.hipGetDeviceProperties.device = (hipDevice_t)device; \ +// hipGetDevicePropertiesR0000[('hipDeviceProp_tR0000*', 
'prop'), ('int', 'device')] +#define INIT_hipGetDevicePropertiesR0000_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevicePropertiesR0000.prop = (hipDeviceProp_tR0000*)prop; \ + cb_data.args.hipGetDevicePropertiesR0000.device = (int)device; \ +}; +// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'prop'), ('int', 'deviceId')] +#define INIT_hipGetDevicePropertiesR0600_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGetDevicePropertiesR0600.prop = (hipDeviceProp_tR0600*)prop; \ + cb_data.args.hipGetDevicePropertiesR0600.deviceId = (int)device; \ }; // hipGetErrorString[] #define INIT_hipGetErrorString_CB_ARGS_DATA(cb_data) { \ @@ -3837,6 +3991,12 @@ typedef struct hip_api_data_s { cb_data.args.hipGraphAddEventWaitNode.numDependencies = (size_t)numDependencies; \ cb_data.args.hipGraphAddEventWaitNode.event = (hipEvent_t)event; \ }; +// hipGraphAddExternalSemaphoresSignalNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddExternalSemaphoresSignalNode_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphAddExternalSemaphoresWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define INIT_hipGraphAddExternalSemaphoresWaitNode_CB_ARGS_DATA(cb_data) { \ +}; // hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] #define INIT_hipGraphAddHostNode_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipGraphAddHostNode.pGraphNode = (hipGraphNode_t*)pGraphNode; \ @@ -3991,6 +4151,12 @@ typedef struct hip_api_data_s { cb_data.args.hipGraphExecEventWaitNodeSetEvent.hNode = (hipGraphNode_t)hNode; \ 
cb_data.args.hipGraphExecEventWaitNodeSetEvent.event = (hipEvent_t)event; \ }; +// hipGraphExecExternalSemaphoresSignalNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphExecExternalSemaphoresSignalNodeSetParams_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphExecExternalSemaphoresWaitNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define INIT_hipGraphExecExternalSemaphoresWaitNodeSetParams_CB_ARGS_DATA(cb_data) { \ +}; // hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] #define INIT_hipGraphExecHostNodeSetParams_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipGraphExecHostNodeSetParams.hGraphExec = (hipGraphExec_t)hGraphExec; \ @@ -4051,6 +4217,18 @@ typedef struct hip_api_data_s { cb_data.args.hipGraphExecUpdate.hErrorNode_out = (hipGraphNode_t*)hErrorNode_out; \ cb_data.args.hipGraphExecUpdate.updateResult_out = (hipGraphExecUpdateResult*)updateResult_out; \ }; +// hipGraphExternalSemaphoresSignalNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreSignalNodeParams*', 'params_out')] +#define INIT_hipGraphExternalSemaphoresSignalNodeGetParams_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphExternalSemaphoresSignalNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] +#define INIT_hipGraphExternalSemaphoresSignalNodeSetParams_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphExternalSemaphoresWaitNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreWaitNodeParams*', 'params_out')] +#define INIT_hipGraphExternalSemaphoresWaitNodeGetParams_CB_ARGS_DATA(cb_data) { \ +}; +// hipGraphExternalSemaphoresWaitNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] +#define 
INIT_hipGraphExternalSemaphoresWaitNodeSetParams_CB_ARGS_DATA(cb_data) { \ +}; // hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] #define INIT_hipGraphGetEdges_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipGraphGetEdges.graph = (hipGraph_t)graph; \ @@ -5441,9 +5619,6 @@ typedef struct hip_api_data_s { cb_data.args.hipWaitExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ cb_data.args.hipWaitExternalSemaphoresAsync.stream = (hipStream_t)stream; \ }; -// hipExtGetLastError[] -#define INIT_hipExtGetLastError_CB_ARGS_DATA(cb_data) { \ -}; #define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) // Macros for non-public API primitives @@ -5473,8 +5648,6 @@ typedef struct hip_api_data_s { #define INIT_hipGetTextureReference_CB_ARGS_DATA(cb_data) {}; // hipMemcpy2DArrayToArray() #define INIT_hipMemcpy2DArrayToArray_CB_ARGS_DATA(cb_data) {}; -// hipMemcpyArrayToArray() -#define INIT_hipMemcpyArrayToArray_CB_ARGS_DATA(cb_data) {}; // hipMemcpyAtoA() #define INIT_hipMemcpyAtoA_CB_ARGS_DATA(cb_data) {}; // hipMemcpyAtoD() @@ -5483,12 +5656,8 @@ typedef struct hip_api_data_s { #define INIT_hipMemcpyAtoHAsync_CB_ARGS_DATA(cb_data) {}; // hipMemcpyDtoA() #define INIT_hipMemcpyDtoA_CB_ARGS_DATA(cb_data) {}; -// hipMemcpyFromArrayAsync() -#define INIT_hipMemcpyFromArrayAsync_CB_ARGS_DATA(cb_data) {}; // hipMemcpyHtoAAsync() #define INIT_hipMemcpyHtoAAsync_CB_ARGS_DATA(cb_data) {}; -// hipMemcpyToArrayAsync() -#define INIT_hipMemcpyToArrayAsync_CB_ARGS_DATA(cb_data) {}; // hipSetValidDevices() #define INIT_hipSetValidDevices_CB_ARGS_DATA(cb_data) {}; // hipTexObjectCreate() @@ -5565,10 +5734,15 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { if (data->args.hipArrayGetInfo.extent) data->args.hipArrayGetInfo.extent__val = *(data->args.hipArrayGetInfo.extent); if (data->args.hipArrayGetInfo.flags) data->args.hipArrayGetInfo.flags__val = 
*(data->args.hipArrayGetInfo.flags); break; -// hipChooseDevice[('int*', 'device'), ('const hipDeviceProp_t*', 'prop')] - case HIP_API_ID_hipChooseDevice: - if (data->args.hipChooseDevice.device) data->args.hipChooseDevice.device__val = *(data->args.hipChooseDevice.device); - if (data->args.hipChooseDevice.prop) data->args.hipChooseDevice.prop__val = *(data->args.hipChooseDevice.prop); +// hipChooseDeviceR0000[('int*', 'device'), ('const hipDeviceProp_tR0000*', 'prop')] + case HIP_API_ID_hipChooseDeviceR0000: + if (data->args.hipChooseDeviceR0000.device) data->args.hipChooseDeviceR0000.device__val = *(data->args.hipChooseDeviceR0000.device); + if (data->args.hipChooseDeviceR0000.prop) data->args.hipChooseDeviceR0000.prop__val = *(data->args.hipChooseDeviceR0000.prop); + break; +// hipChooseDeviceR0600[('int*', 'device'), ('const hipDeviceProp_tR0600*', 'prop')] + case HIP_API_ID_hipChooseDeviceR0600: + if (data->args.hipChooseDeviceR0600.device) data->args.hipChooseDeviceR0600.device__val = *(data->args.hipChooseDeviceR0600.device); + if (data->args.hipChooseDeviceR0600.prop) data->args.hipChooseDeviceR0600.prop__val = *(data->args.hipChooseDeviceR0600.prop); break; // hipConfigureCall[('dim3', 'gridDim'), ('dim3', 'blockDim'), ('size_t', 'sharedMem'), ('hipStream_t', 'stream')] case HIP_API_ID_hipConfigureCall: @@ -5765,6 +5939,20 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { case HIP_API_ID_hipDriverGetVersion: if (data->args.hipDriverGetVersion.driverVersion) data->args.hipDriverGetVersion.driverVersion__val = *(data->args.hipDriverGetVersion.driverVersion); break; +// hipDrvGraphAddMemcpyNode[('hipGraphNode_t*', 'phGraphNode'), ('hipGraph_t', 'hGraph'), ('const hipGraphNode_t*', 'dependencies'), ('size_t', 'numDependencies'), ('const HIP_MEMCPY3D*', 'copyParams'), ('hipCtx_t', 'ctx')] + case HIP_API_ID_hipDrvGraphAddMemcpyNode: + if (data->args.hipDrvGraphAddMemcpyNode.phGraphNode) 
data->args.hipDrvGraphAddMemcpyNode.phGraphNode__val = *(data->args.hipDrvGraphAddMemcpyNode.phGraphNode); + if (data->args.hipDrvGraphAddMemcpyNode.dependencies) data->args.hipDrvGraphAddMemcpyNode.dependencies__val = *(data->args.hipDrvGraphAddMemcpyNode.dependencies); + if (data->args.hipDrvGraphAddMemcpyNode.copyParams) data->args.hipDrvGraphAddMemcpyNode.copyParams__val = *(data->args.hipDrvGraphAddMemcpyNode.copyParams); + break; +// hipDrvGraphMemcpyNodeGetParams[('hipGraphNode_t', 'hNode'), ('HIP_MEMCPY3D*', 'nodeParams')] + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: + if (data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams) data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams__val = *(data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams); + break; +// hipDrvGraphMemcpyNodeSetParams[('hipGraphNode_t', 'hNode'), ('const HIP_MEMCPY3D*', 'nodeParams')] + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: + if (data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams) data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams__val = *(data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams); + break; // hipDrvMemcpy2DUnaligned[('const hip_Memcpy2D*', 'pCopy')] case HIP_API_ID_hipDrvMemcpy2DUnaligned: if (data->args.hipDrvMemcpy2DUnaligned.pCopy) data->args.hipDrvMemcpy2DUnaligned.pCopy__val = *(data->args.hipDrvMemcpy2DUnaligned.pCopy); @@ -5806,6 +5994,9 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipEventSynchronize[('hipEvent_t', 'event')] case HIP_API_ID_hipEventSynchronize: break; +// hipExtGetLastError[] + case HIP_API_ID_hipExtGetLastError: + break; // hipExtGetLinkTypeAndHopCount[('int', 'device1'), ('int', 'device2'), ('unsigned int*', 'linktype'), ('unsigned int*', 'hopcount')] case HIP_API_ID_hipExtGetLinkTypeAndHopCount: if (data->args.hipExtGetLinkTypeAndHopCount.linktype) data->args.hipExtGetLinkTypeAndHopCount.linktype__val = *(data->args.hipExtGetLinkTypeAndHopCount.linktype); @@ -5844,6 +6035,7 @@ static inline void 
hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { break; // hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')] case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap) data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap__val = *(data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap); if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc) data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val = *(data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc); break; // hipFree[('void*', 'ptr')] @@ -5899,9 +6091,13 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { case HIP_API_ID_hipGetDeviceFlags: if (data->args.hipGetDeviceFlags.flags) data->args.hipGetDeviceFlags.flags__val = *(data->args.hipGetDeviceFlags.flags); break; -// hipGetDeviceProperties[('hipDeviceProp_t*', 'props'), ('hipDevice_t', 'device')] - case HIP_API_ID_hipGetDeviceProperties: - if (data->args.hipGetDeviceProperties.props) data->args.hipGetDeviceProperties.props__val = *(data->args.hipGetDeviceProperties.props); +// hipGetDevicePropertiesR0000[('hipDeviceProp_tR0000*', 'prop'), ('int', 'device')] + case HIP_API_ID_hipGetDevicePropertiesR0000: + if (data->args.hipGetDevicePropertiesR0000.prop) data->args.hipGetDevicePropertiesR0000.prop__val = *(data->args.hipGetDevicePropertiesR0000.prop); + break; +// hipGetDevicePropertiesR0600[('hipDeviceProp_tR0600*', 'prop'), ('int', 'deviceId')] + case HIP_API_ID_hipGetDevicePropertiesR0600: + if (data->args.hipGetDevicePropertiesR0600.prop) data->args.hipGetDevicePropertiesR0600.prop__val = *(data->args.hipGetDevicePropertiesR0600.prop); break; // hipGetErrorString[] case HIP_API_ID_hipGetErrorString: @@ -5909,9 +6105,6 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { 
// hipGetLastError[] case HIP_API_ID_hipGetLastError: break; -// hipExtGetLastError[] - case HIP_API_ID_hipExtGetLastError: - break; // hipGetMipmappedArrayLevel[('hipArray_t*', 'levelArray'), ('hipMipmappedArray_const_t', 'mipmappedArray'), ('unsigned int', 'level')] case HIP_API_ID_hipGetMipmappedArrayLevel: if (data->args.hipGetMipmappedArrayLevel.levelArray) data->args.hipGetMipmappedArrayLevel.levelArray__val = *(data->args.hipGetMipmappedArrayLevel.levelArray); @@ -5949,6 +6142,18 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { if (data->args.hipGraphAddEventWaitNode.pGraphNode) data->args.hipGraphAddEventWaitNode.pGraphNode__val = *(data->args.hipGraphAddEventWaitNode.pGraphNode); if (data->args.hipGraphAddEventWaitNode.pDependencies) data->args.hipGraphAddEventWaitNode.pDependencies__val = *(data->args.hipGraphAddEventWaitNode.pDependencies); break; +// hipGraphAddExternalSemaphoresSignalNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode) data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies) data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams) data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams__val = *(data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams); + break; +// hipGraphAddExternalSemaphoresWaitNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 
'numDependencies'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode) data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode__val = *(data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies) data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies__val = *(data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams) data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams__val = *(data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams); + break; // hipGraphAddHostNode[('hipGraphNode_t*', 'pGraphNode'), ('hipGraph_t', 'graph'), ('const hipGraphNode_t*', 'pDependencies'), ('size_t', 'numDependencies'), ('const hipHostNodeParams*', 'pNodeParams')] case HIP_API_ID_hipGraphAddHostNode: if (data->args.hipGraphAddHostNode.pGraphNode) data->args.hipGraphAddHostNode.pGraphNode__val = *(data->args.hipGraphAddHostNode.pGraphNode); @@ -6047,6 +6252,14 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipGraphExecEventWaitNodeSetEvent[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('hipEvent_t', 'event')] case HIP_API_ID_hipGraphExecEventWaitNodeSetEvent: break; +// hipGraphExecExternalSemaphoresSignalNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: + if (data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams) data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams__val = *(data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams); + break; +// hipGraphExecExternalSemaphoresWaitNodeSetParams[('hipGraphExec_t', 'hGraphExec'), 
('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: + if (data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams) data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams__val = *(data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams); + break; // hipGraphExecHostNodeSetParams[('hipGraphExec_t', 'hGraphExec'), ('hipGraphNode_t', 'node'), ('const hipHostNodeParams*', 'pNodeParams')] case HIP_API_ID_hipGraphExecHostNodeSetParams: if (data->args.hipGraphExecHostNodeSetParams.pNodeParams) data->args.hipGraphExecHostNodeSetParams.pNodeParams__val = *(data->args.hipGraphExecHostNodeSetParams.pNodeParams); @@ -6077,6 +6290,22 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { if (data->args.hipGraphExecUpdate.hErrorNode_out) data->args.hipGraphExecUpdate.hErrorNode_out__val = *(data->args.hipGraphExecUpdate.hErrorNode_out); if (data->args.hipGraphExecUpdate.updateResult_out) data->args.hipGraphExecUpdate.updateResult_out__val = *(data->args.hipGraphExecUpdate.updateResult_out); break; +// hipGraphExternalSemaphoresSignalNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreSignalNodeParams*', 'params_out')] + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: + if (data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out) data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out__val = *(data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out); + break; +// hipGraphExternalSemaphoresSignalNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreSignalNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: + if (data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams) data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams__val = 
*(data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams); + break; +// hipGraphExternalSemaphoresWaitNodeGetParams[('hipGraphNode_t', 'hNode'), ('hipExternalSemaphoreWaitNodeParams*', 'params_out')] + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: + if (data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out) data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out__val = *(data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out); + break; +// hipGraphExternalSemaphoresWaitNodeSetParams[('hipGraphNode_t', 'hNode'), ('const hipExternalSemaphoreWaitNodeParams*', 'nodeParams')] + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: + if (data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams) data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams__val = *(data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams); + break; // hipGraphGetEdges[('hipGraph_t', 'graph'), ('hipGraphNode_t*', 'from'), ('hipGraphNode_t*', 'to'), ('size_t*', 'numEdges')] case HIP_API_ID_hipGraphGetEdges: if (data->args.hipGraphGetEdges.from) data->args.hipGraphGetEdges.from__val = *(data->args.hipGraphGetEdges.from); @@ -7003,12 +7232,20 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", array="; roctracer::hip_support::detail::operator<<(oss, data->args.hipArrayGetInfo.array); oss << ")"; break; - case HIP_API_ID_hipChooseDevice: - oss << "hipChooseDevice("; - if (data->args.hipChooseDevice.device == NULL) oss << "device=NULL"; - else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDevice.device__val); } - if (data->args.hipChooseDevice.prop == NULL) oss << ", prop=NULL"; - else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDevice.prop__val); } + case HIP_API_ID_hipChooseDeviceR0000: + oss << "hipChooseDeviceR0000("; + if (data->args.hipChooseDeviceR0000.device == NULL) oss 
<< "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0000.device__val); } + if (data->args.hipChooseDeviceR0000.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0000.prop__val); } + oss << ")"; + break; + case HIP_API_ID_hipChooseDeviceR0600: + oss << "hipChooseDeviceR0600("; + if (data->args.hipChooseDeviceR0600.device == NULL) oss << "device=NULL"; + else { oss << "device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0600.device__val); } + if (data->args.hipChooseDeviceR0600.prop == NULL) oss << ", prop=NULL"; + else { oss << ", prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipChooseDeviceR0600.prop__val); } oss << ")"; break; case HIP_API_ID_hipConfigureCall: @@ -7351,6 +7588,33 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da else { oss << "driverVersion="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDriverGetVersion.driverVersion__val); } oss << ")"; break; + case HIP_API_ID_hipDrvGraphAddMemcpyNode: + oss << "hipDrvGraphAddMemcpyNode("; + if (data->args.hipDrvGraphAddMemcpyNode.phGraphNode == NULL) oss << "phGraphNode=NULL"; + else { oss << "phGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.phGraphNode__val); } + oss << ", hGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.hGraph); + if (data->args.hipDrvGraphAddMemcpyNode.dependencies == NULL) oss << ", dependencies=NULL"; + else { oss << ", dependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.dependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.numDependencies); + if (data->args.hipDrvGraphAddMemcpyNode.copyParams == NULL) oss << 
", copyParams=NULL"; + else { oss << ", copyParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.copyParams__val); } + oss << ", ctx="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphAddMemcpyNode.ctx); + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphMemcpyNodeGetParams: + oss << "hipDrvGraphMemcpyNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeGetParams.hNode); + if (data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeGetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipDrvGraphMemcpyNodeSetParams: + oss << "hipDrvGraphMemcpyNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeSetParams.hNode); + if (data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipDrvGraphMemcpyNodeSetParams.nodeParams__val); } + oss << ")"; + break; case HIP_API_ID_hipDrvMemcpy2DUnaligned: oss << "hipDrvMemcpy2DUnaligned("; if (data->args.hipDrvMemcpy2DUnaligned.pCopy == NULL) oss << "pCopy=NULL"; @@ -7422,6 +7686,10 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << "event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipEventSynchronize.event); oss << ")"; break; + case HIP_API_ID_hipExtGetLastError: + oss << "hipExtGetLastError("; + oss << ")"; + break; case HIP_API_ID_hipExtGetLinkTypeAndHopCount: oss << "hipExtGetLinkTypeAndHopCount("; oss << "device1="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExtGetLinkTypeAndHopCount.device1); @@ -7510,12 +7778,13 @@ static inline const char* hipApiString(hip_api_id_t id, const 
hip_api_data_t* da break; case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: oss << "hipExternalMemoryGetMappedMipmappedArray("; - oss << "mipmap="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap); + if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap == NULL) oss << "mipmap=NULL"; + else { oss << "mipmap="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap__val); } oss << ", extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.extMem); if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc == NULL) oss << ", mipmapDesc=NULL"; else { oss << ", mipmapDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val); } oss << ")"; - break; + break; case HIP_API_ID_hipFree: oss << "hipFree("; oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFree.ptr); @@ -7611,11 +7880,18 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da else { oss << "flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceFlags.flags__val); } oss << ")"; break; - case HIP_API_ID_hipGetDeviceProperties: - oss << "hipGetDeviceProperties("; - if (data->args.hipGetDeviceProperties.props == NULL) oss << "props=NULL"; - else { oss << "props="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.props__val); } - oss << ", device="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDeviceProperties.device); + case HIP_API_ID_hipGetDevicePropertiesR0000: + oss << "hipGetDevicePropertiesR0000("; + if (data->args.hipGetDevicePropertiesR0000.prop == NULL) oss << "prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0000.prop__val); } + oss << ", device="; 
roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0000.device); + oss << ")"; + break; + case HIP_API_ID_hipGetDevicePropertiesR0600: + oss << "hipGetDevicePropertiesR0600("; + if (data->args.hipGetDevicePropertiesR0600.prop == NULL) oss << "prop=NULL"; + else { oss << "prop="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.prop__val); } + oss << ", deviceId="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGetDevicePropertiesR0600.deviceId); oss << ")"; break; case HIP_API_ID_hipGetErrorString: @@ -7626,10 +7902,6 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << "hipGetLastError("; oss << ")"; break; - case HIP_API_ID_hipExtGetLastError: - oss << "hipExtGetLastError("; - oss << ")"; - break; case HIP_API_ID_hipGetMipmappedArrayLevel: oss << "hipGetMipmappedArrayLevel("; if (data->args.hipGetMipmappedArrayLevel.levelArray == NULL) oss << "levelArray=NULL"; @@ -7705,6 +7977,30 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddEventWaitNode.event); oss << ")"; break; + case HIP_API_ID_hipGraphAddExternalSemaphoresSignalNode: + oss << "hipGraphAddExternalSemaphoresSignalNode("; + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.graph); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.pDependencies__val); } + oss 
<< ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.numDependencies); + if (data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresSignalNode.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphAddExternalSemaphoresWaitNode: + oss << "hipGraphAddExternalSemaphoresWaitNode("; + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; + else { oss << "pGraphNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.pGraphNode__val); } + oss << ", graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.graph); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies == NULL) oss << ", pDependencies=NULL"; + else { oss << ", pDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.pDependencies__val); } + oss << ", numDependencies="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.numDependencies); + if (data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphAddExternalSemaphoresWaitNode.nodeParams__val); } + oss << ")"; + break; case HIP_API_ID_hipGraphAddHostNode: oss << "hipGraphAddHostNode("; if (data->args.hipGraphAddHostNode.pGraphNode == NULL) oss << "pGraphNode=NULL"; @@ -7911,6 +8207,22 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", event="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecEventWaitNodeSetEvent.event); oss << ")"; break; + case 
HIP_API_ID_hipGraphExecExternalSemaphoresSignalNodeSetParams: + oss << "hipGraphExecExternalSemaphoresSignalNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.hNode); + if (data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresSignalNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExecExternalSemaphoresWaitNodeSetParams: + oss << "hipGraphExecExternalSemaphoresWaitNodeSetParams("; + oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hGraphExec); + oss << ", hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.hNode); + if (data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecExternalSemaphoresWaitNodeSetParams.nodeParams__val); } + oss << ")"; + break; case HIP_API_ID_hipGraphExecHostNodeSetParams: oss << "hipGraphExecHostNodeSetParams("; oss << "hGraphExec="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecHostNodeSetParams.hGraphExec); @@ -7985,6 +8297,34 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da else { oss << ", updateResult_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExecUpdate.updateResult_out__val); } oss << ")"; break; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeGetParams: + oss << 
"hipGraphExternalSemaphoresSignalNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeGetParams.hNode); + if (data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out == NULL) oss << ", params_out=NULL"; + else { oss << ", params_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeGetParams.params_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresSignalNodeSetParams: + oss << "hipGraphExternalSemaphoresSignalNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeSetParams.hNode); + if (data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresSignalNodeSetParams.nodeParams__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeGetParams: + oss << "hipGraphExternalSemaphoresWaitNodeGetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeGetParams.hNode); + if (data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out == NULL) oss << ", params_out=NULL"; + else { oss << ", params_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeGetParams.params_out__val); } + oss << ")"; + break; + case HIP_API_ID_hipGraphExternalSemaphoresWaitNodeSetParams: + oss << "hipGraphExternalSemaphoresWaitNodeSetParams("; + oss << "hNode="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphExternalSemaphoresWaitNodeSetParams.hNode); + if (data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams == NULL) oss << ", nodeParams=NULL"; + else { oss << ", nodeParams="; roctracer::hip_support::detail::operator<<(oss, 
data->args.hipGraphExternalSemaphoresWaitNodeSetParams.nodeParams__val); } + oss << ")"; + break; case HIP_API_ID_hipGraphGetEdges: oss << "hipGraphGetEdges("; oss << "graph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipGraphGetEdges.graph); diff --git a/hipamd/src/CMakeLists.txt b/hipamd/src/CMakeLists.txt index fb8ad70bc..5e96c42da 100644 --- a/hipamd/src/CMakeLists.txt +++ b/hipamd/src/CMakeLists.txt @@ -212,10 +212,12 @@ if(USE_PROF_API) set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/amd_detail/hip_prof_str.h") set(PROF_API_STR_IN "${CMAKE_SOURCE_DIR}/hipamd/include/hip/amd_detail/hip_prof_str.h") set(PROF_API_HDR "${HIP_COMMON_INCLUDE_DIR}/hip/hip_runtime_api.h") + set(PROF_GL_HDR "${CMAKE_SOURCE_DIR}/hipamd/include/hip/amd_detail/amd_hip_gl_interop.h") + set(PROF_API_DEPRECATED "${HIP_COMMON_INCLUDE_DIR}/hip/hip_deprecated.h") set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}") set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py") set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt") - + set(PROF_API_NEWHDR "${PROJECT_BINARY_DIR}/new_header.h") find_package(Python3 COMPONENTS Interpreter REQUIRED) execute_process(COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser" @@ -229,14 +231,29 @@ if(USE_PROF_API) ") endif() + add_custom_command(OUTPUT ${PROF_API_NEWHDR}.i + COMMAND ${CMAKE_COMMAND} -E cat ${PROF_API_HDR} ${PROF_GL_HDR} > ${PROF_API_NEWHDR} + COMMAND ${CMAKE_C_COMPILER} + "-D$,;-D>" + "-I$,;-I>" + "-DHIP_INCLUDE_HIP_HIP_RUNTIME_PT_API_H=1" + ${c_flags} + $ + ${CPP_EXTRA_C_FLAGS} + -E ${PROF_API_NEWHDR} -o ${PROF_API_NEWHDR}.i + COMMAND_EXPAND_LISTS VERBATIM + IMPLICIT_DEPENDS C ${PROF_API_HDR} ${PROF_GL_HDR} ${PROF_API_DEPRECATED} + DEPENDS ${PROF_API_HDR} ${PROF_GL_HDR} ${PROF_API_DEPRECATED} + COMMENT "Generating new header from hip_runtime_api.h") + add_custom_command(OUTPUT ${PROF_API_STR} - COMMAND ${Python3_EXECUTABLE} ${PROF_API_GEN} -v -t --priv ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR_IN} 
${PROF_API_STR} - DEPENDS ${PROF_API_STR_IN} ${PROF_API_HDR} ${PROF_API_GEN} + COMMAND ${Python3_EXECUTABLE} ${PROF_API_GEN} -v -t --priv ${PROF_API_NEWHDR}.i ${PROF_API_SRC} ${PROF_API_STR_IN} ${PROF_API_STR} + DEPENDS ${PROF_API_STR_IN} ${PROF_API_NEWHDR}.i ${PROF_API_GEN} COMMENT "Generating profiling primitives: ${PROF_API_STR}") add_custom_target(gen-prof-api-str-header ALL DEPENDS ${PROF_API_STR} - SOURCES ${PROF_API_HDR}) + SOURCES ${PROF_API_NEWHDR}.i) set_target_properties(amdhip64 PROPERTIES PUBLIC_HEADER ${PROF_API_STR}) diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index 975181e4f..4436713fa 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -1,5 +1,7 @@ EXPORTS hipChooseDevice +hipChooseDeviceR0000 +hipChooseDeviceR0600 hipCtxCreate hipCtxDestroy hipCtxDisablePeerAccess @@ -63,6 +65,8 @@ hipFuncSetSharedMemConfig hipGetDevice hipGetDeviceCount hipGetDeviceProperties +hipGetDevicePropertiesR0000 +hipGetDevicePropertiesR0600 hipGetErrorName hipGetErrorString hipGetLastError diff --git a/hipamd/src/hip_device.cpp b/hipamd/src/hip_device.cpp index 268e2ca81..947460c16 100644 --- a/hipamd/src/hip_device.cpp +++ b/hipamd/src/hip_device.cpp @@ -19,10 +19,14 @@ THE SOFTWARE. 
*/ #include +#include #include "hip_internal.hpp" #include "hip_mempool_impl.hpp" +#undef hipGetDeviceProperties +#undef hipDeviceProp_t + namespace hip { // ================================================================================================ @@ -139,12 +143,12 @@ Device::~Device() { graph_mem_pool_->release(); } - if (null_stream_!= nullptr) { + if (null_stream_ != nullptr) { hip::Stream::Destroy(null_stream_); } } -} +} // namespace hip void ihipDestroyDevice() { for (auto deviceHandle : g_devices) { @@ -171,8 +175,7 @@ hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) { HIP_RETURN(ihipDeviceGet(device, deviceId)); } -hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { - +hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) { HIP_INIT_API(hipDeviceTotalMem, bytes, device); if (device < 0 || static_cast(device) >= g_devices.size()) { @@ -191,8 +194,7 @@ hipError_t hipDeviceTotalMem (size_t *bytes, hipDevice_t device) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { - +hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) { HIP_INIT_API(hipDeviceComputeCapability, major, minor, device); if (device < 0 || static_cast(device) >= g_devices.size()) { @@ -232,8 +234,7 @@ hipError_t ihipDeviceGetCount(int* count) { return hipSuccess; } -hipError_t hipDeviceGetName(char *name, int len, hipDevice_t device) { - +hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) { HIP_INIT_API(hipDeviceGetName, (void*)name, len, device); if (device < 0 || static_cast(device) >= g_devices.size()) { @@ -275,7 +276,7 @@ hipError_t hipDeviceGetUuid(hipUUID* uuid, hipDevice_t device) { HIP_RETURN(hipSuccess); } -hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { +hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, int device) { if (props == nullptr) { return hipErrorInvalidValue; } @@ -286,8 +287,199 
@@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { auto* deviceHandle = g_devices[device]->devices()[0]; constexpr auto int32_max = static_cast(std::numeric_limits::max()); - constexpr auto uint16_max = static_cast(std::numeric_limits::max())+1; - hipDeviceProp_t deviceProps = {0}; + constexpr auto uint16_max = static_cast(std::numeric_limits::max()) + 1; + hipDeviceProp_tR0600 deviceProps = {0}; + + const auto& info = deviceHandle->info(); + const auto& isa = deviceHandle->isa(); + ::strncpy(deviceProps.name, info.boardName_, sizeof(info.boardName_)); + ::strncpy(deviceProps.uuid.bytes, info.uuid_, sizeof(info.uuid_)); + deviceProps.totalGlobalMem = info.globalMemSize_; + deviceProps.sharedMemPerBlock = info.localMemSizePerCU_; + deviceProps.sharedMemPerMultiprocessor = info.localMemSizePerCU_ * info.numRTCUs_; + deviceProps.regsPerBlock = info.availableRegistersPerCU_; + deviceProps.warpSize = info.wavefrontWidth_; + deviceProps.maxThreadsPerBlock = info.maxWorkGroupSize_; + deviceProps.maxThreadsDim[0] = info.maxWorkItemSizes_[0]; + deviceProps.maxThreadsDim[1] = info.maxWorkItemSizes_[1]; + deviceProps.maxThreadsDim[2] = info.maxWorkItemSizes_[2]; + deviceProps.maxGridSize[0] = int32_max; + deviceProps.maxGridSize[1] = uint16_max; + deviceProps.maxGridSize[2] = uint16_max; + deviceProps.clockRate = info.maxEngineClockFrequency_ * 1000; + deviceProps.memoryClockRate = info.maxMemoryClockFrequency_ * 1000; + deviceProps.memoryBusWidth = info.globalMemChannels_; + deviceProps.totalConstMem = std::min(info.maxConstantBufferSize_, int32_max); + deviceProps.major = isa.versionMajor(); + deviceProps.minor = isa.versionMinor(); + deviceProps.multiProcessorCount = info.maxComputeUnits_; + deviceProps.l2CacheSize = info.l2CacheSize_; + deviceProps.maxThreadsPerMultiProcessor = info.maxThreadsPerCU_; + deviceProps.maxBlocksPerMultiProcessor = int(info.maxThreadsPerCU_ / info.maxWorkGroupSize_); + deviceProps.computeMode = 0; + 
deviceProps.clockInstructionRate = info.timeStampFrequency_; + deviceProps.arch.hasGlobalInt32Atomics = 1; + deviceProps.arch.hasGlobalFloatAtomicExch = 1; + deviceProps.arch.hasSharedInt32Atomics = 1; + deviceProps.arch.hasSharedFloatAtomicExch = 1; + deviceProps.arch.hasFloatAtomicAdd = 1; + deviceProps.arch.hasGlobalInt64Atomics = 1; + deviceProps.arch.hasSharedInt64Atomics = 1; + deviceProps.hostNativeAtomicSupported = info.pcie_atomics_ ? 1 : 0; + deviceProps.arch.hasDoubles = 1; + deviceProps.arch.hasWarpVote = 1; + deviceProps.arch.hasWarpBallot = 1; + deviceProps.arch.hasWarpShuffle = 1; + deviceProps.arch.hasFunnelShift = 0; + deviceProps.arch.hasThreadFenceSystem = 1; + deviceProps.arch.hasSyncThreadsExt = 0; + deviceProps.arch.hasSurfaceFuncs = 0; + deviceProps.arch.has3dGrid = 1; + deviceProps.arch.hasDynamicParallelism = 0; + deviceProps.concurrentKernels = 1; + deviceProps.pciDomainID = info.pciDomainID; + deviceProps.pciBusID = info.deviceTopology_.pcie.bus; + deviceProps.pciDeviceID = info.deviceTopology_.pcie.device; + deviceProps.maxSharedMemoryPerMultiProcessor = info.localMemSizePerCU_; + deviceProps.canMapHostMemory = 1; + deviceProps.regsPerMultiprocessor = info.availableRegistersPerCU_; + sprintf(deviceProps.gcnArchName, "%s", isa.targetId()); + deviceProps.cooperativeLaunch = info.cooperativeGroups_; + deviceProps.cooperativeMultiDeviceLaunch = info.cooperativeMultiDeviceGroups_; + + deviceProps.cooperativeMultiDeviceUnmatchedFunc = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedGridDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_; + deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = info.cooperativeMultiDeviceGroups_; + + deviceProps.maxTexture1DLinear = + std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes + deviceProps.maxTexture1DMipmap = std::min(16 * info.imageMaxBufferSize_, 
int32_max); + deviceProps.maxTexture1D = deviceProps.maxSurface1D = std::min(info.image1DMaxWidth_, int32_max); + deviceProps.maxTexture2D[0] = deviceProps.maxSurface2D[0] = + std::min(info.image2DMaxWidth_, int32_max); + deviceProps.maxTexture2D[1] = deviceProps.maxSurface2D[1] = + std::min(info.image2DMaxHeight_, int32_max); + deviceProps.maxTexture3D[0] = deviceProps.maxSurface3D[0] = + std::min(info.image3DMaxWidth_, int32_max); + deviceProps.maxTexture3D[1] = deviceProps.maxSurface3D[1] = + std::min(info.image3DMaxHeight_, int32_max); + deviceProps.maxTexture3D[2] = deviceProps.maxSurface3D[2] = + std::min(info.image3DMaxDepth_, int32_max); + deviceProps.maxTexture1DLayered[0] = deviceProps.maxSurface1DLayered[0] = + std::min(info.image1DAMaxWidth_, int32_max); + deviceProps.maxTexture1DLayered[1] = deviceProps.maxSurface1DLayered[1] = + std::min(info.imageMaxArraySize_, int32_max); + deviceProps.maxTexture2DLayered[0] = deviceProps.maxSurface2DLayered[0] = + std::min(info.image2DAMaxWidth_[0], int32_max); + deviceProps.maxTexture2DLayered[1] = deviceProps.maxSurface2DLayered[1] = + std::min(info.image2DAMaxWidth_[1], int32_max); + deviceProps.maxTexture2DLayered[2] = deviceProps.maxSurface2DLayered[2] = + std::min(info.imageMaxArraySize_, int32_max); + deviceProps.hdpMemFlushCntl = info.hdpMemFlushCntl; + deviceProps.hdpRegFlushCntl = info.hdpRegFlushCntl; + + deviceProps.memPitch = std::min(info.maxMemAllocSize_, int32_max); + deviceProps.textureAlignment = deviceProps.surfaceAlignment = info.imageBaseAddressAlignment_; + deviceProps.texturePitchAlignment = info.imagePitchAlignment_; + deviceProps.kernelExecTimeoutEnabled = 0; + deviceProps.ECCEnabled = info.errorCorrectionSupport_ ? 1 : 0; + deviceProps.isLargeBar = info.largeBar_ ? 
1 : 0; + deviceProps.asicRevision = info.asicRevision_; + deviceProps.ipcEventSupported = 1; + deviceProps.streamPrioritiesSupported = 1; + deviceProps.multiGpuBoardGroupID = info.deviceTopology_.pcie.device; + + // HMM capabilities + deviceProps.asyncEngineCount = info.numAsyncQueues_; + deviceProps.deviceOverlap = (info.numAsyncQueues_ > 0) ? 1 : 0; + deviceProps.unifiedAddressing = info.hmmDirectHostAccess_; + deviceProps.managedMemory = info.hmmSupported_; + deviceProps.concurrentManagedAccess = info.hmmSupported_; + deviceProps.directManagedMemAccessFromHost = info.hmmDirectHostAccess_; + deviceProps.canUseHostPointerForRegisteredMem = info.hostUnifiedMemory_; + deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_; + deviceProps.hostRegisterSupported = info.hostUnifiedMemory_; + deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_; + + // Mem pool + deviceProps.memoryPoolsSupported = HIP_MEM_POOL_SUPPORT; + deviceProps.memoryPoolSupportedHandleTypes = 0; + + // Caching behavior + deviceProps.globalL1CacheSupported = 1; + deviceProps.localL1CacheSupported = 1; + deviceProps.persistingL2CacheMaxSize = info.l2CacheSize_; + deviceProps.reservedSharedMemPerBlock = 0; + deviceProps.sharedMemPerBlockOptin = 0; + + // Unsupported features + // Single to double precision perf ratio + deviceProps.singleToDoublePrecisionPerfRatio = 0; + // Flag hipHostRegisterReadOnly + deviceProps.hostRegisterReadOnlySupported = 0; + // Compute preemption + deviceProps.computePreemptionSupported = 0; + // Cubemaps + deviceProps.maxTextureCubemap = 0; + deviceProps.maxTextureCubemapLayered[0] = 0; + deviceProps.maxTextureCubemapLayered[1] = 0; + deviceProps.maxSurfaceCubemap = 0; + deviceProps.maxSurfaceCubemapLayered[0] = 0; + deviceProps.maxSurfaceCubemapLayered[1] = 0; + // Texture gather ops + deviceProps.maxTexture2DGather[0] = 0; + deviceProps.maxTexture2DGather[1] = 0; + // Textures bound to pitch memory + deviceProps.maxTexture2DLinear[0] = 0; + 
deviceProps.maxTexture2DLinear[1] = 0; + deviceProps.maxTexture2DLinear[2] = 0; + // Alternate 3D texture + deviceProps.maxTexture3DAlt[0] = 0; + deviceProps.maxTexture3DAlt[1] = 0; + deviceProps.maxTexture3DAlt[2] = 0; + // access policy + deviceProps.accessPolicyMaxWindowSize = 0; + // cluster launch + deviceProps.clusterLaunch = 0; + // Mapping HIP array + deviceProps.deferredMappingHipArraySupported = 0; + // RDMA options + deviceProps.gpuDirectRDMASupported = 0; + deviceProps.gpuDirectRDMAFlushWritesOptions = 0; + deviceProps.gpuDirectRDMAWritesOrdering = 0; + // luid - TODO populate it only on windows + std::memset(deviceProps.luid, 0, sizeof(deviceProps.luid)); + deviceProps.luidDeviceNodeMask = 0; + deviceProps.sparseHipArraySupported = 0; + deviceProps.timelineSemaphoreInteropSupported = 0; + deviceProps.unifiedFunctionPointers = 0; + + *props = deviceProps; + return hipSuccess; +} + +hipError_t hipGetDevicePropertiesR0600(hipDeviceProp_tR0600* prop, int device) { + HIP_INIT_API(hipGetDevicePropertiesR0600, prop, device); + + HIP_RETURN(ihipGetDeviceProperties(prop, device)); +} + +hipError_t hipGetDevicePropertiesR0000(hipDeviceProp_tR0000* prop, int device) { + HIP_INIT_API(hipGetDevicePropertiesR0000, prop, device); + + if (prop == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (unsigned(device) >= g_devices.size()) { + HIP_RETURN(hipErrorInvalidDevice); + } + auto* deviceHandle = g_devices[device]->devices()[0]; + + constexpr auto int32_max = static_cast(std::numeric_limits::max()); + constexpr auto uint16_max = static_cast(std::numeric_limits::max()) + 1; + hipDeviceProp_tR0000 deviceProps = {0}; const auto& info = deviceHandle->info(); const auto& isa = deviceHandle->isa(); @@ -348,8 +540,9 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { deviceProps.cooperativeMultiDeviceUnmatchedBlockDim = info.cooperativeMultiDeviceGroups_; deviceProps.cooperativeMultiDeviceUnmatchedSharedMem = 
info.cooperativeMultiDeviceGroups_; - deviceProps.maxTexture1DLinear = std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes - deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max); + deviceProps.maxTexture1DLinear = + std::min(16 * info.imageMaxBufferSize_, int32_max); // Max pixel size is 16 bytes + deviceProps.maxTexture1D = std::min(info.image1DMaxWidth_, int32_max); deviceProps.maxTexture2D[0] = std::min(info.image2DMaxWidth_, int32_max); deviceProps.maxTexture2D[1] = std::min(info.image2DMaxHeight_, int32_max); deviceProps.maxTexture3D[0] = std::min(info.image3DMaxWidth_, int32_max); @@ -373,12 +566,11 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { deviceProps.pageableMemoryAccess = info.hmmCpuMemoryAccessible_; deviceProps.pageableMemoryAccessUsesHostPageTables = info.hostUnifiedMemory_; - *props = deviceProps; - return hipSuccess; + *prop = deviceProps; + HIP_RETURN(hipSuccess); } -hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, hipDevice_t device) { - HIP_INIT_API(hipGetDeviceProperties, props, device); - - HIP_RETURN(ihipGetDeviceProperties(props, device)); -} +extern "C" hipError_t hipGetDeviceProperties(hipDeviceProp_tR0000* props, hipDevice_t device); +hipError_t hipGetDeviceProperties(hipDeviceProp_tR0000* props, hipDevice_t device) { + return hipGetDevicePropertiesR0000(props, device); +} \ No newline at end of file diff --git a/hipamd/src/hip_device_runtime.cpp b/hipamd/src/hip_device_runtime.cpp index f6ce98ad4..1920c831a 100644 --- a/hipamd/src/hip_device_runtime.cpp +++ b/hipamd/src/hip_device_runtime.cpp @@ -22,122 +22,144 @@ #include "hip_internal.hpp" -hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* properties) { - - HIP_INIT_API(hipChooseDevice, device, properties); +#undef hipChooseDevice +#undef hipDeviceProp_t +template +hipError_t ihipChooseDevice(int* device, const DeviceProp* properties) { if (device == nullptr || properties == 
nullptr) { - HIP_RETURN(hipErrorInvalidValue); + return hipErrorInvalidValue; } *device = 0; cl_uint maxMatchedCount = 0; int count = 0; - HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); + IHIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); - for (cl_int i = 0; i< count; ++i) { - hipDeviceProp_t currentProp = {0}; + for (cl_int i = 0; i < count; ++i) { + DeviceProp currentProp = {0}; cl_uint validPropCount = 0; cl_uint matchedCount = 0; - hipError_t err = ihipGetDeviceProperties(¤tProp, i); + hipError_t err = hipSuccess; + + if constexpr (std::is_same_v){ + err = ihipGetDeviceProperties(¤tProp, i); + } + else { + err = hipGetDevicePropertiesR0000(¤tProp, i); + } + if (properties->major != 0) { validPropCount++; - if(currentProp.major >= properties->major) { + if (currentProp.major >= properties->major) { matchedCount++; } } if (properties->minor != 0) { validPropCount++; - if(currentProp.minor >= properties->minor) { + if (currentProp.minor >= properties->minor) { matchedCount++; } } - if(properties->totalGlobalMem != 0) { - validPropCount++; - if(currentProp.totalGlobalMem >= properties->totalGlobalMem) { - matchedCount++; - } + if (properties->totalGlobalMem != 0) { + validPropCount++; + if (currentProp.totalGlobalMem >= properties->totalGlobalMem) { + matchedCount++; + } } - if(properties->sharedMemPerBlock != 0) { - validPropCount++; - if(currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { - matchedCount++; - } + if (properties->sharedMemPerBlock != 0) { + validPropCount++; + if (currentProp.sharedMemPerBlock >= properties->sharedMemPerBlock) { + matchedCount++; + } } - if(properties->maxThreadsPerBlock != 0) { - validPropCount++; - if(currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock ) { - matchedCount++; - } + if (properties->maxThreadsPerBlock != 0) { + validPropCount++; + if (currentProp.maxThreadsPerBlock >= properties->maxThreadsPerBlock) { + matchedCount++; + } } - if(properties->totalConstMem != 0) { - validPropCount++; - 
if(currentProp.totalConstMem >= properties->totalConstMem ) { - matchedCount++; - } + if (properties->totalConstMem != 0) { + validPropCount++; + if (currentProp.totalConstMem >= properties->totalConstMem) { + matchedCount++; + } } - if(properties->multiProcessorCount != 0) { - validPropCount++; - if(currentProp.multiProcessorCount >= - properties->multiProcessorCount ) { - matchedCount++; - } + if (properties->multiProcessorCount != 0) { + validPropCount++; + if (currentProp.multiProcessorCount >= properties->multiProcessorCount) { + matchedCount++; + } } - if(properties->maxThreadsPerMultiProcessor != 0) { - validPropCount++; - if(currentProp.maxThreadsPerMultiProcessor >= - properties->maxThreadsPerMultiProcessor ) { - matchedCount++; - } + if (properties->maxThreadsPerMultiProcessor != 0) { + validPropCount++; + if (currentProp.maxThreadsPerMultiProcessor >= properties->maxThreadsPerMultiProcessor) { + matchedCount++; + } } - if(properties->memoryClockRate != 0) { - validPropCount++; - if(currentProp.memoryClockRate >= properties->memoryClockRate ) { - matchedCount++; - } + if (properties->memoryClockRate != 0) { + validPropCount++; + if (currentProp.memoryClockRate >= properties->memoryClockRate) { + matchedCount++; + } } - if(properties->memoryBusWidth != 0) { - validPropCount++; - if(currentProp.memoryBusWidth >= properties->memoryBusWidth ) { - matchedCount++; - } + if (properties->memoryBusWidth != 0) { + validPropCount++; + if (currentProp.memoryBusWidth >= properties->memoryBusWidth) { + matchedCount++; + } } - if(properties->l2CacheSize != 0) { - validPropCount++; - if(currentProp.l2CacheSize >= properties->l2CacheSize ) { - matchedCount++; - } + if (properties->l2CacheSize != 0) { + validPropCount++; + if (currentProp.l2CacheSize >= properties->l2CacheSize) { + matchedCount++; + } } - if(properties->regsPerBlock != 0) { - validPropCount++; - if(currentProp.regsPerBlock >= properties->regsPerBlock ) { - matchedCount++; - } + if (properties->regsPerBlock 
!= 0) { + validPropCount++; + if (currentProp.regsPerBlock >= properties->regsPerBlock) { + matchedCount++; + } } - if(properties->maxSharedMemoryPerMultiProcessor != 0) { - validPropCount++; - if(currentProp.maxSharedMemoryPerMultiProcessor >= - properties->maxSharedMemoryPerMultiProcessor ) { - matchedCount++; - } + if (properties->maxSharedMemoryPerMultiProcessor != 0) { + validPropCount++; + if (currentProp.maxSharedMemoryPerMultiProcessor >= + properties->maxSharedMemoryPerMultiProcessor) { + matchedCount++; + } } - if(properties->warpSize != 0) { - validPropCount++; - if(currentProp.warpSize >= properties->warpSize ) { - matchedCount++; - } + if (properties->warpSize != 0) { + validPropCount++; + if (currentProp.warpSize >= properties->warpSize) { + matchedCount++; + } } - if(validPropCount == matchedCount) { + if (validPropCount == matchedCount) { *device = matchedCount > maxMatchedCount ? i : *device; maxMatchedCount = std::max(matchedCount, maxMatchedCount); } } + return hipSuccess; +} + +hipError_t hipChooseDeviceR0600(int* device, const hipDeviceProp_tR0600* properties) { + HIP_INIT_API(hipChooseDeviceR0600, device, properties); + HIP_RETURN(ihipChooseDevice(device, properties)); +} + +hipError_t hipChooseDeviceR0000(int* device, const hipDeviceProp_tR0000* properties) { + HIP_INIT_API(hipChooseDeviceR0000, device, properties); + HIP_RETURN(ihipChooseDevice(device, properties)); HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { +extern "C" hipError_t hipChooseDevice(int* device, const hipDeviceProp_tR0000* properties); +hipError_t hipChooseDevice(int* device, const hipDeviceProp_tR0000* properties) { + return hipChooseDeviceR0000(device, properties); +} +hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device); if (pi == nullptr) { @@ -151,214 +173,282 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t 
attr, int device) HIP_RETURN(hipErrorInvalidDevice); } - //FIXME: should we cache the props, or just select from deviceHandle->info_? - hipDeviceProp_t prop = {0}; + // FIXME: should we cache the props, or just select from deviceHandle->info_? + hipDeviceProp_tR0600 prop = {0}; HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device)); constexpr auto int32_max = static_cast(std::numeric_limits::max()); switch (attr) { - case hipDeviceAttributeMaxThreadsPerBlock: - *pi = prop.maxThreadsPerBlock; - break; - case hipDeviceAttributeMaxBlockDimX: - *pi = prop.maxThreadsDim[0]; - break; - case hipDeviceAttributeMaxBlockDimY: - *pi = prop.maxThreadsDim[1]; - break; - case hipDeviceAttributeMaxBlockDimZ: - *pi = prop.maxThreadsDim[2]; - break; - case hipDeviceAttributeMaxGridDimX: - *pi = prop.maxGridSize[0]; - break; - case hipDeviceAttributeMaxGridDimY: - *pi = prop.maxGridSize[1]; - break; - case hipDeviceAttributeMaxGridDimZ: - *pi = prop.maxGridSize[2]; - break; - case hipDeviceAttributeMaxSharedMemoryPerBlock: - *pi = prop.sharedMemPerBlock; - break; - case hipDeviceAttributeTotalConstantMemory: - // size_t to int casting - *pi = std::min(prop.totalConstMem, int32_max); - break; - case hipDeviceAttributeWarpSize: - *pi = prop.warpSize; - break; - case hipDeviceAttributeMaxRegistersPerBlock: - *pi = prop.regsPerBlock; - break; - case hipDeviceAttributeClockRate: - *pi = prop.clockRate; - break; - case hipDeviceAttributeWallClockRate: - *pi = g_devices[device]->devices()[0]->info().wallClockFrequency_; - break; - case hipDeviceAttributeMemoryClockRate: - *pi = prop.memoryClockRate; - break; - case hipDeviceAttributeMemoryBusWidth: - *pi = prop.memoryBusWidth; - break; - case hipDeviceAttributeMultiprocessorCount: - *pi = prop.multiProcessorCount; - break; - case hipDeviceAttributeComputeMode: - *pi = prop.computeMode; - break; - case hipDeviceAttributeL2CacheSize: - *pi = prop.l2CacheSize; - break; - case hipDeviceAttributeMaxThreadsPerMultiProcessor: - *pi = 
prop.maxThreadsPerMultiProcessor; - break; - case hipDeviceAttributeComputeCapabilityMajor: - *pi = prop.major; - break; - case hipDeviceAttributeComputeCapabilityMinor: - *pi = prop.minor; - break; - case hipDeviceAttributePciBusId: - *pi = prop.pciBusID; - break; - case hipDeviceAttributeConcurrentKernels: - *pi = prop.concurrentKernels; - break; - case hipDeviceAttributePciDeviceId: - *pi = prop.pciDeviceID; - break; - case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: - *pi = prop.maxSharedMemoryPerMultiProcessor; - break; - case hipDeviceAttributeIsMultiGpuBoard: - *pi = prop.isMultiGpuBoard; - break; - case hipDeviceAttributeCooperativeLaunch: - *pi = prop.cooperativeLaunch; - break; - case hipDeviceAttributeCooperativeMultiDeviceLaunch: - *pi = prop.cooperativeMultiDeviceLaunch; - break; - case hipDeviceAttributeIntegrated: - *pi = prop.integrated; - break; - case hipDeviceAttributeMaxTexture1DWidth: - *pi = prop.maxTexture1D; - break; - case hipDeviceAttributeMaxTexture2DWidth: - *pi = prop.maxTexture2D[0]; - break; - case hipDeviceAttributeMaxTexture2DHeight: - *pi = prop.maxTexture2D[1]; - break; - case hipDeviceAttributeMaxTexture3DWidth: - *pi = prop.maxTexture3D[0]; - break; - case hipDeviceAttributeMaxTexture3DHeight: - *pi = prop.maxTexture3D[1]; - break; - case hipDeviceAttributeMaxTexture3DDepth: - *pi = prop.maxTexture3D[2]; - break; - case hipDeviceAttributeHdpMemFlushCntl: - *reinterpret_cast(pi) = prop.hdpMemFlushCntl; - break; - case hipDeviceAttributeHdpRegFlushCntl: - *reinterpret_cast(pi) = prop.hdpRegFlushCntl; - break; - case hipDeviceAttributeMaxPitch: - // size_t to int casting - *pi = std::min(prop.memPitch, int32_max); - break; - case hipDeviceAttributeTextureAlignment: - *pi = prop.textureAlignment; - break; - case hipDeviceAttributeTexturePitchAlignment: - *pi = prop.texturePitchAlignment; - break; - case hipDeviceAttributeKernelExecTimeout: - *pi = prop.kernelExecTimeoutEnabled; - break; - case 
hipDeviceAttributeCanMapHostMemory: - *pi = prop.canMapHostMemory; - break; - case hipDeviceAttributeEccEnabled: - *pi = prop.ECCEnabled; - break; - case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: - *pi = prop.cooperativeMultiDeviceUnmatchedFunc; - break; - case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: - *pi = prop.cooperativeMultiDeviceUnmatchedGridDim; - break; - case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: - *pi = prop.cooperativeMultiDeviceUnmatchedBlockDim; - break; - case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: - *pi = prop.cooperativeMultiDeviceUnmatchedSharedMem; - break; - case hipDeviceAttributeAsicRevision: - *pi = prop.asicRevision; - break; - case hipDeviceAttributeManagedMemory: - *pi = prop.managedMemory; - break; - case hipDeviceAttributeDirectManagedMemAccessFromHost: - *pi = prop.directManagedMemAccessFromHost; - break; - case hipDeviceAttributeConcurrentManagedAccess: - *pi = prop.concurrentManagedAccess; - break; - case hipDeviceAttributePageableMemoryAccess: - *pi = prop.pageableMemoryAccess; - break; - case hipDeviceAttributePageableMemoryAccessUsesHostPageTables: - *pi = prop.pageableMemoryAccessUsesHostPageTables; - break; - case hipDeviceAttributeIsLargeBar: - *pi = prop.isLargeBar; - break; - case hipDeviceAttributeUnifiedAddressing: - // HIP runtime always uses SVM for host memory allocations. 
- // Note: Host registered memory isn't covered by this feature - // and still requires hipMemHostGetDevicePointer() call - *pi = true; - break; - case hipDeviceAttributeCanUseStreamWaitValue: - // hipStreamWaitValue64() and hipStreamWaitValue32() support - *pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_; - break; - case hipDeviceAttributeImageSupport: - *pi = static_cast(g_devices[device]->devices()[0]->info().imageSupport_); - break; - case hipDeviceAttributePhysicalMultiProcessorCount: - *pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_; - break; - case hipDeviceAttributeFineGrainSupport: - *pi = static_cast(g_devices[device]->devices()[0]->isFineGrainSupported()); - break; - case hipDeviceAttributeMemoryPoolsSupported: - *pi = HIP_MEM_POOL_SUPPORT; - break; - case hipDeviceAttributeVirtualMemoryManagementSupported: - *pi = static_cast(g_devices[device]->devices()[0]->info().virtualMemoryManagement_); - break; - case hipDeviceAttributeHostRegisterSupported: - *pi = true; - break; - default: - HIP_RETURN(hipErrorInvalidValue); + case hipDeviceAttributeMaxThreadsPerBlock: + *pi = prop.maxThreadsPerBlock; + break; + case hipDeviceAttributeAsyncEngineCount: + *pi = prop.asyncEngineCount; + break; + case hipDeviceAttributeMaxBlockDimX: + *pi = prop.maxThreadsDim[0]; + break; + case hipDeviceAttributeMaxBlockDimY: + *pi = prop.maxThreadsDim[1]; + break; + case hipDeviceAttributeMaxBlockDimZ: + *pi = prop.maxThreadsDim[2]; + break; + case hipDeviceAttributeMaxGridDimX: + *pi = prop.maxGridSize[0]; + break; + case hipDeviceAttributeMaxGridDimY: + *pi = prop.maxGridSize[1]; + break; + case hipDeviceAttributeMaxGridDimZ: + *pi = prop.maxGridSize[2]; + break; + case hipDeviceAttributeMaxSurface1D: + *pi = prop.maxSurface1D; + break; + case hipDeviceAttributeMaxSharedMemoryPerBlock: + *pi = prop.sharedMemPerBlock; + break; + case hipDeviceAttributeSharedMemPerBlockOptin: + *pi = prop.sharedMemPerBlockOptin; + break; + case 
hipDeviceAttributeSharedMemPerMultiprocessor: + *pi = prop.sharedMemPerMultiprocessor; + break; + case hipDeviceAttributeStreamPrioritiesSupported: + *pi = prop.streamPrioritiesSupported; + break; + case hipDeviceAttributeSurfaceAlignment: + *pi = prop.surfaceAlignment; + break; + case hipDeviceAttributeTotalConstantMemory: + // size_t to int casting + *pi = std::min(prop.totalConstMem, int32_max); + break; + case hipDeviceAttributeTotalGlobalMem: + *pi = std::min(prop.totalGlobalMem, int32_max); + break; + case hipDeviceAttributeWarpSize: + *pi = prop.warpSize; + break; + case hipDeviceAttributeMaxRegistersPerBlock: + *pi = prop.regsPerBlock; + break; + case hipDeviceAttributeClockRate: + *pi = prop.clockRate; + break; + case hipDeviceAttributeWallClockRate: + *pi = g_devices[device]->devices()[0]->info().wallClockFrequency_; + break; + case hipDeviceAttributeMemoryClockRate: + *pi = prop.memoryClockRate; + break; + case hipDeviceAttributeMemoryBusWidth: + *pi = prop.memoryBusWidth; + break; + case hipDeviceAttributeMultiprocessorCount: + *pi = prop.multiProcessorCount; + break; + case hipDeviceAttributeComputeMode: + *pi = prop.computeMode; + break; + case hipDeviceAttributeComputePreemptionSupported: + *pi = prop.computePreemptionSupported; + break; + case hipDeviceAttributeL2CacheSize: + *pi = prop.l2CacheSize; + break; + case hipDeviceAttributeLocalL1CacheSupported: + *pi = prop.localL1CacheSupported; + break; + case hipDeviceAttributeLuidDeviceNodeMask: + *pi = prop.luidDeviceNodeMask; + break; + case hipDeviceAttributeMaxThreadsPerMultiProcessor: + *pi = prop.maxThreadsPerMultiProcessor; + break; + case hipDeviceAttributeComputeCapabilityMajor: + *pi = prop.major; + break; + case hipDeviceAttributeComputeCapabilityMinor: + *pi = prop.minor; + break; + case hipDeviceAttributeMultiGpuBoardGroupID: + *pi = prop.multiGpuBoardGroupID; + break; + case hipDeviceAttributePciBusId: + *pi = prop.pciBusID; + break; + case hipDeviceAttributeConcurrentKernels: + *pi = 
prop.concurrentKernels; + break; + case hipDeviceAttributePciDeviceId: + *pi = prop.pciDeviceID; + break; + case hipDeviceAttributePciDomainID: + *pi = prop.pciDomainID; + break; + case hipDeviceAttributePersistingL2CacheMaxSize: + *pi = prop.persistingL2CacheMaxSize; + break; + case hipDeviceAttributeMaxRegistersPerMultiprocessor: + *pi = prop.regsPerMultiprocessor; + break; + case hipDeviceAttributeReservedSharedMemPerBlock: + *pi = prop.reservedSharedMemPerBlock; + break; + case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor: + *pi = prop.maxSharedMemoryPerMultiProcessor; + break; + case hipDeviceAttributeIsMultiGpuBoard: + *pi = prop.isMultiGpuBoard; + break; + case hipDeviceAttributeCooperativeLaunch: + *pi = prop.cooperativeLaunch; + break; + case hipDeviceAttributeHostRegisterSupported: + *pi = 1; // AMD GPUs allow you to register host memory regardless of the GPU + break; + case hipDeviceAttributeDeviceOverlap: + *pi = prop.asyncEngineCount > 0 ? 1 : 0; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + *pi = prop.cooperativeMultiDeviceLaunch; + break; + case hipDeviceAttributeIntegrated: + *pi = prop.integrated; + break; + case hipDeviceAttributeMaxTexture1DWidth: + *pi = prop.maxTexture1D; + break; + case hipDeviceAttributeMaxTexture1DLinear: + *pi = prop.maxTexture1DLinear; + break; + case hipDeviceAttributeMaxTexture1DMipmap: + *pi = prop.maxTexture1DMipmap; + break; + case hipDeviceAttributeMaxTextureCubemap: + *pi = prop.maxTextureCubemap; + break; + case hipDeviceAttributeMaxTexture2DWidth: + *pi = prop.maxTexture2D[0]; + break; + case hipDeviceAttributeMaxTexture2DHeight: + *pi = prop.maxTexture2D[1]; + break; + case hipDeviceAttributeMaxTexture3DWidth: + *pi = prop.maxTexture3D[0]; + break; + case hipDeviceAttributeMaxTexture3DHeight: + *pi = prop.maxTexture3D[1]; + break; + case hipDeviceAttributeMaxTexture3DDepth: + *pi = prop.maxTexture3D[2]; + break; + case hipDeviceAttributeHdpMemFlushCntl: + *reinterpret_cast(pi) = 
prop.hdpMemFlushCntl; + break; + case hipDeviceAttributeHdpRegFlushCntl: + *reinterpret_cast(pi) = prop.hdpRegFlushCntl; + break; + case hipDeviceAttributeMaxPitch: + // size_t to int casting + *pi = std::min(prop.memPitch, int32_max); + break; + case hipDeviceAttributeTextureAlignment: + *pi = prop.textureAlignment; + break; + case hipDeviceAttributeTexturePitchAlignment: + *pi = prop.texturePitchAlignment; + break; + case hipDeviceAttributeKernelExecTimeout: + *pi = prop.kernelExecTimeoutEnabled; + break; + case hipDeviceAttributeCanMapHostMemory: + *pi = prop.canMapHostMemory; + break; + case hipDeviceAttributeCanUseHostPointerForRegisteredMem: + *pi = prop.canUseHostPointerForRegisteredMem; + break; + case hipDeviceAttributeEccEnabled: + *pi = prop.ECCEnabled; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop.cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop.cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop.cooperativeMultiDeviceUnmatchedBlockDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop.cooperativeMultiDeviceUnmatchedSharedMem; + break; + case hipDeviceAttributeAsicRevision: + *pi = prop.asicRevision; + break; + case hipDeviceAttributeManagedMemory: + *pi = prop.managedMemory; + break; + case hipDeviceAttributeMaxBlocksPerMultiProcessor: + *pi = prop.maxBlocksPerMultiProcessor; + break; + case hipDeviceAttributeDirectManagedMemAccessFromHost: + *pi = prop.directManagedMemAccessFromHost; + break; + case hipDeviceAttributeGlobalL1CacheSupported: + *pi = prop.globalL1CacheSupported; + break; + case hipDeviceAttributeHostNativeAtomicSupported: + *pi = prop.hostNativeAtomicSupported; + break; + case hipDeviceAttributeConcurrentManagedAccess: + *pi = prop.concurrentManagedAccess; + break; + case hipDeviceAttributePageableMemoryAccess: + 
*pi = prop.pageableMemoryAccess; + break; + case hipDeviceAttributePageableMemoryAccessUsesHostPageTables: + *pi = prop.pageableMemoryAccessUsesHostPageTables; + break; + case hipDeviceAttributeIsLargeBar: + *pi = prop.isLargeBar; + break; + case hipDeviceAttributeUnifiedAddressing: + // HIP runtime always uses SVM for host memory allocations. + // Note: Host registered memory isn't covered by this feature + // and still requires hipMemHostGetDevicePointer() call + *pi = true; + break; + case hipDeviceAttributeCanUseStreamWaitValue: + // hipStreamWaitValue64() and hipStreamWaitValue32() support + *pi = g_devices[device]->devices()[0]->info().aqlBarrierValue_; + break; + case hipDeviceAttributeImageSupport: + *pi = static_cast(g_devices[device]->devices()[0]->info().imageSupport_); + break; + case hipDeviceAttributePhysicalMultiProcessorCount: + *pi = g_devices[device]->devices()[0]->info().maxPhysicalComputeUnits_; + break; + case hipDeviceAttributeFineGrainSupport: + *pi = static_cast(g_devices[device]->devices()[0]->isFineGrainSupported()); + break; + case hipDeviceAttributeMemoryPoolsSupported: + *pi = HIP_MEM_POOL_SUPPORT; + break; + case hipDeviceAttributeVirtualMemoryManagementSupported: + *pi = static_cast(g_devices[device]->devices()[0]->info().virtualMemoryManagement_); + break; + default: + HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { - +hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusIdstr) { HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusIdstr); if (device == nullptr || pciBusIdstr == nullptr) { @@ -369,19 +459,19 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { int pciDeviceID = -1; int pciDomainID = -1; bool found = false; - if (sscanf (pciBusIdstr, "%04x:%02x:%02x", reinterpret_cast(&pciDomainID), - reinterpret_cast(&pciBusID), - reinterpret_cast(&pciDeviceID)) == 0x3) { + if (sscanf(pciBusIdstr, "%04x:%02x:%02x", 
reinterpret_cast(&pciDomainID), + reinterpret_cast(&pciBusID), + reinterpret_cast(&pciDeviceID)) == 0x3) { int count = 0; HIP_RETURN_ONFAIL(ihipDeviceGetCount(&count)); for (cl_int i = 0; i < count; i++) { hipDevice_t dev; - hipDeviceProp_t prop; + hipDeviceProp_tR0600 prop; HIP_RETURN_ONFAIL(ihipDeviceGet(&dev, i)); HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, dev)); - if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID) - && (pciDeviceID == prop.pciDeviceID)) { + if ((pciBusID == prop.pciBusID) && (pciDomainID == prop.pciDomainID) && + (pciDeviceID == prop.pciDeviceID)) { *device = i; found = true; break; @@ -395,10 +485,10 @@ hipError_t hipDeviceGetByPCIBusId(int* device, const char*pciBusIdstr) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { +hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig) { HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig); - if(cacheConfig == nullptr) { + if (cacheConfig == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -407,8 +497,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t * cacheConfig ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { - +hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { HIP_INIT_API(hipDeviceGetLimit, pValue, limit); if (pValue == nullptr || limit >= hipLimitRange) { @@ -417,7 +506,7 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { switch (limit) { case hipLimitMallocHeapSize: - hipDeviceProp_t prop; + hipDeviceProp_tR0600 prop; HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, ihipGetDevice())); *pValue = prop.totalGlobalMem; break; @@ -431,8 +520,7 @@ hipError_t hipDeviceGetLimit ( size_t* pValue, hipLimit_t limit ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { - +hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device) { HIP_INIT_API(hipDeviceGetPCIBusId, 
(void*)pciBusId, len, device); int count; @@ -442,24 +530,19 @@ hipError_t hipDeviceGetPCIBusId ( char* pciBusId, int len, int device ) { HIP_RETURN(hipErrorInvalidDevice); } - //pciBusId should be large enough to store 13 characters including the NULL-terminator. + // pciBusId should be large enough to store 13 characters including the NULL-terminator. if (pciBusId == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - hipDeviceProp_t prop; + hipDeviceProp_tR0600 prop; HIP_RETURN_ONFAIL(ihipGetDeviceProperties(&prop, device)); - auto* deviceHandle = g_devices[device]->devices()[0]; - snprintf (pciBusId, len, "%04x:%02x:%02x.%01x", - prop.pciDomainID, - prop.pciBusID, - prop.pciDeviceID, - deviceHandle->info().deviceTopology_.pcie.function); + snprintf(pciBusId, len, "%04x:%02x:%02x.0", prop.pciDomainID, prop.pciBusID, prop.pciDeviceID); HIP_RETURN(len <= 12 ? hipErrorInvalidValue : hipSuccess); } -hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { +hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig) { HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig); if (pConfig == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -469,7 +552,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceReset ( void ) { +hipError_t hipDeviceReset(void) { HIP_INIT_API(hipDeviceReset); hip::getCurrentDevice()->Reset(); @@ -477,7 +560,7 @@ hipError_t hipDeviceReset ( void ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { +hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) { HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig); // No way to set cache config yet. 
@@ -485,34 +568,33 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ) { HIP_RETURN(hipSuccess); } -hipError_t hipDeviceSetLimit ( hipLimit_t limit, size_t value ) { +hipError_t hipDeviceSetLimit(hipLimit_t limit, size_t value) { HIP_INIT_API(hipDeviceSetLimit, limit, value); if (limit >= hipLimitRange) { HIP_RETURN(hipErrorInvalidValue); } - switch(limit) { - case hipLimitStackSize : - // need to query device size and take action - if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) { - HIP_RETURN(hipErrorInvalidValue); - } - break; - case hipLimitMallocHeapSize: - if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) { - HIP_RETURN(hipErrorInvalidValue); - } - break; - default: - LogPrintfError("UnsupportedLimit = %d is passed", limit); - HIP_RETURN(hipErrorUnsupportedLimit); + switch (limit) { + case hipLimitStackSize: + // need to query device size and take action + if (!hip::getCurrentDevice()->devices()[0]->UpdateStackSize(value)) { + HIP_RETURN(hipErrorInvalidValue); + } + break; + case hipLimitMallocHeapSize: + if (!hip::getCurrentDevice()->devices()[0]->UpdateInitialHeapSize(value)) { + HIP_RETURN(hipErrorInvalidValue); + } + break; + default: + LogPrintfError("UnsupportedLimit = %d is passed", limit); + HIP_RETURN(hipErrorUnsupportedLimit); } HIP_RETURN(hipSuccess); } -hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { +hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { HIP_INIT_API(hipDeviceSetSharedMemConfig, config); - if (config != hipSharedMemBankSizeDefault && - config != hipSharedMemBankSizeFourByte && + if (config != hipSharedMemBankSizeDefault && config != hipSharedMemBankSizeFourByte && config != hipSharedMemBankSizeEightByte) { HIP_RETURN(hipErrorInvalidValue); } @@ -530,13 +612,13 @@ hipError_t hipDeviceSynchronize() { int ihipGetDevice() { hip::Device* device = hip::getCurrentDevice(); - if(device == nullptr){ + if (device == nullptr) { return -1; } 
return device->deviceId(); } -hipError_t hipGetDevice ( int* deviceId ) { +hipError_t hipGetDevice(int* deviceId) { HIP_INIT_API(hipGetDevice, deviceId); if (deviceId != nullptr) { @@ -551,13 +633,13 @@ hipError_t hipGetDevice ( int* deviceId ) { } } -hipError_t hipGetDeviceCount ( int* count ) { +hipError_t hipGetDeviceCount(int* count) { HIP_INIT_API_NO_RETURN(hipGetDeviceCount, count); HIP_RETURN(ihipDeviceGetCount(count)); } -hipError_t hipGetDeviceFlags ( unsigned int* flags ) { +hipError_t hipGetDeviceFlags(unsigned int* flags) { HIP_INIT_API(hipGetDeviceFlags, flags); if (flags == nullptr) { HIP_RETURN(hipErrorInvalidValue); @@ -566,7 +648,7 @@ hipError_t hipGetDeviceFlags ( unsigned int* flags ) { HIP_RETURN(hipSuccess); } -hipError_t hipSetDevice ( int device ) { +hipError_t hipSetDevice(int device) { HIP_INIT_API_NO_RETURN(hipSetDevice, device); if (static_cast(device) < g_devices.size()) { hip::setCurrentDevice(device); @@ -578,7 +660,7 @@ hipError_t hipSetDevice ( int device ) { HIP_RETURN(hipErrorInvalidDevice); } -hipError_t hipSetDeviceFlags ( unsigned int flags ) { +hipError_t hipSetDeviceFlags(unsigned int flags) { HIP_INIT_API(hipSetDeviceFlags, flags); if (g_devices.empty()) { HIP_RETURN(hipErrorNoDevice); @@ -590,9 +672,10 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { // Only one scheduling flag allowed a time uint32_t scheduleFlag = flags & hipDeviceScheduleMask; - if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield) - && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync) - && ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) { + if (((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleSpin) && + ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleYield) && + ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleBlockingSync) && + ((scheduleFlag & mutualExclusiveFlags) != hipDeviceScheduleAuto)) 
{ HIP_RETURN(hipErrorInvalidValue); } @@ -625,7 +708,7 @@ hipError_t hipSetDeviceFlags ( unsigned int flags ) { HIP_RETURN(hipSuccess); } -hipError_t hipSetValidDevices ( int* device_arr, int len ) { +hipError_t hipSetValidDevices(int* device_arr, int len) { HIP_INIT_API(hipSetValidDevices, device_arr, len); assert(0 && "Unimplemented"); diff --git a/hipamd/src/hip_gl.cpp b/hipamd/src/hip_gl.cpp index 509408581..22e961fbf 100644 --- a/hipamd/src/hip_gl.cpp +++ b/hipamd/src/hip_gl.cpp @@ -20,6 +20,7 @@ #include "top.hpp" #include "hip/hip_runtime.h" +#include "hip/hip_gl_interop.h" #include "hip_internal.hpp" #include "platform/interop_gl.hpp" #include "cl_common.hpp" diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index 5674d3b37..7493b3295 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -1,6 +1,7 @@ hip_4.2 { global: hipChooseDevice; + hipChooseDeviceR0000; hipCtxCreate; hipCtxDestroy; hipCtxDisablePeerAccess; @@ -62,6 +63,7 @@ global: hipGetDevice; hipGetDeviceCount; hipGetDeviceProperties; + hipGetDevicePropertiesR0000; hipGetErrorName; hipGetErrorString; hipGetLastError; @@ -530,6 +532,8 @@ local: hip_6.0 { global: hipExtGetLastError; + hipChooseDeviceR0600; + hipGetDevicePropertiesR0600; local: *; } hip_5.6; diff --git a/hipamd/src/hip_peer.cpp b/hipamd/src/hip_peer.cpp index 17dc65da0..cadb7c103 100644 --- a/hipamd/src/hip_peer.cpp +++ b/hipamd/src/hip_peer.cpp @@ -151,7 +151,7 @@ hipError_t hipDeviceGetP2PAttribute(int* value, hipDeviceP2PAttr attr, // Linear layout access is supported if P2P is enabled // Opaque Images are supported only on homogeneous systems // Might have more conditions to check, in future. 
- if (srcDeviceProp.gcnArch == dstDeviceProp.gcnArch) { + if (std::string(srcDeviceProp.gcnArchName) == std::string(dstDeviceProp.gcnArchName)) { HIP_RETURN_ONFAIL(canAccessPeer(value, srcDevice, dstDevice)); } else { *value = 0; diff --git a/hipamd/src/hip_prof_gen.py b/hipamd/src/hip_prof_gen.py index 6dc524760..492ae4610 100755 --- a/hipamd/src/hip_prof_gen.py +++ b/hipamd/src/hip_prof_gen.py @@ -393,6 +393,10 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): f.write('#define _HIP_PROF_STR_H\n'); f.write('#define HIP_PROF_VER 1\n') + f.write('\n#include \n') + f.write('#include \n') + f.write('#include "amd_hip_gl_interop.h"\n') + # Check for non-public API for name in sorted(opts_map.keys()): if not name in api_map: @@ -402,6 +406,9 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): priv_lst.append(name) message("Private: " + name) + f.write('\n#define HIP_API_ID_CONCAT_HELPER(a,b) a##b\n'); + f.write('#define HIP_API_ID_CONCAT(a,b) HIP_API_ID_CONCAT_HELPER(a,b)\n'); + # Generating the callbacks ID enumaration f.write('\n// HIP API callbacks ID enumeration\n') f.write('enum hip_api_id_t {\n') @@ -410,6 +417,7 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): cb_id_map = {} last_cb_id = 0 + versioned_functions = set() for name, cb_id in callback_ids: if not name in api_map: f.write(' HIP_API_ID_RESERVED_' + str(cb_id) + ' = ' + str(cb_id) + ',\n') @@ -417,18 +425,30 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): f.write(' HIP_API_ID_' + name + ' = ' + str(cb_id) + ',\n') cb_id_map[name] = cb_id if cb_id > last_cb_id: last_cb_id = cb_id + m = re.match(r'(.*)R[0-9][0-9][0-9][0-9]$', name) + if m: versioned_functions.add(m.group(1)) for name in sorted(api_map.keys()): if not name in cb_id_map: last_cb_id += 1 f.write(' HIP_API_ID_' + name + ' = ' + str(last_cb_id) + ',\n') + m = re.match(r'(.*)R[0-9][0-9][0-9][0-9]$', name) + if m: versioned_functions.add(m.group(1)) f.write(' HIP_API_ID_LAST = ' + 
str(last_cb_id) + ',\n') f.write('\n') + + for name in sorted(versioned_functions): + f.write(' HIP_API_ID_' + name + ' = ' + 'HIP_API_ID_CONCAT(HIP_API_ID_,' + name + '),\n') + f.write('\n') + for name in sorted(priv_lst): f.write(' HIP_API_ID_' + name + ' = HIP_API_ID_NONE,\n') f.write('};\n') + f.write('\n#undef HIP_API_ID_CONCAT_HELPER\n'); + f.write('#undef HIP_API_ID_CONCAT\n'); + # Generating the method to return API name by ID f.write('\n// Return the HIP API string for a given callback ID\n') f.write('static inline const char* hip_api_name(const uint32_t id) {\n') @@ -658,7 +678,6 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): 'hipGetErrorString': '', 'hipMallocHost': '', 'hipModuleLoadDataEx': '', - 'hipGetDeviceProperties': '', 'hipConfigureCall': '', 'hipHccModuleLaunchKernel': '', 'hipExtModuleLaunchKernel': '', @@ -688,6 +707,8 @@ def generate_prof_header(f, api_map, callback_ids, opts_map): continue if value['name'] == 'HIP_API_ID_LAST': break + if type(value['value']) == str: + continue m = re.match(r'HIP_API_ID_(\S*)', value['name']) if m: api_callback_ids.append((m.group(1), value['value'])) diff --git a/hipamd/src/hiprtc/hiprtcInternal.cpp b/hipamd/src/hiprtc/hiprtcInternal.cpp index d423d78fc..446379184 100644 --- a/hipamd/src/hiprtc/hiprtcInternal.cpp +++ b/hipamd/src/hiprtc/hiprtcInternal.cpp @@ -76,7 +76,12 @@ bool RTCProgram::findIsa() { } void* sym_hipGetDevice = amd::Os::getSymbol(handle, "hipGetDevice"); - void* sym_hipGetDeviceProperties = amd::Os::getSymbol(handle, "hipGetDeviceProperties"); + void* sym_hipGetDeviceProperties = + amd::Os::getSymbol(handle, "hipGetDevicePropertiesR0600"); // Try to find the new symbol + if (sym_hipGetDeviceProperties == nullptr) { + sym_hipGetDeviceProperties = + amd::Os::getSymbol(handle, "hipGetDeviceProperties"); // Fall back to old one + } if (sym_hipGetDevice == nullptr || sym_hipGetDeviceProperties == nullptr) { LogInfo("ISA cannot be found to dlsym failure"); diff --git 
a/rocclr/device/device.hpp b/rocclr/device/device.hpp index dc147aa77..a033fcd7e 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -344,6 +344,9 @@ struct Info : public amd::EmbeddedObject { //! Max width of 2D image in pixels. size_t image2DMaxWidth_; + //! Max width of 2DA image in pixels. + size_t image2DAMaxWidth_[2]; + //! Max height of 2D image in pixels. size_t image2DMaxHeight_; @@ -480,6 +483,9 @@ struct Info : public amd::EmbeddedObject { //! Returns max number of pixels for a 1D image size_t image1DMaxWidth_; + //! Returns max number of pixels for a 1DA image + size_t image1DAMaxWidth_; + //! Returns max number of pixels for a 1D image created from a buffer object size_t imageMaxBufferSize_; @@ -1526,7 +1532,6 @@ class Isa { uint32_t memChannelBankWidth_; //!< Memory channel bank width. uint32_t localMemSizePerCU_; //!< Local memory size per CU. uint32_t localMemBanks_; //!< Number of banks of local memory. - }; // class Isa /*! \addtogroup Runtime diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 5fd640bae..d47346c92 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -563,6 +563,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.maxMemAllocSize_ / kPixelRgbaSize); info_.image1DMaxWidth_ = maxTextureSize; info_.imageMaxArraySize_ = MaxImageArraySize; + info_.image2DAMaxWidth_[0] = MaxImageArraySize; + info_.image2DAMaxWidth_[1] = MaxImageArraySize; + info_.image1DAMaxWidth_ = maxTextureSize; info_.preferredInteropUserSync_ = true; info_.printfBufferSize_ = PrintfDbg::WorkitemDebugSize * info().maxWorkGroupSize_; diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 2d17a71c8..58a683894 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -1521,6 +1521,27 @@ bool Device::populateOCLDeviceConstants() { info_.imageMaxArraySize_ = max_array_size; + uint32_t 
max_image1da_width = 0; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(bkendDevice_, + static_cast(HSA_EXT_AGENT_INFO_IMAGE_1DA_MAX_ELEMENTS), + &max_image1da_width)) { + return false; + } + + info_.image1DAMaxWidth_ = max_image1da_width; + + uint32_t max_image2da_width[2] = {0, 0}; + if (HSA_STATUS_SUCCESS != + hsa_agent_get_info(bkendDevice_, + static_cast(HSA_EXT_AGENT_INFO_IMAGE_2DA_MAX_ELEMENTS), + &max_image2da_width)) { + return false; + } + + info_.image2DAMaxWidth_[0] = max_image2da_width[0]; + info_.image2DAMaxWidth_[1] = max_image2da_width[1]; + uint32_t max_image1d_width = 0; if (HSA_STATUS_SUCCESS != hsa_agent_get_info(bkendDevice_, From 0b5cd74c9d0369df78d08c01f18683510cc92ce9 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 24 Nov 2023 06:14:54 +0000 Subject: [PATCH 21/27] SWDEV-433937 - Bundling libamdhip64.so.5 is optional Change-Id: I95d0d7aec802cc35bc754a70be5f41cb80c393a8 --- hipamd/packaging/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/packaging/CMakeLists.txt b/hipamd/packaging/CMakeLists.txt index a877364da..73ae1baad 100644 --- a/hipamd/packaging/CMakeLists.txt +++ b/hipamd/packaging/CMakeLists.txt @@ -42,7 +42,7 @@ set(CPACK_RPM_PACKAGE_LICENSE "MIT") #Begin binary files install if(HIP_PLATFORM STREQUAL "amd" ) if(BUILD_SHARED_LIBS) - install(PROGRAMS ${PROJECT_BINARY_DIR}/libamdhip64.so.5 DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/libamdhip64.so.5 DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary OPTIONAL) install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) From b1765c4fb1db2070e48899deaf070b88be11050c Mon 
Sep 17 00:00:00 2001 From: Jaydeep Patel Date: Wed, 18 Oct 2023 14:55:03 +0000 Subject: [PATCH 22/27] SWDEV-425568, SWDEV-433511 - Do cpu wait for device sync. Change-Id: Ia05e09bd10ab623b36c74037e9988e132120dd9f --- hipamd/src/hip_device_runtime.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/src/hip_device_runtime.cpp b/hipamd/src/hip_device_runtime.cpp index 1920c831a..7e7cc5388 100644 --- a/hipamd/src/hip_device_runtime.cpp +++ b/hipamd/src/hip_device_runtime.cpp @@ -605,8 +605,8 @@ hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) { hipError_t hipDeviceSynchronize() { HIP_INIT_API(hipDeviceSynchronize); - constexpr bool kDontWaitForCpu = false; - hip::Stream::SyncAllStreams(hip::getCurrentDevice()->deviceId(), kDontWaitForCpu); + constexpr bool kDoWaitForCpu = true; + hip::Stream::SyncAllStreams(hip::getCurrentDevice()->deviceId(), kDoWaitForCpu); HIP_RETURN(hipSuccess); } From c2bf552a92cf554ccf14034cb567a3391a291615 Mon Sep 17 00:00:00 2001 From: Jaydeep Patel Date: Tue, 10 Oct 2023 15:06:53 +0000 Subject: [PATCH 23/27] SWDEV-425449 - Move ptrdiff_t & clock_t to hiprtc.cmake. 
Change-Id: I7c8ce3c1b8cb11e0253b0f51ae10fca7f0a32017 (cherry picked from commit 7150b2eaad4f9b15c6d29d392c7d060c4f661190) --- hipamd/include/hip/amd_detail/amd_hip_runtime.h | 8 -------- hipamd/src/hiprtc/cmake/HIPRTC.cmake | 12 ++++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/hipamd/include/hip/amd_detail/amd_hip_runtime.h b/hipamd/include/hip/amd_detail/amd_hip_runtime.h index fd9dfddd4..f4a143798 100644 --- a/hipamd/include/hip/amd_detail/amd_hip_runtime.h +++ b/hipamd/include/hip/amd_detail/amd_hip_runtime.h @@ -93,19 +93,11 @@ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; typedef signed int int32_t; typedef signed long long int64_t; -#if defined(__GLIBCXX__) -typedef long int ptrdiff_t; -#else -typedef long long ptrdiff_t; -#endif -typedef long clock_t; namespace std { using ::uint32_t; using ::uint64_t; using ::int32_t; using ::int64_t; -using ::ptrdiff_t; -using ::clock_t; } #endif // __HIP_NO_STD_DEFS__ #endif // !defined(__HIPCC_RTC__) diff --git a/hipamd/src/hiprtc/cmake/HIPRTC.cmake b/hipamd/src/hiprtc/cmake/HIPRTC.cmake index 53ba93073..ef9e559d1 100644 --- a/hipamd/src/hiprtc/cmake/HIPRTC.cmake +++ b/hipamd/src/hiprtc/cmake/HIPRTC.cmake @@ -59,6 +59,18 @@ function(get_hiprtc_macros HIPRTC_DEFINES) #define _HIP_BFLOAT16_H_\n\ #define HIP_INCLUDE_HIP_MATH_FUNCTIONS_H\n\ #define HIP_INCLUDE_HIP_HIP_VECTOR_TYPES_H\n\ +#if !__HIP_NO_STD_DEFS__\n\ +#if defined(__HIPRTC_PTRDIFF_T_IS_LONG_LONG__) && __HIPRTC_PTRDIFF_T_IS_LONG_LONG__==1\n\ +typedef long long ptrdiff_t;\n\ +#else\n\ +typedef __PTRDIFF_TYPE__ ptrdiff_t;\n\ +#endif\n\ +typedef long clock_t;\n\ +namespace std {\n\ +using ::ptrdiff_t;\n\ +using ::clock_t;\n\ +}\n\ +#endif // __HIP_NO_STD_DEFS__\n\ #pragma clang diagnostic pop" PARENT_SCOPE) endfunction(get_hiprtc_macros) From d62f6a1716e0315324fd32de6578eb183263505d Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 6 Dec 2023 23:46:45 +0000 Subject: [PATCH 24/27] SWDEV-422771 - Update Change Log 
Change-Id: I50e1ef00e6ecd23a6fb2e2eaaf298a479c5d3501 --- CHANGELOG.md | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ff03a9e7..d547f2586 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,97 @@ Full documentation for HIP is available at [docs.amd.com](https://docs.amd.com/) -## (Unreleased) HIP 6.0 (For ROCm 6.0) +## HIP 6.0 (For ROCm 6.0) ### Optimizations ### Added +- Addition of hipExtGetLastError + - AMD backend specific API, to return error code from last HIP API called from the active host thread + +- New fields for external resource interoperability, + - Structs + - hipExternalMemoryHandleDesc_st + - hipExternalMemoryBufferDesc_st + - hipExternalSemaphoreHandleDesc_st + - hipExternalSemaphoreSignalParams_st + - hipExternalSemaphoreWaitParams_st + - Enumerations + - hipExternalMemoryHandleType_enum + - hipExternalSemaphoreHandleType_enum + - hipExternalMemoryHandleType_enum + +- New members are added in HIP struct hipDeviceProp_t, for new feature capabilities including, + - Texture + - int maxTexture1DMipmap; + - int maxTexture2DMipmap[2]; + - int maxTexture2DLinear[3]; + - int maxTexture2DGather[2]; + - int maxTexture3DAlt[3]; + - int maxTextureCubemap; + - int maxTexture1DLayered[2]; + - int maxTexture2DLayered[3]; + - int maxTextureCubemapLayered[2]; + - Surface + - int maxSurface1D; + - int maxSurface2D[2]; + - int maxSurface3D[3]; + - int maxSurface1DLayered[2]; + - int maxSurface2DLayered[3]; + - int maxSurfaceCubemap; + - int maxSurfaceCubemapLayered[2]; + - Device + - hipUUID uuid; + - char luid[8]; + -- this is 8-byte unique identifier. Only valid on windows + -- LUID (Locally Unique Identifier) is supported for interoperability between devices. + - unsigned int luidDeviceNodeMask; + Note: HIP supports LUID only on Windows OS. 
### Changed +- Some OpenGL Interop HIP APIs are moved from the hip_runtime_api header to a new header file hip_gl_interop.h for the AMD platform, as following, + - hipGLGetDevices + - hipGraphicsGLRegisterBuffer + - hipGraphicsGLRegisterImage + +### Changes Impacting Backward Incompatibility +- Data types for members in HIP_MEMCPY3D structure are changed from "unsigned int" to "size_t". +- The value of the flag hipIpcMemLazyEnablePeerAccess is changed to “0x01”, which was previously defined as “0”. +- Some device property attributes are not currently supported in HIP runtime, in order to maintain consistency, the following related enumeration names are changed in hipDeviceAttribute_t + - hipDeviceAttributeName is changed to hipDeviceAttributeUnused1 + - hipDeviceAttributeUuid is changed to hipDeviceAttributeUnused2 + - hipDeviceAttributeArch is changed to hipDeviceAttributeUnused3 + - hipDeviceAttributeGcnArch is changed to hipDeviceAttributeUnused4 + - hipDeviceAttributeGcnArchName is changed to hipDeviceAttributeUnused5 +- HIP struct hipArray is removed from driver type header to be complying with cuda +- hipArray_t replaces hipArray*, as the pointer to array. + - This allows hipMemcpyAtoH and hipMemcpyHtoA to have the correct array type which is equivalent to corresponding CUDA driver APIs. ### Fixed +- Kernel launch maximum dimension validation is added specifically on gridY and gridZ in the HIP API hipModuleLaunchKernel. As a result, when hipGetDeviceAttribute is called for the value of hipDeviceAttributeMaxGridDim, the behavior on the AMD platform is equivalent to NVIDIA. +- The HIP stream synchronisation behaviour is changed in internal stream functions, in which a flag "wait" is added and set when the current stream is null pointer while executing stream synchronisation on other explicitly created streams. This change avoids blocking of execution on null/default stream. 
+The change won't affect usage of applications, and makes them behave the same on the AMD platform as NVIDIA. +- Error handling behavior on unsupported GPU is fixed, HIP runtime will log out error message, instead of creating signal abortion error which is invisible to developers but continued kernel execution process. This is for the case when developers compile any application via hipcc, setting the option --offload-arch with GPU ID which is different from the one on the system. + +### Deprecated And Removed +- Deprecated Heterogeneous Compute (HCC) symbols and flags are removed from the HIP source code, including, + - Build options on obsolete HCC_OPTIONS were removed from cmake. + - Macro definitions are removed. + HIP_INCLUDE_HIP_HCC_DETAIL_DRIVER_TYPES_H + HIP_INCLUDE_HIP_HCC_DETAIL_HOST_DEFINES_H + - Compilation flags for the platform definitions, + AMD platform, + __HIP_PLATFORM_HCC__ + __HCC__ + __HIP_ROCclr__ + NVIDIA platform, + __HIP_PLATFORM_NVCC__ +- File directories in the clr repository are removed, + https://github.com/ROCm-Developer-Tools/clr/blob/develop/hipamd/include/hip/hcc_detail + https://github.com/ROCm-Developer-Tools/clr/blob/develop/hipamd/include/hip/nvcc_detail +- Deprecated gcnArch is removed from hip device struct hipDeviceProp_t. +- Deprecated "enum hipMemoryType memoryType;" is removed from HIP struct hipPointerAttribute_t union. +- Deprecated HIT based tests are removed from HIP project + - Catch tests are available in the [hip-tests](https://github.com/ROCm-Developer-Tools/hip-tests) project ### Known Issues From a75072ce3a1a9bb91e5a7e2cb4cdc945fbcb7915 Mon Sep 17 00:00:00 2001 From: Jatin Chaudhary Date: Wed, 20 Dec 2023 01:28:00 +0000 Subject: [PATCH 25/27] SWDEV-438181 - check stream associated with event ...before we dereference it. It might have been deleted. 
Change-Id: Ief832ee0907658a40ca42b9d78d19658153a05dd --- hipamd/src/hip_stream.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 7eab6d8d8..c72bd8ab1 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -519,8 +519,15 @@ hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsig } hip::Stream* waitStream = reinterpret_cast(stream); hip::Event* e = reinterpret_cast(event); - hip::Stream* eventStream = reinterpret_cast(e->GetCaptureStream()); + auto eventStreamHandle = reinterpret_cast(e->GetCaptureStream()); + // the stream associated with the event might have been destroyed + if (!hip::isValid(eventStreamHandle)) { + // Stream associated with the event has been released + // meaning the event has been completed and we can resume the current stream + return hipSuccess; + } + hip::Stream* eventStream = reinterpret_cast(eventStreamHandle); if (eventStream != nullptr && eventStream->IsEventCaptured(event) == true) { ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node StreamWaitEvent on stream : %p, Event %p", stream, From 204d35d16ef5c2c1ea1a4bb25442908a306c857a Mon Sep 17 00:00:00 2001 From: Satyanvesh Dittakavi Date: Thu, 7 Dec 2023 14:40:00 +0000 Subject: [PATCH 26/27] SWDEV-434170 /SWDEV-432684 /SWDEV-433437 - Fix test_gpu_jit MIGraphx test failure Add clang pragma push and pop diagnostics for ignoring "-Weverything" in the hiprtc builtins header. Otherwise this will ignore even the genuine errors occurring in the hiprtc kernels. 
Change-Id: I8c3dacf902732b2ea495d83e797369f8aebd75d6 (cherry picked from commit 912cc407a4e0d8363ff8c7230bb00d9aea8f44da) --- hipamd/src/hiprtc/cmake/HIPRTC.cmake | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/hipamd/src/hiprtc/cmake/HIPRTC.cmake b/hipamd/src/hiprtc/cmake/HIPRTC.cmake index ef9e559d1..02bd604e4 100644 --- a/hipamd/src/hiprtc/cmake/HIPRTC.cmake +++ b/hipamd/src/hiprtc/cmake/HIPRTC.cmake @@ -27,12 +27,7 @@ function(get_hiprtc_macros HIPRTC_DEFINES) set(${HIPRTC_DEFINES} -"#pragma clang diagnostic push\n\ -#pragma clang diagnostic ignored \"-Wreserved-id-macro\"\n\ -#pragma clang diagnostic ignored \"-Wc++98-compat-pedantic\"\n\ -#pragma clang diagnostic ignored \"-Wreserved-macro-identifier\"\n\ -#pragma clang diagnostic ignored \"-Wundef\"\n\ -#define __device__ __attribute__((device))\n\ +"#define __device__ __attribute__((device))\n\ #define __host__ __attribute__((host))\n\ #define __global__ __attribute__((global))\n\ #define __constant__ __attribute__((constant))\n\ @@ -70,8 +65,7 @@ namespace std {\n\ using ::ptrdiff_t;\n\ using ::clock_t;\n\ }\n\ -#endif // __HIP_NO_STD_DEFS__\n\ -#pragma clang diagnostic pop" +#endif // __HIP_NO_STD_DEFS__\n" PARENT_SCOPE) endfunction(get_hiprtc_macros) @@ -79,21 +73,27 @@ endfunction(get_hiprtc_macros) if(HIPRTC_ADD_MACROS) # Read the existing content of the preprocessed file into a temporary variable FILE(READ "${HIPRTC_PREPROCESSED_FILE}" ORIGINAL_PREPROCESSED_FILE) -# Prepend the pragma to the original content - set(MODIFIED_PREPROCESSED_FILE "#pragma clang diagnostic ignored \"-Weverything\" +# Prepend the push and ignore pragmas to the original preprocessed file + set(PRAGMA_PUSH "#pragma clang diagnostic push") + set(PRAGMA_EVERYTHING "#pragma clang diagnostic ignored \"-Weverything\"") + set(MODIFIED_PREPROCESSED_FILE "${PRAGMA_PUSH}\n${PRAGMA_EVERYTHING} \n${ORIGINAL_PREPROCESSED_FILE}") # Write the modified preprocessed content back to the original file 
FILE(WRITE ${HIPRTC_PREPROCESSED_FILE} "${MODIFIED_PREPROCESSED_FILE}") + message(STATUS "Appending hiprtc macros to ${HIPRTC_PREPROCESSED_FILE}.") get_hiprtc_macros(HIPRTC_DEFINES) FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HIPRTC_DEFINES}") set(HIPRTC_HEADER_LIST ${HIPRTC_HEADERS}) separate_arguments(HIPRTC_HEADER_LIST) -# appends all the headers from the list to the hiprtc preprocessed file +# Appends all the headers from the list to the hiprtc preprocessed file foreach(header ${HIPRTC_HEADER_LIST}) FILE(READ "${header}" HEADER_FILE) FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${HEADER_FILE}") endforeach() +# Append the pop pragma to the preprocessed file + set(PRAGMA_POP "#pragma clang diagnostic pop\n") + FILE(APPEND ${HIPRTC_PREPROCESSED_FILE} "${PRAGMA_POP}") endif() macro(generate_hiprtc_header HiprtcHeader) From 782f954d94bffce3d99ffdfce8b841c35544fe5c Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Tue, 12 Mar 2024 16:43:38 -0700 Subject: [PATCH 27/27] Respect CMAKE_PLATFORM_NO_VERSIONED_SONAME when installing libraries. This is not the best fix because it is manually installing platform specific shared library names/symlinks. However, I don't wish to be on the hook for validating historical practices without a public CI. A better approach would be to install the CMake targets and let CMake do the right thing. --- hipamd/packaging/CMakeLists.txt | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/hipamd/packaging/CMakeLists.txt b/hipamd/packaging/CMakeLists.txt index 73ae1baad..7014ec2ea 100644 --- a/hipamd/packaging/CMakeLists.txt +++ b/hipamd/packaging/CMakeLists.txt @@ -42,16 +42,23 @@ set(CPACK_RPM_PACKAGE_LICENSE "MIT") #Begin binary files install if(HIP_PLATFORM STREQUAL "amd" ) if(BUILD_SHARED_LIBS) + # Note: This is not locally built. It is downloaded and included. 
install(PROGRAMS ${PROJECT_BINARY_DIR}/libamdhip64.so.5 DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary OPTIONAL) + + # Locally built libraries must respect the CMAKE_PLATFORM_NO_VERSIONED_SONAME + # setting. Note that the better way to do this would be to install the + # targets and let CMake distinguish (would also work on Windows). install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc-builtins.so DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) - install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + if(NOT CMAKE_PLATFORM_NO_VERSIONED_SONAME) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc-builtins.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT
binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libhiprtc.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_MAJOR} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + install(PROGRAMS ${PROJECT_BINARY_DIR}/lib/libamdhip64.so.${HIP_LIB_VERSION_STRING} DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT binary) + endif() # Add libraries to asan package install(DIRECTORY ${PROJECT_BINARY_DIR}/lib/ DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT asan PATTERN ".hipInfo" EXCLUDE)