forked from vosen/ZLUDA
-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Restore cublas argument. (injector)
* Implement some Dark API functions (#41)
* Implement some Dark API functions
* Better error handling
* Implement mul24.lo.
* Implement mul24.hi.
* Fix mul24.lo implementation.
* Make mul24 tests more thorough.
* Add ZLUDA_COMGR_LOG_LEVEL.
* Bring back the minimal implementations of runtime API. (#45)
* [Fix] Handle stream correctly.
* WIP
* Fix fatbin.
* Revert.
* wip
* Remove redundant functions.
* Bump version.

---------

Co-authored-by: SEt <[email protected]>
- Loading branch information
1 parent
1c238a9
commit c0804ca
Showing
22 changed files
with
6,620 additions
and
64 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ members = [ | |
"zluda_redirect", | ||
"zluda_rt", | ||
"zluda_rtc", | ||
"zluda_runtime", | ||
"zluda_sparse", | ||
] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" | ||
target triple = "amdgcn-amd-amdhsa" | ||
|
||
; External helper: takes two i32 operands and returns an i32. Only the | ||
; declaration is present in this module; it is called from @mul24_hi. | ||
; NOTE(review): by its name this is presumably the unsigned mul24.hi | ||
; implementation -- confirm against the module that defines it. | ||
declare i32 @__zluda_ptx_impl__mul24_hi_u32(i32, i32) #0 | ||
|
||
; Kernel @mul24_hi: loads one i32 through the address held in the first | ||
; argument, calls @__zluda_ptx_impl__mul24_hi_u32 with that value and the | ||
; constant 9815513, and stores the i32 result through the address held in | ||
; the second argument. | ||
;   %"18" - byref(i64) addrspace(4) slot holding the 64-bit source address | ||
;   %"19" - byref(i64) addrspace(4) slot holding the 64-bit destination address | ||
define protected amdgpu_kernel void @mul24_hi(ptr addrspace(4) byref(i64) %"18", ptr addrspace(4) byref(i64) %"19") #1 { | ||
; addrspace(5) allocas: %"4"/%"5" hold the two addresses, %"6" the loaded | ||
; operand, %"7" the call result. %"8" is an i1 that is only ever written | ||
; (with false) inside this function and never read back here. | ||
%"8" = alloca i1, align 1, addrspace(5) | ||
%"4" = alloca i64, align 8, addrspace(5) | ||
%"5" = alloca i64, align 8, addrspace(5) | ||
%"6" = alloca i32, align 4, addrspace(5) | ||
%"7" = alloca i32, align 4, addrspace(5) | ||
br label %1 | ||
|
||
1: ; preds = %0 | ||
store i1 false, ptr addrspace(5) %"8", align 1 | ||
; Copy both kernel arguments into their local slots. | ||
%"9" = load i64, ptr addrspace(4) %"18", align 8 | ||
store i64 %"9", ptr addrspace(5) %"4", align 8 | ||
%"10" = load i64, ptr addrspace(4) %"19", align 8 | ||
store i64 %"10", ptr addrspace(5) %"5", align 8 | ||
; Reinterpret the source address as a plain `ptr` and read the i32 operand. | ||
%"12" = load i64, ptr addrspace(5) %"4", align 8 | ||
%"20" = inttoptr i64 %"12" to ptr | ||
%"11" = load i32, ptr %"20", align 4 | ||
store i32 %"11", ptr addrspace(5) %"6", align 4 | ||
; Call the helper with the loaded operand and the immediate 9815513. | ||
%"14" = load i32, ptr addrspace(5) %"6", align 4 | ||
%"13" = call i32 @__zluda_ptx_impl__mul24_hi_u32(i32 %"14", i32 9815513) | ||
store i32 %"13", ptr addrspace(5) %"7", align 4 | ||
; Reinterpret the destination address as a plain `ptr` and write the result. | ||
%"15" = load i64, ptr addrspace(5) %"5", align 8 | ||
%"16" = load i32, ptr addrspace(5) %"7", align 4 | ||
%"21" = inttoptr i64 %"15" to ptr | ||
store i32 %"16", ptr %"21", align 4 | ||
ret void | ||
} | ||
|
||
; Attribute group #0 is referenced by the helper declaration and #1 by the | ||
; kernel definition. #1 carries the same three entries as #0 plus the two | ||
; denormal-fp-math settings, both "ieee,ieee". | ||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } | ||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
.version 6.5 | ||
.target sm_30 | ||
.address_size 64 | ||
|
||
// Kernel mul24_hi: reads one u32 from the address passed in `input`, applies | ||
// mul24.hi.u32 to it with the immediate 9815513, and writes the u32 result | ||
// to the address passed in `output`. | ||
//   input  - .u64 parameter, address of the source u32 | ||
//   output - .u64 parameter, address of the destination u32 | ||
.visible .entry mul24_hi( | ||
.param .u64 input, | ||
.param .u64 output | ||
) | ||
{ | ||
// in_addr/out_addr hold the two parameter values; temp is the loaded | ||
// operand and temp2 the multiply result. | ||
.reg .u64 in_addr; | ||
.reg .u64 out_addr; | ||
.reg .u32 temp; | ||
.reg .u32 temp2; | ||
|
||
// Fetch both addresses from the kernel parameters. | ||
ld.param.u64 in_addr, [input]; | ||
ld.param.u64 out_addr, [output]; | ||
|
||
// ld/st carry no state-space qualifier here. | ||
// NOTE(review): per the PTX ISA, mul24.hi should yield bits 47..16 of the | ||
// 48-bit product of the operands' low 24 bits -- confirm against the spec. | ||
ld.u32 temp, [in_addr]; | ||
mul24.hi.u32 temp2, temp, 9815513; | ||
st.u32 [out_addr], temp2; | ||
ret; | ||
} |
Oops, something went wrong.