Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LLVM unit tests #324

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions ptx/src/test/ll/activemask.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
declare i32 @__zluda_ptx_impl_activemask() #0

declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @activemask: calls @__zluda_ptx_impl_activemask() and writes the i32
; result through the pointer whose 64-bit value is read from the second
; argument (%"34"). The first argument (%"33") is not read anywhere in this body.
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
; Local slots in addrspace(5): %"35" holds the destination address,
; %"36" holds the value returned by the call.
%"35" = alloca i64, align 8, addrspace(5)
%"36" = alloca i32, align 4, addrspace(5)
br label %1

1: ; preds = %0
; Copy the destination address out of the %"34" argument into its slot.
; Note: the i64 accesses are marked align 4 although the slot is allocated align 8.
%"37" = load i64, ptr addrspace(4) %"34", align 4
store i64 %"37", ptr addrspace(5) %"35", align 4
; Call the helper and spill its result.
%"38" = call i32 @__zluda_ptx_impl_activemask()
store i32 %"38", ptr addrspace(5) %"36", align 4
; Reload address and value, turn the integer address into a pointer
; (no address space given on the resulting ptr), and store the result.
%"39" = load i64, ptr addrspace(5) %"35", align 4
%"40" = load i32, ptr addrspace(5) %"36", align 4
%"41" = inttoptr i64 %"39" to ptr
store i32 %"40", ptr %"41", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
39 changes: 39 additions & 0 deletions ptx/src/test/ll/add.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @add: reads one i64 through the address held in the first argument
; (%"36"), adds 1 to it, and stores the sum through the address held in the
; second argument (%"37").
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
; Local slots in addrspace(5): %"38" = source address, %"39" = destination
; address, %"40" = loaded value, %"41" = incremented value.
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
; Load the input i64 through the source address (pointer has no explicit address space).
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
; Compute value + 1.
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"46" = add i64 %"47", 1
store i64 %"46", ptr addrspace(5) %"41", align 4
; Store the sum through the destination address.
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"51", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
39 changes: 39 additions & 0 deletions ptx/src/test/ll/add_non_coherent.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @add_non_coherent: reads one i64 through the address held in the first
; argument (%"36"), adds 1 to it, and stores the sum through the address held in
; the second argument (%"37"). Both data accesses go through addrspace(1) pointers.
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
; Local slots in addrspace(5): %"38" = source address, %"39" = destination
; address, %"40" = loaded value, %"41" = incremented value.
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
; Load the input i64; the integer address is cast to an addrspace(1) pointer.
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
%"44" = load i64, ptr addrspace(1) %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
; Compute value + 1.
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"46" = add i64 %"47", 1
store i64 %"46", ptr addrspace(5) %"41", align 4
; Store the sum; the destination address is likewise cast to addrspace(1).
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr addrspace(1)
store i64 %"49", ptr addrspace(1) %"51", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
39 changes: 39 additions & 0 deletions ptx/src/test/ll/add_tuning.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @add_tuning: reads one i64 through the address held in the first
; argument (%"36"), adds 1 to it, and stores the sum through the address held in
; the second argument (%"37").
; NOTE(review): nothing tuning-specific is visible in this IR body or in
; attribute group #0 usage here — confirm against the generating input.
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
; Local slots in addrspace(5): %"38" = source address, %"39" = destination
; address, %"40" = loaded value, %"41" = incremented value.
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
; Load the input i64 through the source address (pointer has no explicit address space).
%"45" = load i64, ptr addrspace(5) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
; Compute value + 1.
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"46" = add i64 %"47", 1
store i64 %"46", ptr addrspace(5) %"41", align 4
; Store the sum through the destination address.
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"51", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
45 changes: 45 additions & 0 deletions ptx/src/test/ll/and.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @and: reads two consecutive i32 values (byte offsets 0 and 4) through
; the address held in the first argument (%"37"), computes their bitwise AND,
; and stores the i32 result through the address held in the second argument (%"38").
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
; Local slots in addrspace(5): %"39" = source address, %"40" = destination
; address, %"41" = first operand (later reused for the result), %"42" = second operand.
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
; First operand: i32 at source + 0.
%"46" = load i64, ptr addrspace(5) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr
%"45" = load i32, ptr %"54", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
; Second operand: i32 at source + 4 bytes (byte-wise GEP over i8).
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
%"48" = load i32, ptr %"30", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4
; AND the two operands and overwrite the first operand's slot with the result.
%"50" = load i32, ptr addrspace(5) %"41", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4
%"56" = and i32 %"50", %"51"
store i32 %"56", ptr addrspace(5) %"41", align 4
; Store the result through the destination address.
%"52" = load i64, ptr addrspace(5) %"40", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4
%"59" = inttoptr i64 %"52" to ptr
store i32 %"53", ptr %"59", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
55 changes: 55 additions & 0 deletions ptx/src/test/ll/atom_add.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
@shared_mem = external addrspace(3) global [1024 x i8], align 4

declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @atom_add: reads two i32 values (byte offsets 0 and 4) through the
; address held in the first argument (%"40"), seeds @shared_mem with the first,
; atomically adds the second to @shared_mem, then writes two i32 values through
; the address held in the second argument (%"41"): the atomicrmw result at
; offset 0 and the re-read contents of @shared_mem at offset 4.
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
; Local slots in addrspace(5): %"42" = source address, %"43" = destination
; address, %"44" / %"45" = the two i32 working values.
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5)
%"45" = alloca i32, align 4, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4
%"47" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"47", ptr addrspace(5) %"43", align 4
; First input: i32 at source + 0.
%"49" = load i64, ptr addrspace(5) %"42", align 4
%"60" = inttoptr i64 %"49" to ptr
%"48" = load i32, ptr %"60", align 4
store i32 %"48", ptr addrspace(5) %"44", align 4
; Second input: i32 at source + 4 bytes.
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
%"51" = load i32, ptr %"31", align 4
store i32 %"51", ptr addrspace(5) %"45", align 4
; Seed the addrspace(3) global @shared_mem with the first input (plain store).
%"52" = load i32, ptr addrspace(5) %"44", align 4
store i32 %"52", ptr addrspace(3) @shared_mem, align 4
; Atomic integer add of the second input, monotonic ordering, syncscope
; "agent-one-as". Per LLVM semantics atomicrmw yields the value that was in
; memory before the operation; it is kept in %"44".
%"54" = load i32, ptr addrspace(5) %"45", align 4
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"54" syncscope("agent-one-as") monotonic, align 4
store i32 %2, ptr addrspace(5) %"44", align 4
; Re-read @shared_mem after the atomic (plain load) into %"45".
%"55" = load i32, ptr addrspace(3) @shared_mem, align 4
store i32 %"55", ptr addrspace(5) %"45", align 4
; Output 0: the atomicrmw result, at destination + 0.
%"56" = load i64, ptr addrspace(5) %"43", align 4
%"57" = load i32, ptr addrspace(5) %"44", align 4
%"65" = inttoptr i64 %"56" to ptr
store i32 %"57", ptr %"65", align 4
; Output 1: the re-read @shared_mem value, at destination + 4 bytes.
%"58" = load i64, ptr addrspace(5) %"43", align 4
%"66" = inttoptr i64 %"58" to ptr
%"33" = getelementptr inbounds i8, ptr %"66", i64 4
%"59" = load i32, ptr addrspace(5) %"45", align 4
store i32 %"59", ptr %"33", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
55 changes: 55 additions & 0 deletions ptx/src/test/ll/atom_add_float.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
@shared_mem = external addrspace(3) global [1024 x i8], align 4

declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @atom_add_float: reads two float values (byte offsets 0 and 4) through
; the address held in the first argument (%"40"), seeds @shared_mem with the
; first, atomically fadd's the second into @shared_mem, then writes two floats
; through the address held in the second argument (%"41"): the atomicrmw result
; at offset 0 and the re-read contents of @shared_mem at offset 4.
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
; Local slots in addrspace(5): %"42" = source address, %"43" = destination
; address, %"44" / %"45" = the two float working values.
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca float, align 4, addrspace(5)
%"45" = alloca float, align 4, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4
%"47" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"47", ptr addrspace(5) %"43", align 4
; First input: float at source + 0.
%"49" = load i64, ptr addrspace(5) %"42", align 4
%"60" = inttoptr i64 %"49" to ptr
%"48" = load float, ptr %"60", align 4
store float %"48", ptr addrspace(5) %"44", align 4
; Second input: float at source + 4 bytes.
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
%"51" = load float, ptr %"31", align 4
store float %"51", ptr addrspace(5) %"45", align 4
; Seed the addrspace(3) global @shared_mem with the first input (plain store).
%"52" = load float, ptr addrspace(5) %"44", align 4
store float %"52", ptr addrspace(3) @shared_mem, align 4
; Atomic floating-point add of the second input, monotonic ordering, syncscope
; "agent-one-as". Per LLVM semantics atomicrmw yields the value that was in
; memory before the operation; it is kept in %"44".
%"54" = load float, ptr addrspace(5) %"45", align 4
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"54" syncscope("agent-one-as") monotonic, align 4
store float %2, ptr addrspace(5) %"44", align 4
; Re-read @shared_mem after the atomic (plain load) into %"45".
%"55" = load float, ptr addrspace(3) @shared_mem, align 4
store float %"55", ptr addrspace(5) %"45", align 4
; Output 0: the atomicrmw result, at destination + 0.
%"56" = load i64, ptr addrspace(5) %"43", align 4
%"57" = load float, ptr addrspace(5) %"44", align 4
%"65" = inttoptr i64 %"56" to ptr
store float %"57", ptr %"65", align 4
; Output 1: the re-read @shared_mem value, at destination + 4 bytes.
%"58" = load i64, ptr addrspace(5) %"43", align 4
%"66" = inttoptr i64 %"58" to ptr
%"33" = getelementptr inbounds i8, ptr %"66", i64 4
%"59" = load float, ptr addrspace(5) %"45", align 4
store float %"59", ptr %"33", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
53 changes: 53 additions & 0 deletions ptx/src/test/ll/atom_cas.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0

declare i32 @__zluda_ptx_impl_sreg_clock() #0

declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0

; Kernel @atom_cas: reads an i32 at byte offset 0 through the address held in the
; first argument (%"42") and uses it as the expected value of a compare-exchange
; on the i32 at byte offset 4 of that same address, with 100 as the replacement.
; It then writes two i32 values through the address held in the second argument
; (%"43"): field 0 of the cmpxchg result at offset 0, and the re-read contents
; of source + 4 at offset 4.
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
; Local slots in addrspace(5): %"44" = source address, %"45" = destination
; address, %"46" = expected value / cmpxchg result, %"47" = re-read value.
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i32, align 4, addrspace(5)
%"47" = alloca i32, align 4, addrspace(5)
br label %1

1: ; preds = %0
; Copy both 64-bit addresses out of the kernel arguments into local slots.
%"48" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"48", ptr addrspace(5) %"44", align 4
%"49" = load i64, ptr addrspace(4) %"43", align 4
store i64 %"49", ptr addrspace(5) %"45", align 4
; Expected value: i32 at source + 0.
%"51" = load i64, ptr addrspace(5) %"44", align 4
%"61" = inttoptr i64 %"51" to ptr
%"50" = load i32, ptr %"61", align 4
store i32 %"50", ptr addrspace(5) %"46", align 4
; Compare-exchange on the i32 at source + 4 bytes: compare against the expected
; value, replace with the constant 100; monotonic on both success and failure,
; syncscope "agent-one-as".
%"52" = load i64, ptr addrspace(5) %"44", align 4
%"62" = inttoptr i64 %"52" to ptr
%"30" = getelementptr inbounds i8, ptr %"62", i64 4
%"54" = load i32, ptr addrspace(5) %"46", align 4
%2 = cmpxchg ptr %"30", i32 %"54", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
; Keep field 0 of the { i32, i1 } result (per LLVM semantics, the value that was
; in memory before the operation); the i1 success flag is not used.
%"63" = extractvalue { i32, i1 } %2, 0
store i32 %"63", ptr addrspace(5) %"46", align 4
; Re-read the i32 at source + 4 after the cmpxchg (plain load).
%"55" = load i64, ptr addrspace(5) %"44", align 4
%"65" = inttoptr i64 %"55" to ptr
%"33" = getelementptr inbounds i8, ptr %"65", i64 4
%"56" = load i32, ptr %"33", align 4
store i32 %"56", ptr addrspace(5) %"47", align 4
; Output 0: the cmpxchg result value, at destination + 0.
%"57" = load i64, ptr addrspace(5) %"45", align 4
%"58" = load i32, ptr addrspace(5) %"46", align 4
%"66" = inttoptr i64 %"57" to ptr
store i32 %"58", ptr %"66", align 4
; Output 1: the re-read value, at destination + 4 bytes.
%"59" = load i64, ptr addrspace(5) %"45", align 4
%"67" = inttoptr i64 %"59" to ptr
%"35" = getelementptr inbounds i8, ptr %"67", i64 4
%"60" = load i32, ptr addrspace(5) %"47", align 4
store i32 %"60", ptr %"35", align 4
ret void
}

attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
Loading