-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathautotune_cpu_random_int8.cpu
35 lines (30 loc) · 1.24 KB
/
autotune_cpu_random_int8.cpu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
#
# Generate, build and check an int8 CPU kernel for each tuning configuration.
#
# Usage: autotune_cpu_random_int8 A_dim B_dim C_dim
#
# Arguments:
#   $1 - A_dim, forwarded to the generator scripts and the driver build
#   $2 - B_dim, forwarded likewise
#   $3 - C_dim, forwarded likewise
#
# Environment:
#   SDE64 - optional path to the sde64 launcher used to run the test binary;
#           defaults to the location that was previously hard-coded here.
#
# For every (AT, B_blocks, CT) combination the script:
#   1. runs code_gen_cpu_int8_block.py to emit testing.cpp and test1.s,
#   2. assembles test1.s into the shared object test.so,
#   3. compiles driver_cpu.cpp into ./test with the same tuning parameters,
#   4. runs ./test under sde64,
#   5. runs test_equivalence.py on cpu_output.npy and ref.npy.
#
# NOTE(review): runtime collection is currently disabled (see the commented
# section in the loop), so the final report always prints the initial
# besttime and an empty best configuration.

if (( $# < 3 )); then
  printf 'Usage: %s A_dim B_dim C_dim\n' "${0##*/}" >&2
  exit 2
fi

A_dim=$1
B_dim=$2
C_dim=$3

infile=matrix_transposed.npy
biasfile=bias.npy
BLOCK=1
C_blocks=1
Gy=1            # not referenced below; kept in case other tooling sources it
besttime=100000
bestset=        # stays empty while runtime collection is disabled

# Overridable emulator location; the default preserves the original path.
sde=${SDE64:-/home/ziheng/Downloads/sde-external-8.56.0-2020-07-05-lin/sde64}

# presumably this writes $infile, $biasfile and ref.npy -- confirm against
# generate_test_matrix.py. Without its outputs nothing below is meaningful.
if ! python generate_test_matrix.py "$A_dim" "$B_dim" "$C_dim" 1 "$BLOCK"; then
  printf 'error: generate_test_matrix.py failed\n' >&2
  exit 1
fi

# $BLOCK is intentionally left unquoted so a space-separated list of AT
# values can be swept.
for AT in $BLOCK; do
  for B_blocks in 1; do
    for CT in 4; do
      config="${AT}_${B_blocks}_${CT}"

      # Skip the configuration when any build step fails; otherwise a stale
      # test.so / test from an earlier iteration would be run and checked.
      if ! python code_gen_cpu_int8_block.py \
          --A_dim "$A_dim" --B_dim "$B_dim" --C_dim "$C_dim" \
          --AT "$AT" --CT "$CT" \
          --B_blocks "$B_blocks" --C_blocks "$C_blocks" \
          --infile "$infile" --outfile testing.cpp --outfile_asm test1.s \
          --x86 --infile_bias "$biasfile"; then
        printf 'warning: code generation failed for %s, skipping\n' "$config" >&2
        continue
      fi

      if ! icc -march=icelake -fPIC -shared -g test1.s -o test.so; then
        printf 'warning: assembling test1.s failed for %s, skipping\n' "$config" >&2
        continue
      fi

      # CT must be the tile size handed to the code generator above. It was
      # previously passed as $1 (i.e. A_dim), so the driver and the generated
      # kernel disagreed on CT.
      if ! icc -I . -O3 -march=native \
          -D AT="$AT" -D CT="$CT" -D C_Blocks="$C_blocks" \
          -DA_dim="$A_dim" -DINFILE="$infile" \
          -D B_dim="$B_dim" -D C_dim="$C_dim" -D C_blocks="$C_blocks" \
          -D X86=1 -D MULTI=0 -D INT8=1 \
          driver_cpu.cpp -lcnpy -o test -std=c++17; then
        printf 'warning: driver build failed for %s, skipping\n' "$config" >&2
        continue
      fi

      "$sde" -- ./test #> runtime
      python test_equivalence.py cpu_output.npy ref.npy

      # Runtime collection is disabled; re-enable together with the
      # "> runtime" redirection on the sde64 line above.
      #runtime=$(grep "millisecond" runtime | awk '{print $3}')
      #cat runtime
      #if (( $(echo "$runtime < $besttime" | bc -l) )) ; then
      #  besttime=$runtime
      #  bestset=${AT}_${B_blocks}_${CT}
      #fi
    done
  done
done

printf 'Best Runtime %s\n' "$besttime"
printf '%s\n' "$bestset"