Merge pull request #1 from vloncar/dot_layer
Oops sorry, got distracted when the big turkey came.
DelonShen authored Nov 27, 2020
2 parents b7c70c7 + c909a6a commit 0bdc225
Showing 3 changed files with 34 additions and 97 deletions.
23 changes: 5 additions & 18 deletions hls4ml/model/hls_layers.py
@@ -795,29 +795,16 @@ def initialize(self):
         assert(len(self.inputs) == 2)
         inp1 = self.get_input_variable(self.inputs[0])
         inp2 = self.get_input_variable(self.inputs[1])
 
-        accum_t = HLSType(*reversed(self.model.config.get_precision(self, 'accum')))
-        self.precision[accum_t.name] = accum_t
-        self.set_attr('accum_t', accum_t.precision)
-
-        self.nzeros = -1
-        self.data_length = 2*np.prod(inp1.shape)
-        self.nonzeros = np.count_nonzero(inp1) + np.count_nonzero(inp2)
-        self.nzeros = self.data_length - self.nonzeros
-
-
-        shape = [1]
-        assert(inp1.shape == inp2.shape)
-        dims = ['OUT_DOT_{}'.format(1)]
-        self.add_output_variable(shape, dims)
+        if len(inp1.shape) > 1:
+            raise Exception('ERROR: Dot of tensors with rank > 1 is not yet supported.')
+
+        self.add_output_variable(shape=[1], dim_names=['OUT_DOT_{}'.format(self.index)])
 
     def config_cpp(self):
         params = self._default_config_params()
-        inp1 = self.get_input_variable(self.inputs[0])
-        inp2 = self.get_input_variable(self.inputs[1])
         params['n_out'] = 1
-        params['n_in'] = inp1.shape[0]
-        params['nzeros'] = self.nzeros
+        params['n_in'] = self.get_input_variable(self.inputs[0]).shape[0]
         return self._config_template.format(**params)
 
 class Concatenate(Merge):
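For context, config_cpp fills in the dot config template from hls4ml/templates/vivado_template.py (the third file in this commit). A minimal sketch of what it might render, assuming a hypothetical Dot layer with index 4, 16-element inputs, and an ap_fixed<16,6> accumulator; the struct name and the nnet::dot_config base are illustrative assumptions, not taken from this diff:

    // Hypothetical rendered config: n_in comes from inp1.shape[0], n_out is fixed at 1.
    struct config4 : nnet::dot_config {
        static const unsigned n_in = 16;
        static const unsigned n_out = 1;
        static const unsigned reuse_factor = 1;
        typedef ap_fixed<16,6> accum_t;
    };

With n_zeros dropped from the template, the generated struct no longer carries a sparsity count.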
107 changes: 29 additions & 78 deletions hls4ml/templates/vivado/nnet_utils/nnet_merge.h
@@ -21,7 +21,7 @@
 #define NNET_MERGE_H_
 
 #include "nnet_common.h"
-#include "nnet_helpers.h"
+#include "nnet_dense.h"
 #include "hls_stream.h"
 #include <math.h>
 
@@ -32,12 +32,10 @@ struct merge_config
     static const unsigned n_elem = 10;
 };
 
-
 struct dot_config {
     static const unsigned n_in = 10;
     static const unsigned n_out = 1;
     static const unsigned reuse_factor = 1;
-    static const unsigned n_zeros = 0;
     typedef float accum_t;
 };
 
@@ -52,59 +50,6 @@ struct concat_config {
     static const unsigned axis = -1;
 };
 
-template<class input1_T, class input2_T, class ret_T>
-inline typename std::enable_if<std::is_same<input1_T, ap_uint<1>>::value
-        and std::is_same<input2_T, ap_uint<1>>::value, ap_uint<1>>::type
-product(ap_uint<1> a, ap_uint<1> w){
-    // specialisation for 1-bit weights and incoming data
-    #pragma HLS inline off
-    return a == w;
-}
-
-template<class input1_T, class input2_T, class ret_T>
-inline typename std::enable_if<(not std::is_same<input1_T, ap_uint<1>>::value)
-        and std::is_same<input2_T, ap_uint<1>>::value, ret_T>::type
-product(input1_T a, ap_uint<1> w){
-    // Specialisation for 1-bit weights, arbitrary data
-    #pragma HLS inline off
-    return w == 0 ? (input1_T) -a : a;
-}
-
-template<class input1_T, class input2_T, class ret_T>
-inline typename std::enable_if<(not std::is_same<input1_T, ap_uint<2>>::value)
-        and std::is_same<input2_T, ap_int<2>>::value, ret_T>::type
-product(input1_T a, ap_int<2> w){
-    // Specialisation for 2-bit weights, arbitrary data
-    #pragma HLS inline off
-    if (w == 0) return (input1_T) 0;
-    else if(w == -1) return (input1_T) -a;
-    else return (input1_T) a; // if(w == 1)
-}
-
-template<class input1_T, class input2_T, class ret_T>
-inline typename std::enable_if<(not std::is_same<input1_T, ap_uint<1>>::value)
-        and (not std::is_same<input2_T, ap_uint<1>>::value), ret_T>::type
-product(input1_T a, input2_T w){
-    // 'Normal' product
-    #pragma HLS inline off
-    return a * w;
-}
-
-template<typename input1_T, typename input2_T, class res_T, typename CONFIG_T>
-inline typename std::enable_if<std::is_same<input1_T, ap_uint<1>>::value
-        and std::is_same<input2_T, ap_uint<1>>::value, ap_int<nnet::ceillog2(CONFIG_T::n_in) + 2>>::type
-cast(typename CONFIG_T::accum_t x){
-    return (ap_int<nnet::ceillog2(CONFIG_T::n_in) + 2>) (x - CONFIG_T::n_in / 2) * 2;
-}
-
-template<typename input1_T, typename input2_T, class res_T, typename CONFIG_T>
-inline typename std::enable_if<(not std::is_same<input1_T, ap_uint<1>>::value)
-        or (not std::is_same<input2_T, ap_uint<1>>::value), res_T>::type
-cast(typename CONFIG_T::accum_t x){
-    return (res_T) x;
-}
-
-
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void add(
     input1_T data1[CONFIG_T::n_elem],
@@ -174,33 +119,39 @@ void minimum(
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void dot1d(
     input1_T data1[CONFIG_T::n_in],
     input2_T data2[CONFIG_T::n_in],
     res_T res[CONFIG_T::n_out])
 {
+    #pragma HLS PIPELINE II=CONFIG_T::reuse_factor
+
+    constexpr unsigned multiplier_limit = DIV_ROUNDUP(CONFIG_T::n_in, CONFIG_T::reuse_factor);
+    #pragma HLS ALLOCATION instances=product limit=multiplier_limit function
+
     typename CONFIG_T::accum_t mult[CONFIG_T::n_in];
-    typename CONFIG_T::accum_t acc[CONFIG_T::n_out];
-    Product: for(int ii=0; ii < CONFIG_T::n_in; ii++){
-        #pragma HLS PIPELINE
-        int multiplier_limit = ceil(float(CONFIG_T::n_in*CONFIG_T::n_out) / float(CONFIG_T::reuse_factor)) - floor(float(CONFIG_T::n_zeros) / float(CONFIG_T::reuse_factor));
-        #pragma HLS ALLOCATION instances=product limit=multiplier_limit function
-        mult[ii] = product<input1_T, input2_T, res_T>(data1[ii], data2[ii]);
-    }
-    Accum: for(int ii = 0; ii < CONFIG_T::n_in; ii++){
-        #pragma HLS PIPELINE
-        acc[0] += mult[ii];
-    }
-    Result: for(int ires = 0; ires < CONFIG_T::n_out; ires++){
-        #pragma HLS_UNROLL
-        res[ires] = cast<input1_T, input2_T, res_T, CONFIG_T>(acc[ires]);
-    }
+    #pragma HLS ARRAY_PARTITION variable=mult complete
+
+    typename CONFIG_T::accum_t acc = 0;
+
+    Product: for(int i_mult=0; i_mult < CONFIG_T::n_in; i_mult++) {
+        #pragma HLS UNROLL
+        mult[i_mult] = product<input1_T, input2_T, typename CONFIG_T::accum_t>(data1[i_mult], data2[i_mult]);
+    }
+
+    Accum: for(int i_acc = 0; i_acc < CONFIG_T::n_in; i_acc++) {
+        #pragma HLS UNROLL
+        acc += mult[i_acc];
+    }
+
+    Result: for(int i_res = 0; i_res < CONFIG_T::n_out; i_res++) {
+        #pragma HLS UNROLL
+        res[i_res] = cast<input1_T, res_T, CONFIG_T>(acc);
+    }
 }
 
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate1d(
     input1_T data1[CONFIG_T::n_elem1_0],
     input2_T data2[CONFIG_T::n_elem2_0],
     res_T res[CONFIG_T::n_elem1_0 + CONFIG_T::n_elem2_0])
 {
@@ -214,7 +165,7 @@
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate2d_0(
     input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1],
     input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
     res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
 {
@@ -228,7 +179,7 @@
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate2d_1(
     input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1],
     input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
     res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
 {
@@ -244,7 +195,7 @@
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate2d(
     input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1],
     input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1],
     res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1])
 {
@@ -271,7 +222,7 @@ input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate3d_1(
     input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
     input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
     res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
 {
@@ -303,7 +254,7 @@ input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
 
 template<class input1_T, class input2_T, class res_T, typename CONFIG_T>
 void concatenate3d_2(
     input1_T data1[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2],
     input2_T data2[CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2],
     res_T res[CONFIG_T::n_elem1_0 * CONFIG_T::n_elem1_1 * CONFIG_T::n_elem1_2 + CONFIG_T::n_elem2_0 * CONFIG_T::n_elem2_1 * CONFIG_T::n_elem2_2])
 {
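To make the rewritten dot1d concrete, here is a minimal host-side sketch of a call; it is not part of this commit. It assumes plain float data so that the generic product and cast overloads now pulled in from nnet_dense.h resolve, a hand-written config standing in for a generated one, and compilation against the Vivado HLS headers (the pragmas are ignored off-target):

    #include "nnet_merge.h"

    // Hypothetical config mirroring nnet::dot_config; values chosen for illustration.
    struct test_dot_config : nnet::dot_config {
        static const unsigned n_in = 4;
        static const unsigned n_out = 1;
        static const unsigned reuse_factor = 1;
        typedef float accum_t;
    };

    int main() {
        float a[4] = {1, 2, 3, 4};
        float b[4] = {4, 3, 2, 1};
        float res[1];
        nnet::dot1d<float, float, float, test_dot_config>(a, b, res);
        // res[0] == 1*4 + 2*3 + 3*2 + 4*1 == 20
        return 0;
    }

The multiplier budget also got simpler: for n_in = 16 and reuse_factor = 3, the old code evaluated ceil(16*1/3.0) - floor(n_zeros/3.0) at run time inside the Product loop, whereas the new compile-time DIV_ROUNDUP(16, 3) = (16 + 3 - 1)/3 = 6 fixes multiplier_limit regardless of zero-valued inputs.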
1 change: 0 additions & 1 deletion hls4ml/templates/vivado_template.py
@@ -134,7 +134,6 @@
     static const unsigned n_in = {n_in};
     static const unsigned n_out = {n_out};
     static const unsigned reuse_factor = {reuse};
-    static const unsigned n_zeros = {nzeros};
     typedef {accum_t} accum_t;
 }};\n"""
 
