% c_dtype = dtype_to_c_type(dtype)
/*
 * Adds a bias vector to a contiguous run of input elements.
 *
 * Each work-item takes its global id in dimension 0, multiplies it by the
 * template-supplied run length n to get a starting offset, and then for every
 * i in [0, n) writes value[offset + i] + bias[i] to output[offset + i].
 *
 * value  - input elements, read only
 * bias   - bias terms, indexed 0 .. n-1, read only
 * output - destination; written at the same indices that are read from value
 *
 * NOTE(review): there is no bounds check on the global id, so this presumably
 * relies on the host launching exactly one work-item per run - confirm at the
 * enqueue site.
 */
__kernel void bias_add_<%= dtype %>(__global const <%= c_dtype %> *value, __constant const <%= c_dtype %> *bias, __global <%= c_dtype %> *output) {
    const int row = get_global_id(0);
    /* Offset of the first element handled by this work-item. */
    const int base = <%= n %> * row;

    for (int col = 0; col < <%= n %>; col++) {
        const int idx = base + col;
        output[idx] = value[idx] + bias[col];
    }
}