% ctype = dtype_to_c_type(data_type) % mul_str = mul.each_with_index.collect { |mul, index| “#{mul} * index_map_#{index}” } __kernel void split(const int N, __global const <%= ctype %> *A, __global <%= ctype %> *C) {

// Get the index of the current element to be processed
const int globalCol = get_global_id(0); // Col ID of C (0..N)
const int localCol = get_global_id(1);
// compute effective coordinates
int ptr = localCol;

<% dest.each_with_index do |div, index| %>

<% if index == axis %>
int index_map_<%= index %> = (int)floor(ptr / (float)<%= div %>) + globalCol * <%= step %>;
<% else %>
int index_map_<%= index %> = (int)floor(ptr / (float)<%= div %>);
<% end %>
<% if index < dest.size - 1%>ptr = ptr % <%= div %>;<% end %><% end %>
C[N*globalCol + localCol] =  A[<%= mul_str.join(" + ") %>];

}