class Ikra::Translator::CommandTranslator
Attributes
environment_builder[R]
kernel_launcher_stack[R]
object_tracer[R]
program_builder[R]
root_command[R]
Public Class Methods
new(root_command:)
click to toggle source
# File lib/translator/commands/command_translator.rb, line 56
# Sets up a translator for +root_command+: an empty kernel launcher stack, a
# fresh EnvironmentBuilder, and a ProgramBuilder that receives both the
# environment builder and the root command.
def initialize(root_command:)
  @root_command = root_command
  @kernel_launcher_stack = []
  @environment_builder = EnvironmentBuilder.new

  # The program builder shares this translator's environment builder.
  @program_builder = ProgramBuilder.new(
    environment_builder: environment_builder,
    root_command: root_command)
end
next_unique_id()
click to toggle source
# File lib/translator/commands/command_translator.rb, line 14
# Bumps the class-level counter @@unique_id by one and returns the new value.
# The counter is initialized outside of this method.
def self.next_unique_id
  @@unique_id += 1
end
translate_command(command)
click to toggle source
Entry point for translator. Returns a [ProgramBuilder], which contains all required information for compiling and executing the CUDA program.
# File lib/translator/commands/command_translator.rb, line 44
# Builds a translator for +command+, runs the translation and hands back the
# translator's program builder.
def self.translate_command(command)
  translator = new(root_command: command)
  translator.start_translation
  translator.program_builder
end
Public Instance Methods
build_command_translation_result( execution: "", result:, command:)
click to toggle source
# File lib/translator/commands/command_translator.rb, line 226
# Wraps +execution+ and +result+ for +command+ in a CommandTranslationResult.
#
# If the command is flagged with +keep+, the result expression is first
# assigned to a temporary variable (named after the command's unique ID), and
# the cached result is registered with the current kernel builder, the current
# kernel launcher and the environment builder.
def build_command_translation_result(
    execution: "",
    result:,
    command:)
  result_type = command.result_type
  unique_id = command.unique_id

  unless command.keep
    # Nothing to cache: pass execution and result through unchanged.
    return CommandTranslationResult.new(
      execution: execution,
      result: result,
      command: command)
  end

  # Store result in a temporary variable so that it can be kept
  # TODO: Remove DEBUG
  kept_result = Constants::TEMP_RESULT_IDENTIFIER + unique_id.to_s
  kept_execution = execution + "\n " + result_type.to_c_type + " " + kept_result + " = " + result + ";"

  kernel_builder.add_cached_result(unique_id.to_s, result_type)
  kernel_launcher.add_cached_result(unique_id.to_s, result_type)
  environment_builder.add_previous_result_type(unique_id, result_type)

  CommandTranslationResult.new(
    execution: kept_execution,
    result: kept_result,
    command: command)
end
kernel_builder()
click to toggle source
# File lib/translator/commands/command_translator.rb, line 98
# Kernel builder of the launcher on top of the kernel launcher stack.
def kernel_builder
  current_launcher = kernel_launcher_stack.last
  current_launcher.kernel_builder
end
kernel_launcher()
click to toggle source
# File lib/translator/commands/command_translator.rb, line 94
# Launcher on top of the kernel launcher stack (nil when the stack is empty).
def kernel_launcher
  kernel_launcher_stack.last
end
pop_kernel_launcher(command_translation_result)
click to toggle source
Pops a KernelLauncher from the kernel launcher stack and adds it to the program builder.
This method is called when all blocks (parallel sections) for that kernel have been translated, i.e., the kernel is fully built.
# File lib/translator/commands/command_translator.rb, line 128
# Removes the top launcher from the kernel launcher stack, finalizes its
# kernel builder with the given translation result and registers the launcher
# with the program builder.
#
# command_translation_result:: object providing +result+, +execution+ and
#                              +result_type+ for the finished kernel
#
# Returns the popped launcher.
# Raises AssertionError if the stack is empty.
def pop_kernel_launcher(command_translation_result)
  previous_launcher = kernel_launcher_stack.pop

  # Check for an empty stack *before* touching the launcher; otherwise the
  # dereference below fails with NoMethodError and this assertion is dead code.
  if previous_launcher.nil?
    raise AssertionError.new("Attempt to pop kernel launcher, but stack is empty")
  end

  finished_builder = previous_launcher.kernel_builder
  finished_builder.block_invocation = command_translation_result.result
  finished_builder.execution = command_translation_result.execution
  finished_builder.result_type = command_translation_result.result_type

  program_builder.add_kernel_launcher(previous_launcher)

  previous_launcher
end
push_kernel_launcher(kernel_builder: nil, kernel_launcher: nil)
click to toggle source
# File lib/translator/commands/command_translator.rb, line 112
# Pushes a launcher onto the kernel launcher stack.
#
# kernel_builder::  if given, a new KernelLauncher wrapping this builder is pushed
# kernel_launcher:: if given, this launcher is pushed as-is
#
# With neither argument, a new KernelLauncher with a new KernelBuilder is
# pushed. Passing both is an error.
#
# Raises ArgumentError if both +kernel_builder+ and +kernel_launcher+ are given.
def push_kernel_launcher(kernel_builder: nil, kernel_launcher: nil)
  # Reject the ambiguous call up front so that the stack is never touched.
  if !kernel_builder.nil? && !kernel_launcher.nil?
    raise ArgumentError.new("kernel_builder and kernel_launcher given but only expected one")
  end

  launcher =
    if !kernel_launcher.nil?
      kernel_launcher
    elsif !kernel_builder.nil?
      KernelLauncher.new(kernel_builder)
    else
      # Default: add new kernel builder
      KernelLauncher.new(KernelBuilder.new)
    end

  @kernel_launcher_stack.push(launcher)
end
start_translation()
click to toggle source
# File lib/translator/commands/command_translator.rb, line 68
# Runs the whole translation for the root command: traces objects, translates
# the command tree inside a fresh kernel launcher, and finally lets the object
# tracer register its SoA arrays with the environment builder.
def start_translation
  Log.info("CommandTranslator: Starting translation...")

  # Trace all objects reachable from the root command
  @object_tracer = TypeInference::ObjectTracer.new(root_command)
  object_tracer.trace_all

  # --- Translate ---
  # A new kernel launcher is opened for the root command; translating the
  # command might create additional kernels. Popping hands the launcher to
  # the program builder.
  push_kernel_launcher
  root_translation = root_command.accept(self)
  pop_kernel_launcher(root_translation)
  # --- End of Translation ---

  # Add SoA arrays to environment
  object_tracer.register_soa_arrays(environment_builder)
end
translate_entire_input(command)
click to toggle source
# File lib/translator/commands/command_translator.rb, line 145
# Translates every input of +command+ and bundles the individual results in
# an EntireInputTranslationResult.
def translate_entire_input(command)
  translated_inputs = command.input.map.with_index do |single_input, position|
    single_input.translate_input(
      parent_command: command,
      command_translator: self,
      # Assuming that every input consumes exactly one parameter
      start_eat_params_offset: position)
  end

  EntireInputTranslationResult.new(translated_inputs)
end
translate_input(input)
click to toggle source
Processes a [Symbolic::Input] object, which contains a reference to a command object and information about how its elements are accessed. If elements are only accessed according to the current thread ID, this input can be fused into the current kernel. Otherwise, a new kernel will be built.
# File lib/translator/commands/command_translator.rb, line 161
# Translates a single input of a command.
#
# * Pattern +:tid+: the input command is translated into the current kernel
#   (or, if it has a previous result, that result is read at `[_tid_]`).
# * Pattern +:entire+: the input command gets its own kernel (unless a
#   previous result exists, which is then used instead); the resulting
#   variable is passed to the current kernel as a parameter.
#
# Raises NotImplementedError for any other pattern.
def translate_input(input)
  previous_result = ""

  if input.command.has_previous_result?
    # Read previously computed (cached) value
    Log.info("Reusing kept result for command #{input.command.unique_id}: #{input.command.gpu_result_pointer}")

    environment_builder.add_previous_result(
      input.command.unique_id, input.command.gpu_result_pointer)
    environment_builder.add_previous_result_type(
      input.command.unique_id, input.command.result_type)

    # Only thread-ID access indexes into the previous result directly
    cell_access = input.pattern == :tid ? "[_tid_]" : ""

    kernel_launcher.configure_grid(input.command.size)

    previous_result = CommandTranslationResult.new(
      execution: "",
      result: "((#{input.command.result_type.to_c_type} *)(_env_->" + "prev_#{input.command.unique_id}))#{cell_access}",
      command: input.command)

    return previous_result if input.pattern == :tid
  end

  # Stay in current kernel
  return input.command.accept(self) if input.pattern == :tid

  if input.pattern != :entire
    raise NotImplementedError.new("Unknown input pattern: #{input.pattern}")
  end

  if input.command.has_previous_result?
    kernel_launcher.use_cached_result(
      input.command.unique_id, input.command.result_type)
    previous_result_kernel_var = "prev_" + input.command.unique_id.to_s
  else
    # Create new kernel
    push_kernel_launcher
    previous_result = input.command.accept(self)
    previous_result_kernel_var = kernel_launcher.kernel_result_var_name
    pop_kernel_launcher(previous_result)
  end

  # Add parameter for previous input to this kernel
  kernel_launcher.add_previous_kernel_parameter(Variable.new(
    name: previous_result_kernel_var,
    type: previous_result.result_type))

  # This is a root command for this kernel, determine grid/block dimensions
  kernel_launcher.configure_grid(input.command.size, block_size: input.command.block_size)

  CommandTranslationResult.new(
    result: previous_result_kernel_var,
    command: input.command)
end
visit_array_combine_command(command)
click to toggle source
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_combine_command
# File lib/translator/commands/array_combine_command.rb, line 4
# Translates an ArrayCombineCommand: translates all inputs, translates the
# command's block, adds the generated code to the current kernel builder and
# returns a translation result whose result expression invokes the block.
def visit_array_combine_command(command)
  Log.info("Translating ArrayCombineCommand [#{command.unique_id}]")

  super

  # Process dependent computation (receiver), returns [InputTranslationResult]
  translated_input = translate_entire_input(command)

  # All variables accessed by this block should be prefixed with the unique ID
  # of the command in the environment.
  scoped_environment = @environment_builder[command.unique_id]

  block_translation = Translator.translate_block(
    block_def_node: command.block_def_node,
    environment_builder: scoped_environment,
    lexical_variables: command.lexical_externals,
    command_id: command.unique_id,
    entire_input_translation: translated_input)

  kernel_builder.add_methods(block_translation.aux_methods)
  kernel_builder.add_block(block_translation.block_source)

  # Build command invocation string: block function applied to the
  # environment followed by all input results
  invocation = block_translation.function_name + "(" +
    (["_env_"] + translated_input.result).join(", ") + ")"

  translation = build_command_translation_result(
    execution: translated_input.execution,
    result: invocation,
    command: command)

  Log.info("DONE translating ArrayCombineCommand [#{command.unique_id}]")

  translation
end
visit_array_command(command)
click to toggle source
— Actual Visitor part starts here —
# File lib/translator/commands/command_translator.rb, line 105
# For commands flagged with +keep+ that do not have a previous result yet,
# reserves a slot for the result pointer in the environment. Does nothing
# (and returns nil) otherwise.
def visit_array_command(command)
  return if !command.keep || command.has_previous_result?

  # Create slot for result pointer on GPU in env
  environment_builder.allocate_previous_pointer(command.unique_id)
end
visit_array_identity_command(command)
click to toggle source
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_identity_command
# File lib/translator/commands/array_identity_command.rb, line 4
# Translates an ArrayIdentityCommand: configures the grid, converts the base
# array via the object tracer, adds it to the environment, and returns a
# translation result that reads the base array at `_tid_`.
def visit_array_identity_command(command)
  Log.info("Translating ArrayIdentityCommand [#{command.unique_id}]")

  super

  # This is a root command, determine grid/block dimensions
  kernel_launcher.configure_grid(command.size, block_size: command.block_size)

  # Add base array to environment; a union type is requested whenever the
  # base type is not a singleton
  wants_union_type = !command.base_type.is_singleton?
  base_array = object_tracer.convert_base_array(
    command.input.first.command, wants_union_type)
  environment_builder.add_base_array(command.unique_id, base_array)

  translation = build_command_translation_result(
    result: "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[_tid_]",
    command: command)

  Log.info("DONE translating ArrayIdentityCommand [#{command.unique_id}]")

  translation
end
visit_array_index_command(command)
click to toggle source
Translate the block of an `Array.pnew` section.
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_index_command
# File lib/translator/commands/array_index_command.rb, line 5
# Translates an ArrayIndexCommand. The result expression is the thread ID
# (`_tid_`) for one-dimensional commands; for multi-dimensional commands it is
# an inline initialization of the command's struct type with one index
# expression per dimension, and that struct type is added to the program
# builder.
#
# command:: command providing +unique_id+, +size+, +block_size+, +dimensions+
#           and (for more than one dimension) +result_type+
#
# Returns a CommandTranslationResult.
def visit_array_index_command(command)
  Log.info("Translating ArrayIndexCommand [#{command.unique_id}]")

  super

  # This is a root command, determine grid/block dimensions. (The grid is
  # configured exactly once; an identical second call was redundant.)
  kernel_launcher.configure_grid(command.size, block_size: command.block_size)

  num_dims = command.dimensions.size

  # One expression per dimension that recovers that dimension's index from
  # the flat thread ID: divide by the product of all later dimension sizes,
  # then wrap around the size of this dimension.
  index_generators = command.dimensions.each_with_index.map do |dim_size, dim_index|
    index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)

    if dim_index > 0
      "(_tid_ / #{index_div}) % #{dim_size}"
    else
      # No modulo required for first dimension
      "_tid_ / #{index_div}"
    end
  end

  if num_dims > 1
    # Retrieve type that was generated earlier
    zipped_type_singleton = command.result_type.singleton_type
    result = zipped_type_singleton.generate_inline_initialization(index_generators)

    # Add struct type to program builder, so that we can generate the source code
    # for its definition.
    program_builder.structs.add(zipped_type_singleton)
  else
    result = "_tid_"
  end

  command_translation = CommandTranslationResult.new(
    result: result,
    command: command)

  Log.info("DONE translating ArrayIndexCommand [#{command.unique_id}]")

  command_translation
end
visit_array_reduce_command(command)
click to toggle source
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_reduce_command
# File lib/translator/commands/array_reduce_command.rb, line 4
# Translates an ArrayReduceCommand.
#
# The block is translated once and added to the current kernel builder. The
# same kernel builder is then launched repeatedly:
#
# * If the number of input elements is an Integer, the required launches are
#   unrolled here, one pushed/popped launcher per intermediate launch, and the
#   current launcher is configured for the final launch.
# * Otherwise the element count is treated as a C++ expression: one regular
#   launch is generated, followed by a WhileLoopKernelLauncher that is left on
#   top of the launcher stack.
#
# command:: command with exactly one input
#
# Returns a CommandTranslationResult whose execution is the instantiated
# "reduce_body.cpp" template.
# Raises AssertionError unless the command has exactly one input.
def visit_array_reduce_command(command)
  Log.info("Translating ArrayReduceCommand [#{command.unique_id}]")

  super

  if command.input.size != 1
    raise AssertionError.new("Expected exactly one input for ArrayReduceCommand")
  end

  # Process dependent computation (receiver)
  input = translate_entire_input(command)

  block_size = command.block_size

  # All variables accessed by this block should be prefixed with the unique ID
  # of the command in the environment.
  env_builder = @environment_builder[command.unique_id]

  block_translation_result = Translator.translate_block(
    block_def_node: command.block_def_node,
    environment_builder: env_builder,
    lexical_variables: command.lexical_externals,
    command_id: command.unique_id,
    entire_input_translation: input)

  kernel_builder.add_methods(block_translation_result.aux_methods)
  kernel_builder.add_block(block_translation_result.block_source)

  # Add "odd" parameter to the kernel which is needed for reduction
  kernel_builder.add_additional_parameters(Constants::ODD_TYPE + " " + Constants::ODD_IDENTIFIER)

  # Number of elements that will be reduced
  num_threads = command.input_size

  # `Integer` instead of `Fixnum`: Fixnum was deprecated in Ruby 2.4 and the
  # constant no longer exists in Ruby 3.2+, where referencing it raises
  # NameError.
  if num_threads.is_a?(Integer)
    # Easy case: Number of required reductions known statically
    odd = (num_threads % 2 == 1).to_s

    # Number of threads needed for reduction
    num_threads = num_threads.fdiv(2).ceil

    previous_result_kernel_var = input.result.first

    first_launch = true

    # While more kernel launches than one are needed to finish reduction
    while num_threads >= block_size + 1
      # Launch new kernel (with same kernel builder)
      push_kernel_launcher(kernel_builder: kernel_builder)

      # Configure kernel with correct arguments and grid
      kernel_launcher.add_additional_arguments(odd)
      kernel_launcher.configure_grid(num_threads, block_size: block_size)

      # First launch of kernel is supposed to allocate new memory, so only
      # reuse memory after first launch
      if first_launch
        first_launch = false
      else
        kernel_launcher.reuse_memory!(previous_result_kernel_var)
      end

      previous_result_kernel_var = kernel_launcher.kernel_result_var_name

      pop_kernel_launcher(input.command_translation_result(0))

      # Update number of threads needed
      num_threads = num_threads.fdiv(block_size).ceil
      odd = (num_threads % 2 == 1).to_s
      num_threads = num_threads.fdiv(2).ceil
    end

    # Configuration for last launch of kernel
    kernel_launcher.add_additional_arguments(odd)
    kernel_launcher.configure_grid(num_threads, block_size: block_size)
  else
    # More difficult case: Have to generate loop for reductions

    # A regular launch always precedes the loop, so the loop launcher below is
    # never the first launch. (Previously this variable was implicitly nil in
    # this branch, which had the same effect.)
    first_launch = false

    # Add one regular kernel launcher for setting up the memory etc.
    odd_first = "(#{num_threads} % 2 == 1)"
    num_threads_first = "((int) ceil(#{num_threads} / 2.0))"
    push_kernel_launcher(kernel_builder: kernel_builder)
    kernel_launcher.add_additional_arguments(odd_first)
    kernel_launcher.configure_grid(num_threads_first, block_size: block_size)
    previous_result_kernel_var = kernel_launcher.kernel_result_var_name
    pop_kernel_launcher(input.command_translation_result(0))

    # Add loop
    # Set up state (variables that are updated inside the loop)
    # 1. Calculate number of elements from previous computation
    # 2. Check if odd number
    # 3. Calculate number of threads that we need
    loop_setup = "int _num_elements = ceil(#{num_threads_first} / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 1;\nint _next_threads = ceil(_num_elements / 2.0);\n"

    # Update loop state after iteration
    # NOTE(review): unlike the setup above, this re-declares `bool _next_odd`
    # and tests `== 0` instead of `== 1`. That looks inconsistent, but how the
    # snippet is embedded is decided by WhileLoopKernelLauncher, which is not
    # visible here -- confirm before changing the generated code.
    update_loop = "_num_elements = ceil(_next_threads / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 0;\n_next_threads = ceil(_num_elements / 2.0);\n"

    push_kernel_launcher(kernel_launcher: WhileLoopKernelLauncher.new(
      kernel_builder: kernel_builder,
      condition: "_num_elements > 1",
      before_loop: loop_setup,
      post_iteration: update_loop))

    kernel_launcher.add_additional_arguments("_next_odd")
    kernel_launcher.configure_grid("_next_threads", block_size: block_size)

    # The loop launcher intentionally stays on the stack here.
    #pop_kernel_launcher(input.command_translation_result(0))
  end

  # Every launch after the first one works on the memory of its predecessor
  if !first_launch
    kernel_launcher.reuse_memory!(previous_result_kernel_var)
  end

  command_execution = Translator.read_file(file_name: "reduce_body.cpp", replacements: {
    "previous_result" => input.result.first,
    "block_name" => block_translation_result.function_name,
    "arguments" => Constants::ENV_IDENTIFIER,
    "block_size" => block_size.to_s,
    "temp_result" => Constants::TEMP_RESULT_IDENTIFIER,
    "odd" => Constants::ODD_IDENTIFIER,
    "type" => command.result_type.to_c_type,
    "num_threads" => Constants::NUM_THREADS_IDENTIFIER})

  command_translation = CommandTranslationResult.new(
    execution: command_execution,
    result: Constants::TEMP_RESULT_IDENTIFIER,
    command: command)

  Log.info("DONE translating ArrayReduceCommand [#{command.unique_id}]")

  command_translation
end
visit_array_stencil_command(command)
click to toggle source
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_stencil_command
# File lib/translator/commands/array_stencil_command.rb, line 4
# Translates an ArrayStencilCommand: translates the inputs and the block,
# generates per-dimension index computations and an out-of-bounds check, and
# instantiates the "stencil_body.cpp" template, which either invokes the block
# with the neighboring values or uses the command's out-of-range value.
def visit_array_stencil_command(command)
  Log.info("Translating ArrayStencilCommand [#{command.unique_id}]")

  super

  num_dims = command.dimensions.size

  # Process dependent computation (receiver), returns [InputTranslationResult]
  input = translate_entire_input(command)

  # All variables accessed by this block should be prefixed with the unique ID
  # of the command in the environment.
  scoped_environment = @environment_builder[command.unique_id]

  block_translation = Translator.translate_block(
    block_def_node: command.block_def_node,
    environment_builder: scoped_environment,
    lexical_variables: command.lexical_externals,
    command_id: command.unique_id,
    entire_input_translation: input)

  kernel_builder.add_methods(block_translation.aux_methods)
  kernel_builder.add_block(block_translation.block_source)

  # Compute indices in all dimensions
  index_generators = (0...num_dims).map do |dim_index|
    index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
    index_mod = command.dimensions[dim_index]

    if dim_index > 0
      "(_tid_ / #{index_div}) % #{index_mod}"
    else
      # No modulo required for first dimension
      "_tid_ / #{index_div}"
    end
  end

  compute_indices = index_generators.each_with_index.map do |generator, dim_index|
    "int temp_stencil_dim_#{dim_index} = #{generator};"
  end.join("\n")

  # Check if an index is out of bounds in any dimension
  bounds_conditions = (0...num_dims).map do |dim_index|
    # Smallest and largest offset used in this dimension
    min_in_dim, max_in_dim = command.offsets.map { |offset| offset[dim_index] }.minmax

    dim_size = command.dimensions[dim_index]

    if dim_size.is_a?(String)
      # This is not a compile-time constant. Pass dimension size as argument
      # to the kernel.
      dim_size_expr = "dim_size_#{dim_index}"
      kernel_builder.add_additional_parameters("int #{dim_size_expr}")
      kernel_launcher.add_additional_arguments(dim_size)
    else
      dim_size_expr = dim_size
    end

    "temp_stencil_dim_#{dim_index} + #{min_in_dim} >= 0 && temp_stencil_dim_#{dim_index} + #{max_in_dim} < #{dim_size_expr}"
  end
  out_of_bounds_check = bounds_conditions.join(" && ")

  # `previous_result` should be an expression returning the array containing the
  # result of the previous computation.
  previous_result = input.result(0)

  arguments = ["_env_"]

  # Pass values from previous computation that are required by this thread.
  # Reconstruct actual indices from indices for each dimension.
  command.offsets.each do |offset|
    multiplier = 1

    # Walk the dimensions from last to first, scaling each term by the
    # product of the sizes of all dimensions handled so far.
    global_index = (num_dims - 1).downto(0).map do |dim_index|
      term = "(temp_stencil_dim_#{dim_index} + #{offset[dim_index]}) * #{multiplier}"

      next_dim_size = command.dimensions[dim_index]

      if next_dim_size.is_a?(String)
        Log.warn("Cannot handle multi-dimensional stencil computations in host sections yet.")
      else
        multiplier = multiplier * next_dim_size
      end

      term
    end

    arguments.push("#{previous_result}[#{global_index.join(" + ")}]")
  end

  # Push additional arguments (e.g., index)
  arguments.push(*input.result(1..-1))
  argument_str = arguments.join(", ")
  stencil_computation = block_translation.function_name + "(#{argument_str})"

  temp_var_name = "temp_stencil_#{CommandTranslator.next_unique_id}"

  # The following template checks if there is at least one index out of bounds. If
  # so, the fallback value is used. Otherwise, the block is executed.
  command_execution = Translator.read_file(file_name: "stencil_body.cpp", replacements: {
    "execution" => input.execution,
    "temp_var" => temp_var_name,
    "result_type" => command.result_type.to_c_type,
    "compute_indices" => compute_indices,
    "out_of_bounds_check" => out_of_bounds_check,
    "out_of_bounds_fallback" => command.out_of_range_value.to_s,
    "stencil_computation" => stencil_computation})

  translation = build_command_translation_result(
    execution: command_execution,
    result: temp_var_name,
    command: command)

  Log.info("DONE translating ArrayStencilCommand [#{command.unique_id}]")

  translation
end
visit_array_zip_command(command)
click to toggle source
Calls superclass method
Ikra::Symbolic::Visitor#visit_array_zip_command
# File lib/translator/commands/array_zip_command.rb, line 4
# Translates an ArrayZipCommand: translates all inputs, registers the
# command's struct type with the program builder and returns a translation
# result that initializes that struct inline from the input results.
def visit_array_zip_command(command)
  Log.info("Translating ArrayZipCommand [#{command.unique_id}]")

  super

  # Process dependent computation (receiver), returns [InputTranslationResult]
  translated_input = translate_entire_input(command)

  # Get Ikra struct type
  struct_type = command.result_type.singleton_type

  # Add struct type to program builder, so that we can generate the source code
  # for its definition.
  program_builder.structs.add(struct_type)

  translation = CommandTranslationResult.new(
    execution: translated_input.execution,
    result: struct_type.generate_inline_initialization(translated_input.result),
    command: command)

  Log.info("DONE translating ArrayZipCommand [#{command.unique_id}]")

  translation
end