Processor < BasicObject
{
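  # capture_items is a stream of [idx, metadata] pairs: metadata[0] names one
  # of the capture methods below (c_start, c_end, s_start, s_end, t_start,
  # t_end, m_start, m_split, m_end, r_start, r_end) and metadata[1] is an
  # optional list of extra arguments. process_each replays the stream by
  # sending each named method to this object with idx and those arguments.
  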
var string: "" # The source string to be processed var capture_items: [] # The stream of capture tokens to be processed var builder: null # The AST Builder to use in the reduction environment # Process the inputs and return the final captures layer process: setup; process_each; validate_state; final_captures # A lightweight class used for string captures in the reduction environment Token < BasicObject { var type var text var row var col inspect: [type, text, row, col].inspect to_s: [type, text, row, col].inspect line: row # alias sym: text.to_sym float: text.to_f integer: |base=0| text.to_inum(base, true) } var tokenizer: &|type, source, start, stop| { pos = position_of(start, source) value = Token.new( type: type text: source.slice(start, stop - start) row: pos.first col: pos.last ) } # The environment for performing reduction actions to build an AST in the PEG. # It gives implicit access to both local captures and AST builder methods. var reduction_env: Capture::Patterns::Reduction::Environment.new var reduction_env_cscope: ::Rubinius::ConstantScope.new(Capture::Patterns::Reduction::Environment) # TODO: fix var cidx_stack: [] # A FILO stack of start indices for captures var sidx_stack: [] # A FILO stack of start indices for string captures var tidx_stack: [] # A FILO stack of start indices for token captures var midx_stack: [] # A FILO stack of start indices for multiplicit captures var ridx_stack: [] # A FILO stack of start indices for reductions var capt_stack: [] # A FILO stack of Hashes with capture names, values # Given a string, return a map of indices of newline positions to line numbers memoize line_map_for: |string| { idx_hash = ::Ruby::Hash.new idx_hash[0] = 0 count = 0 idx = -1 loop { idx = string.index("\n", idx + 1) idx || break count = count + 1 idx_hash[idx] = count } idx_hash } # Given an index and string, return the row (line) and column numbers position_of: |idx, string| { nl_idx = string.rindex("\n", idx) || 0 row = line_map_for(string).fetch(nl_idx) + 1 col = idx - nl_idx [row, col] } # Push or pop a layer to/from the capture stack push_layer: capt_stack.push(::Ruby::Hash.new) pop_layer: capt_stack.pop setup: push_layer final_captures: pop_layer .select |k,v| { k.is_a?(Symbol) } .map |k,v| { [k,v.last] } .to_h process_each: capture_items.each |idx, metadata| { type = metadata[0] captargs = metadata[1] || [] send(type, idx, *captargs) } validate_state: { cidx_stack.empty? || raise("unbalanced cidx_stack: "cidx_stack"") sidx_stack.empty? || raise("unbalanced sidx_stack: "sidx_stack"") tidx_stack.empty? || raise("unbalanced tidx_stack: "tidx_stack"") midx_stack.empty? || raise("unbalanced midx_stack: "midx_stack"") ridx_stack.empty? || raise("unbalanced ridx_stack: "ridx_stack"") } # Start of a named capture c_start: |idx| { # Note the starting index of the capture cidx_stack.push(idx) } # End of a named capture c_end: |idx, name| { captures = capt_stack.last start_idx = cidx_stack.pop # Pick up the captured reduction, if its start and end # indices are within the bounds we are looking for. use_reduction = false value = captures[idx] value &? ( loc = value.first value = value.last ((loc.first>=start_idx) && (loc.last<=idx)) &? ( use_reduction = true ) ?? ( value = null ) ) # If the reduction is not found and the string length is not zero # slice the string with the given indices to get a text value. !use_reduction && !(start_idx==idx) && ( size = idx-start_idx value = (size == 1 &? string[start_idx] ?? 
string.slice(start_idx, size)) ) store_value = [[start_idx, idx], value] captures.store(idx, store_value) captures.store(name, store_value) } # Start of a named text capture s_start: |idx| { # Note the starting index of the text capture sidx_stack.push(idx) } # End of a named text capture s_end: |idx, name| { captures = capt_stack.last start_idx = sidx_stack.pop value = string.slice(start_idx, idx-start_idx) store_value = [[start_idx, idx], value] captures.store(idx, store_value) captures.store(name, store_value) } # Start of a named token capture t_start: |idx| { # Note the starting index of the text capture tidx_stack.push(idx) } # End of a named token capture t_end: |idx, name| { captures = capt_stack.last start_idx = tidx_stack.pop value = tokenizer.call(name, string, start_idx, idx) store_value = [[start_idx, idx], value] captures.store(idx, store_value) captures.store(name, store_value) } # Start of a multiplicit capture m_start: |idx| { midx_stack.push(idx) c_start(idx) } # Split of a multiplicit capture m_split: |idx, name| { c_end(idx, name) c_start(idx) } # End of a multiplicit capture m_end: |idx, name| { captures = capt_stack.last start_idx = midx_stack.pop cidx_stack.pop # Discard cidx pushed by m_start/end calling c_start # Try to find a group of "anonymous" captures by index first value = [] captures.each |k,v| { (k.is_a?(Fixnum) && k<=idx && k>start_idx) && ( loc = v.first (loc.first>=start_idx) && ( value.push(v.last) ) ) } # If the group is empty and the string length is not zero # slice the string with the given indices to get a text value (value.empty? && !(start_idx==idx)) && ( value = string.slice(start_idx, idx-start_idx) ) store_value = [[start_idx, idx], value] captures.store(idx, store_value) captures.store(name, store_value) } # Start of a reduction r_start: |idx| ( # Push a new layer of reduction context onto the stack push_layer ridx_stack.push(idx) ) # End of a reduction r_end: |idx, code, *args| { captures = pop_layer start_idx = ridx_stack.pop captures_map = captures.map |k,v| { k.is_a?(Symbol) &? [k,v.last] ?? null }.compact.to_h # Set up the reduction environment code.scope = reduction_env_cscope reduction_env.builder = builder reduction_env.captures = captures_map # Invoke the reduction action to get a value # Store with the string index as the key so the value can be recaptured value = code.invoke( code.name, Capture::Patterns::Reduction::Environment, reduction_env, [], null, ) store_value = [[start_idx, idx], value] capt_stack.last.store(idx, store_value) }
}