module RegularExpression::Bytecode
The bytecode module defines instructions, and has a compiled object for storing a stream of them, and a builder object for creating the compiled object.
Public Class Methods
compile(nfa)
click to toggle source
Never recurse a graph in a compiler! We don't know how deep it is and don't want to limit how large a program we can accept due to arbitrary stack space. Always use a worklist.
# File lib/regular_expression/bytecode.rb, line 11 def self.compile(nfa) builder = Builder.new label = ->(state, index = 0) { :"state_#{state.object_id}_#{index}" } visited = Set.new worklist = [[nfa, [Insns::Jump.new(:fail)]]] # For each state in the NFA. until worklist.empty? state, fallback = worklist.pop next if visited.include?(state) # Label the start of the state. builder.mark_label(label[state]) visited.add(state) if state.is_a?(NFA::FinishState) builder.push(Insns::Match.new) next end # Other states have transitions out of them. Go through each # transition. state.transitions.each_with_index do |transition, index| builder.mark_label(label[state, index]) if state.transitions.length > 1 && index != state.transitions.length - 1 builder.push(Insns::PushIndex.new) end case transition when NFA::Transition::BeginAnchor builder.push(Insns::GuardBegin.new(label[transition.state])) when NFA::Transition::EndAnchor builder.push(Insns::GuardEnd.new(label[transition.state])) when NFA::Transition::Any builder.push(Insns::JumpAny.new(label[transition.state])) when NFA::Transition::Value builder.push(Insns::JumpValue.new(transition.value, label[transition.state])) when NFA::Transition::Invert builder.push(Insns::JumpValuesInvert.new(transition.values, label[transition.state])) when NFA::Transition::Range if transition.invert builder.push(Insns::JumpRangeInvert.new(transition.left, transition.right, label[transition.state])) else builder.push(Insns::JumpRange.new(transition.left, transition.right, label[transition.state])) end when NFA::Transition::Epsilon builder.push(Insns::Jump.new(label[transition.state])) else raise end next_fallback = if state.transitions.length > 1 && index != state.transitions.length - 1 [Insns::PopIndex.new, Insns::Jump.new(label[state, index + 1])] else fallback end worklist.push([transition.state, next_fallback]) end # If we don't have one of the transitions that always executes, then we # need to add the fallback to the output for this state. if state.transitions.none? { |t| t.is_a?(NFA::Transition::BeginAnchor) || t.is_a?(NFA::Transition::Epsilon) } builder.push(*fallback) end end # We always have a failure case - it's just the failure instruction. builder.mark_label(:fail) builder.push(Insns::Fail.new) builder.build end