module RegularExpression::Compiler::X86
Public Class Methods
compile(cfg)
click to toggle source
Generate native code for a CFG
. This looks just like the Ruby
generator but abstracted one level, or just like the interpreter but abstracted two levels!
# File lib/regular_expression/compiler/x86.rb, line 42 def self.compile(cfg) fisk = Fisk.new buffer = Fisk::Helpers.jitbuffer(1024) fisk.asm(buffer) do # Here we're setting up a couple of local variables that point to # registers so that it's easier to see what's actually going on # rax is a scratch register that is used for the return value of the # function return_value = rax # rcx is a scratch register that is used to track the index of the # string where we're currently looking string_index = rcx # rdx is a scratch register that is used to track the index of the # string where we've started the match match_index = rdx # rsp is a reserved register that stores a pointer to the stack stack_pointer = rsp # rbp is a reserved register that stores a pointer to the base of the # stack. It is also known as the frame pointer frame_pointer = rbp # rsi is a scratch register that stores the second argument to the # function, and in our case stores the length of the string string_length = rsi # rdi is a scratch register that stores the first argument to the # function, and in our case stores a pointer to the base of the string string_pointer = rdi # r8 is a scratch register that we're using to store the last read # character value from the string character_buffer = r8 # First we're going to do some initialization of the frame pointer and # stack pointer so we can clear the stack when we're done with this # function push frame_pointer mov frame_pointer, stack_pointer # Now we're going to initialize the counter to 0 so that we attempt to # match at each index of the input string xor match_index, match_index # This is the start of our loop, where at the beginning of the loop # we check if we have already finished looking at each index (in which # case we'll jump to a failure condition) make_label :start_loop_head cmp match_index, string_length jg label(:exit) # Set the string_index value to the match_index value so that we begin # each loop at the current match index mov string_index, match_index cfg.blocks.each do |block| # Label the start of each block so that we can jump between them make_label block.name block.insns.each do |insn| case insn when Bytecode::Insns::PushIndex push string_index when Bytecode::Insns::PopIndex pop string_index when Bytecode::Insns::GuardBegin cmp string_index, imm8(0) jne label(:exit) jmp label(cfg.exit_map[insn.guarded].name) when Bytecode::Insns::GuardEnd cmp string_index, string_length je label(cfg.exit_map[insn.guarded].name) when Bytecode::Insns::JumpAny no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpValue no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the instruction's character, # continue on to the next instruction if it's not equal cmp character_buffer, imm8(insn.char.ord) jne label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpValuesInvert no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to each of the instruction's # characters, continue on to the next instruction if any of them # are equal insn.chars.each do |value| cmp character_buffer, imm8(value.ord) je label(no_match_label) end # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpRange no_match_label = :"no_match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the left hand side of the # instruction's range, continue on to the next instruction if # it's outside the range cmp character_buffer, imm8(insn.left.ord) jl label(no_match_label) # Compare the character buffer to the right hand side of the # instruction's range, continue on to the next instruction if # it's outside the range cmp character_buffer, imm8(insn.right.ord) jg label(no_match_label) # Move the string index forward and jump to the target # instruction inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::JumpRangeInvert no_match_label = :"no_match_#{insn.object_id}" match_label = :"match_#{insn.object_id}" # Ensure we have a character we can read cmp string_index, string_length je label(no_match_label) # Read the character into the character buffer mov character_buffer, string_pointer add character_buffer, string_index mov character_buffer, m64(character_buffer) # Compare the character buffer to the left hand side of the # instruction's range, jump down to the success case if it's # outside the range cmp character_buffer, imm8(insn.left.ord) jl label(match_label) # Compare the character buffer to the right hand side of the # instruction's range, continue on to the next instruction if # it's inside the range cmp character_buffer, imm8(insn.right.ord) jle label(no_match_label) # Move the string index forward and jump to the target # instruction make_label match_label inc string_index jmp label(cfg.exit_map[insn.target].name) make_label no_match_label when Bytecode::Insns::Jump jmp label(cfg.exit_map[insn.target].name) when Bytecode::Insns::Match # If we reach this instruction, then we've successfully matched # against the input string, so we're going to return the integer # that represents the index at which this match began mov return_value, match_index mov stack_pointer, frame_pointer pop frame_pointer ret when Bytecode::Insns::Fail inc match_index jmp label(:start_loop_head) else raise end end end # If we reach this instruction, then we've failed to match at every # possible index in the string, so we're going to return the length # of the string + 1 so that the caller knows that this match failed make_label :exit mov return_value, string_length inc return_value # Here we make sure to clean up after ourselves by returning the frame # pointer to its former position mov stack_pointer, frame_pointer pop frame_pointer ret end Compiled.new(buffer) end