class RedParse
require “redparse/compile”
require “redparse/compile”
redparse - a ruby parser written in ruby Copyright (C) 2012, 2016 Caleb Clausen
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
redparse - a ruby parser written in ruby Copyright (C) 2012, 2016 Caleb Clausen
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
redparse - a ruby parser written in ruby Copyright (C) 2012, 2016 Caleb Clausen
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
redparse - a ruby parser written in ruby Copyright (C) 2012, 2016 Caleb Clausen This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
end
Constants
- ACTION_PATTERN
- BEGIN2END
- BEGINAFTEREQUALS
(CallSiteNode&-{:real_parens=>false, :args=>-{:size=>~0.reg}})
- BEGINAFTEREQUALS_MARKED
- BEGINWORDS
- BINOP_KEYWORDS
these ought to be regular operators, fer gosh sake
- BareMethod
- CACHEDIRNAME
- CHARMAPPINGS
- DEFOP
- DotCall
rule format:
-[syntax pattern_matchers.+, lookahead.-]>>node type
- DotOp
#HAS_PRECEDENCE=Op(/^#{PRECEDENCE.keys.map{|k| Regexp.quote k}.join('|')}$/,true)
KeywordOp= KeywordToken & -{ :ident=>/^(#{BINOP_KEYWORDS.join('|')})$/ } KeywordOp2= KeywordToken & -{ :ident=>/^([\[({!+*?:,]|\.{1,3}|::|=>)$/ }
- DoubleColonOp
- ENDWORDLIST
BEGINWORDLIST=RubyLexer::BEGINWORDLIST + %w"( [ {"
- ENDWORDS
- EPSILON
- Expr
- GLOBALCACHEDIRNAME
- HASHED_REDUCER
- HEADER_REX
- IGN_SEMI_AFTER
- IGN_SEMI_BEFORE
- INNERBOUNDINGWORDS
- KW2class
- KW_Op
- KeywordOp
- LCLETTER
- LETTER
- LETTER_DIGIT
- LHS_COMMA
- LOWEST_OP
- LowerOp
- Lvalue
- MODIFYASSIGNOP
- MULTIASSIGN
- NONASSIGNOP
- OPERATORLIKE_LB
for use in lookback patterns
- OPERATORS
- Op
- PRECEDENCE
- Punc2name
include
StackableClasses
- RESCUE_BODY
- RESCUE_KW
- RESCUE_OP
- RHS_COMMA
- RIGHT_ASSOCIATIVE
see pickaxe, 1st ed, page 221
- RUBYBINOPERATORS
- RUBYKEYWORDLIST
- RUBYNONSYMOPERATORS
- RUBYSYMOPERATORS
- RUBYUNOPERATORS
- RULES
rule format:
syntax pattern_matchers.+, lookahead.-, node type
- STRMAPPINGS
- STRMAP_REX
- UCLETTER
specific to parsing ruby
- UNOP
- VERSION
- Value
- WANTS_SEMI
- WITHCOMMAS
Attributes
Public Class Methods
# File lib/miniredparse.rb, line 547 def self.KW(ident) if defined? SPECIALIZED_KEYWORDS fail if /\\/===ident orig_ident=ident if Regexp===ident list=ident.to_s[/\(?-mix:\^\((.*)\)\$\)/,1] #pick apart any char class in ident if open_bracket_idx=list.index(/([^\\]|^)\[/) open_bracket_idx+=1 unless list[open_bracket_idx]=="[" close_bracket_idx=list.index(/[^\\]\]/,open_bracket_idx+1) close_bracket_idx+=1 unless list[close_bracket_idx]=="]" cclass=list.slice!(open_bracket_idx..close_bracket_idx) cclass=cclass[1...-1] cclass=cclass.scan( /[^\\]|\\./ ) cclass.map!{|ch| ch.size==1 ? ch : ch[1..1] } end #rest of it should be a list of words separated by | list=list.split(/\|/).reject{|x| x==''} list.concat cclass if cclass list.map{|w| w.gsub!(/\\/,'') KW(w) }.inject{|sum,kw| sum|kw} else fail unless String===ident ident=Punc2name[ident] unless /^(?:(?!#{LETTER_DIGIT}).)+$/o===ident fail "no name for #{orig_ident}" unless ident eval %{ class Keyword_#{ident} < SpecializedKeywordToken def ident; '#{orig_ident}' end # def self.instance; @instance ||= allocate end # def self.new; instance end def initialize(offset) @offset=offset end end } KW2class[ident]||=const_get("Keyword_#{ident}") end else ident=case ident when Integer; ident.chr when String,Regexp; ident else ident.to_s end return KeywordToken&-{:ident=>ident} end end
MethNameToken&-{ hack, shouldn't be necessary
#rubylexer should know to generally treat "defined?" as a keyword #or operator. (like most keywords, it can also be used as a method # name....) :ident=>"defined?"
}
# File lib/miniredparse.rb, line 634
# Build a matcher for an operator token.  With +allow_keyword+, keyword
# tokens are accepted as well; with +ident+, the match is narrowed to
# that specific operator identifier.
def self.Op(ident=nil, allow_keyword=false)
  token_pat = allow_keyword ? OperatorToken|KeywordToken : OperatorToken
  token_pat = token_pat & -{:ident=>ident} if ident
  #token_pat[:infix?]=true
  token_pat
end
# File lib/miniredparse.rb, line 400
# Probe whether the installed RubyLexer tokenizes "return {}.size" the
# way this parser expects (lexer feature detection).
def self.has_return_hash_fix? #is this needed? it's not used in this file....
  rl=RubyLexer.new("","return {}.size")
  expected_sequence=[
    FileAndLineToken, MethNameToken, ImplicitParamListStartToken,
    WsToken, KeywordToken, KeywordToken, KeywordToken,
    MethNameToken, ImplicitParamListStartToken,
    ImplicitParamListEndToken, ImplicitParamListEndToken, EoiToken
  ]
  # short-circuits on the first mismatching token, like the original
  # and-chain did
  expected_sequence.all?{|klass| klass===rl.get1token }
end
# File lib/redparse/compile.rb, line 1443 def self.inspect_constant_names constants.each{|kn| k=const_get(kn) next if Class|Module|Numeric|Symbol|true|false|nil===k k.extend NamedConstant k.constant_name=kn } end
# File lib/miniredparse.rb, line 1070
# Set up a parser over +input+ (String/IO for normal lexing, or an Array
# of pre-made tokens), consulting/creating a parse cache keyed on the
# input parameters plus the parser implementation signature.
#
# name    - file name used in diagnostics and cache keys
# line    - starting line number
# lvars   - local variable names already in scope
# options - :rubyversion (default 1.8), :encoding (default :ascii),
#           :cache_mode (:read_write, :read_only, :write_only, :none)
def initialize(input,name="(eval)",line=1,lvars=[],options={})
  @rubyversion=options[:rubyversion]||1.8
  encoding=options[:encoding]||:ascii
  encoding=:binary if @rubyversion<=1.8
  cache=Cache.new( File===input,name,
    :line,line,:encoding,encoding,:locals,lvars.sort.join(","),
    @rubyversion, :/, *signature )
  cache_mode=options[:cache_mode]||:read_write
  raise ArgumentError unless /^(?:read_(?:write|only)|write_only|none)$/===cache_mode.to_s
  read_cache= /read/===cache_mode.to_s
  input.binmode if input.respond_to? :binmode
  # cache hit: remember the cached tree and skip all lexer setup
  if read_cache and cache and result=cache.get(input)
    @cached_result=result
    @write_cache=nil
    return
  end
  if /write/===cache_mode.to_s
    @write_cache,@input= cache,input
  else
    @write_cache=nil
  end

  if Array===input
    # token array supplied directly; make it act like a lexer
    def input.get1token; shift end
    @lexer=input
    if @rubyversion>=1.9
      @funclikes=RubyLexer::RubyLexer1_9::FUNCLIKE_KEYWORDS
      @varlikes=RubyLexer::RubyLexer1_9::VARLIKE_KEYWORDS
    else
      @funclikes=RubyLexer::FUNCLIKE_KEYWORDS
      @varlikes=RubyLexer::VARLIKE_KEYWORDS
    end
  else
    @lexer=RubyLexer.new(name,input,line,0,:rubyversion=>@rubyversion,:encoding=>encoding)
    @funclikes=@lexer::FUNCLIKE_KEYWORDS()
    @varlikes=@lexer::VARLIKE_KEYWORDS()
    lvars.each{|lvar| @lexer.localvars[lvar]=true }
    encoding=@lexer.encoding_name_normalize(encoding.to_s).to_sym
    warn "#{encoding} encoding won't really work right now" if RubyLexer::NONWORKING_ENCODINGS.include? encoding
  end
  # 1.9 adds the stabby lambda arrow to the funclike set
  @funclikes=/#@funclikes|^->$/ if @rubyversion>=1.9
  @filename=name
  @min_sizes={}
  @compiled_rules={}
  @moretokens=[]
  @unary_or_binary_op=/^[-+]$/
# @rules=self.expaneded_RULES
  @precedence=self.PRECEDENCE
  @RIGHT_ASSOCIATIVE=self.RIGHT_ASSOCIATIVE
  if defined? END_ATTACK
    compile
  end
  @saw_item_that=nil
  @print_filter=proc{true}
end
# File lib/redparse.rb, line 1900
# One-shot convenience: build a parser with the given arguments and
# immediately run it, returning the parse tree.
def self.parse(*args)
  parser = new(*args)
  parser.parse
end
# File lib/redparse/pthelper.rb, line 19
# Destructively collapse pointless [:begin, x] wrappers in the nested
# parse-tree array +pt+: any two-element sub-array whose head is :begin
# is replaced, in place, by its payload.  Works bottom-up via recursion.
def self.remove_silly_begins(pt)
  pt.each_with_index{|child,idx|
    next unless Array===child
    remove_silly_begins(child)
    pt[idx]=child.last if child.size==2 and child.first==:begin
  }
end
# File lib/miniredparse.rb, line 1063 def RedParse.signature(ancs=ancestors) [ancs.map{|m| m.name}, Digest::SHA256.file(__FILE__), Digest::SHA256.file(__FILE__.sub(/\.rb\z/,"/node.rb")), ] end
# File lib/miniredparse.rb, line 117 def self.stack_monkey(*args,&block) StackMonkey.new(*args,&block) end
# File lib/redparse/generate.rb, line 376
# Mangle +str+ into a C-identifier-safe name: first rewrite known
# multi-character sequences via STRMAPPINGS, then replace every remaining
# non-word character (and literal X, the escape character itself) with
# "X" followed by its CHARMAPPINGS name or its hex character code.
def self.str2cname str
  str.gsub(STRMAP_REX){|str2| STRMAPPINGS[str2] } \
     .gsub(/(?!#{LETTER_DIGIT}).|[X]/o){|ch|
       # Bug fix: the scraped form read `"X"+ esc=CHARMAPPINGS[ch[0]] ?
       # esc : ...`, which parses as `esc=(cond ? esc : ...)` — testing a
       # value while assigning `esc` to a branch that reads the still-nil
       # `esc`.  Assign first, then choose.
       esc=CHARMAPPINGS[ch[0]]
       "X"+(esc ? esc : ch[0].to_s(16))
     }
end
Public Instance Methods
# File lib/miniredparse.rb, line 757 def FUNCLIKE_KEYWORD KeywordToken&-{:ident=>@funclikes} end
# File lib/miniredparse.rb, line 598 def KW(ident); self.class.KW(ident) end
just the left side (the stack/lookahead matchers)
# File lib/redparse/compile.rb, line 1026 def LEFT # require 'md5' @rules=expanded_RULES() # p MD5.new(@rules).to_s @rules.map{|r| r.left.subregs }.flatten end
remove lookahead and lookback decoration (not used?)
# File lib/redparse/compile.rb, line 1034 def LEFT_NO_LOOKING l=LEFT() l.map!{|m| case m # when Reg::LookAhead,Reg::LookBack; fail #should be gone already now when Proc; [] else m # end # } l end
# File lib/miniredparse.rb, line 641 def Op(*args); self.class.Op(*args); end
# File lib/miniredparse.rb, line 437 def PRECEDENCE { # "("=>122, #method param list # "{"=>122, "do"=>122, #blocks "::"=>121, "."=>121, # "defined?"=>120.5, "["=>120, #[] []= methods "!"=>119, "~"=>119, "+@"=>119, "**"=>118, "-@"=>117, "*"=>116, "/"=>116, "%"=>116, "+"=>115, "-"=>115, "<<"=>114, ">>"=>114, "&"=>113, "^"=>112, "|"=>112, "<="=>111, ">="=>111, "<"=>111, ">"=>111, "<=>"=>110, "=="=>110, "==="=>110, "!="=>110, "=~"=>110, "!~"=>110, "&&"=>109, "||"=>108, ".."=>107, "..."=>107, "?"=>106, # ":"=>106, #not sure what to do with ":" "unary&"=>105, #unary * and & operators "lhs*"=>105, #this should remain above = "lhs,"=>105, "rescue3"=>105, "="=>104, "%="=>104, "/="=>104, "-="=>104, "+="=>104, "|="=>104, "&="=>104, ">>="=>104, "<<="=>104, "*="=>104, "&&="=>104, "||="=>104, "**="=>104, "^="=>104, "defined?"=>103, "not"=>103, ":"=>102, #but not when used as a substitute for 'then' "=>"=>101, "rhs,"=>100, #"call,"=>100, "array,"=>100, "param,"=>100, ","=>100, "rhs*"=>100, "unary*"=>100, #the 'precedence' of comma is somewhat controversial. it actually has #several different precedences depending on which kind of comma it is. #the precedence of , is higher than :, => and the assignment operators #in certain (lhs) contexts. therefore, the precedence of lhs-comma should #really be above "=". #"unary" prefix function names seen has operators have this precedence #but, rubylexer handles precedence of these and outputs fake parens #to tell us how its parsed "or"=>99, "and"=>99, "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98, ";"=>96, } end
see pickaxe, 1st ed, page 221
# File lib/miniredparse.rb, line 419 def RIGHT_ASSOCIATIVE { # "defined?"=>120.5, "**"=>118, "="=>105, "%="=>105, "/="=>105, "-="=>105, "+="=>105, "|="=>105, "&="=>105, ">>="=>105, "<<="=>105, "*="=>105, "&&="=>105, "||="=>105, "**="=>105, "^="=>105, # "and"=>99, "or"=>99, # "if"=>98, "unless"=>98, "while"=>98, "until"=>98, "rescue"=>98, # "&&"=>109, "||"=>108, } end
# File lib/miniredparse.rb, line 797
# The grammar: an array of Reg::Transform rules, each mapping a stack
# pattern (plus optional lookahead/lookback) to a node class or parser
# action (:accept, :error, :shift, or a stack monkey).  Rule order
# matters: earliest rules have lowest priority.
#
# NOTE(review): reconstructed from a collapsed scrape.  The original
# carries a very large bank of commented-out (inactive) rules between the
# active ones — operators, assignment, dot/::, ternary, control
# structures, literals, heredocs and cleanup monkeys; only a few
# representative commented rules are retained below.
def RULES
  lower_op= lower_op()
  result=
  [-[StartToken.lb, Expr.-, EoiToken.la]>>:accept,
   -[EoiToken]>>:error,
  ]+
  #these must be the lowest possible priority, and hence first in the rules list
# BEGIN2END.map{|_beg,_end|
#   -[KW(_beg), (KW(_beg)|KW(_end)).~.*, KW(_end), KW(/^(do|\{)$/).~.la]>>MisparsedNode
# }+
  [
#  -[UNOP, Expr, lower_op]>>UnOpNode,
#  -[DEFOP, ParenedNode]>>UnOpNode,
#  -[Op(/^(?:unary|lhs|rhs)\*$/), ValueNode, lower_op]>>UnaryStarNode,
   -[Expr, Op|KW_Op, Expr, lower_op]>>RawOpNode, #most operators
   -[#(OPERATORLIKE_LB&
     (MethNameToken|FUNCLIKE_KEYWORD()).~.lb,
     '(', Expr, KW(')')&~(-{:callsite? =>true}|-{:not_real? =>true}), KW('do').~.la]>>ParenedNode,
   -[(OPERATORLIKE_LB&dont_postpone_semi).lb, Expr, ';', Expr, lower_op]>>SequenceNode,
   -[NumberToken|SymbolToken]>>LiteralNode,
   #lexer does the wrong thing with -22**44.5, making the - part
   #of the first number token. it's actually lower precedence than
   #**... (commented fix_neg_exp stack monkey lived here)
  ]
  if @rubyversion >= 1.9
    result.concat [
#     -['->', ParenedNode.-, 'do', Expr.-, 'end']>>ProcLiteralNode,
#     -['->', VarLikeNode["nil",{:@value=>nil}].reg, 'do', Expr.-, 'end']>>ProcLiteralNode,
      -[(DotOp|DoubleColonOp).lb, '(',Expr.-,')', BlockNode.-, KW('do').~.la]>>CallNode,
    ]
  end
  return result
end
all classes mentioned in rules, on left and right sides
# File lib/redparse/compile.rb, line 1067 def STACKABLE_CLASSES # return @sc_result if defined? @sc_result @sc_result=[] @subclasses_of=child_relations_among(*vertices) # @sc_result=false l=LEFT() l=l.map{|lm| sc_juice lm}.flatten.compact assert l.grep(nil).empty? r= @rules.map{|rr| rr.right }.grep(Class) #classes in productions result=l+r @subclasses_of=nil @sc_result.replace result.grep(Class).uniq fail if @sc_result.empty? return @sc_result end
for use in lookahead patterns
# File lib/miniredparse.rb, line 771 def VALUELIKE_LA KW(@varlikes)|NumberToken|SymbolToken|StringToken|UNOP|DEFOP| KW(/^[({]$/x)|VarNameToken|MethNameToken|HerePlaceholderToken| KW(BEGINWORDS)|FUNCLIKE_KEYWORD()|AssignmentRhsListStartToken #why isn't this a sufficient implementation of this method: # KW('(') #in which case, '(' can be made the highest precedence operator instead end
# File lib/miniredparse.rb, line 243 def [](*args) @stack.[](*args) end
# File lib/miniredparse.rb, line 247 def []=(*args) @stack.[]=(*args) end
# File lib/redparse/generate.rb, line 155 def action2c(action) case action when Rule; "goto reduce_#{str2cname action.name};" when nil,:error; "goto error_handler;" when ParserState; "goto shift_state_#{str2cname action.name};" when :accept; "YYACCEPT;" when MultiReduce; action.action2c when MultiShift; action.action2c # when StackMonkey; action.action2c else fail "unexpected action type: #{action.class} = #{action}" end end
# File lib/redparse.rb, line 793
# Extension point: additional containers that may hold nodes.
# The base implementation contributes none.
def addl_node_containers
  []
end
# File lib/redparse/compile.rb, line 251 def all_dotted_rules all_rules.map{|rule| (0...rule.patterns.size).map{|i| DottedRule.create(rule,i,self) } }.flatten end
$OLD_PAA=1
# File lib/redparse/compile.rb, line 261 def all_initial_dotted_rules return @all_initial_dotted_rules if defined? @all_initial_dotted_rules @all_initial_dotted_rules=result= all_rules.map{|rule| DottedRule.create(rule,0,nil) } p :all_init unless defined? $OLD_PAA scanning=result provisionals=nil while true old_provisionals=provisionals provisionals={} scanning.each{|dr| dr.also_allow=dr.compute_also_allow(provisional=[false]) #fill out dr.also_allow provisionals[dr]=provisional[0] } scanning=provisionals.map{|dr,val| dr if val }.compact end until provisionals==old_provisionals end p :all_init_done return result end
# File lib/redparse/compile.rb, line 209 def all_rules return @all_rules if defined? @all_rules @inputs||=enumerate_exemplars @rules=expanded_RULES #force it to be recalculated @all_rules = map_with_index(@rules){|r,i| Rule.new r,i} @all_rules.each{|r| if StackMonkey===r.action r.action.exemplars=@inputs.grep r.action.hint end } warn "error recovery rules disabled for now; creates too many states and masks errors" @all_rules.reject!{|r| r.action==MisparsedNode } #names have to be allocated globally to make sure they don't collide names=@all_rules.map{|r| if r.action.respond_to? :name r.action.name else r.action.to_s end }.sort dups={} names.each_with_index{|name,i| dups[name]=0 if name==names[i+1] } @all_rules.each{|r| r.name= if r.action.respond_to? :name r.action.name.dup else r.action.to_s end if dups[r.name] count=dups[r.name]+=1 r.name<<"_#{count}" end } end
# File lib/redparse/compile.rb, line 1258 def all_states return @all_states if defined? @all_states @all_states=enumerate_states end
# File lib/miniredparse.rb, line 738 def beginsendsmatcher @bem||= /^(#{BEGINWORDS}|#{ENDWORDS})$/ end
# File lib/redparse.rb, line 259 def can_coalesce? rule,klass=nil,ident=nil,klass2=nil,ident2=nil Reg::Transform===rule or fail node_type= rule.right rule=rule.left.subregs.dup rule.pop if Proc|::Reg::LookAhead===rule.last rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0] =begin was, but now done by expanded_RULES #I could call this a JIT compiler, but that's a bit grandiose.... #more of a JIT pre-processor compiled_rule=@compiled_rules[rule]||= rule.map{|pattern| String|Regexp===pattern ? KW(pattern) : pattern } =end assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?) return false if klass && !can_combine?(rule,klass,ident) return false if klass2 && !can_combine2?(rule,klass2,ident2,-2) warn "plain lit matches #{node_type}" if klass==LiteralNode and klass2.nil? return true end
# File lib/redparse.rb, line 599 def can_combine2? rule,klass,ident,index=-1 #very similar to can_combine?, just above #i think can_combine2? with 3 params is equiv to can_combine? #so, the two should be merged index=-index rule_max_size=rule.inject(0){|sum,pattern| sum + pattern.itemrange.end } return true if rule_max_size<index min=max=0 rule.reverse_each{|matcher| break if index<min if Reg::Repeat===matcher #optional= matcher.times.first==0 min+=matcher.times.first max+=matcher.times.last matcher=matcher.subregs[0] else min+=1 max+=1 end next if index>max if ident return true if matcher===klass.new(ident) next #was: optional ? next : break end =begin was orlist= Reg::Or===matcher ? matcher.subregs : [matcher] orlist.map!{|m| classes=(Reg::And===m ? m.subregs : [m]).grep(Class) case classes.size when 0; return true when 1 else warn "multiple classes in matcher #{matcher}: #{classes.inspect}" end classes if classes.all?{|k| klass<=k } } return true if orlist.compact.flatten[0] =end return true if classes_matched_by(matcher).any?{|k| klass<=k } } return false end
# File lib/redparse.rb, line 568 def can_combine? rule,klass,ident rule.reverse_each{|matcher| if Reg::Repeat===matcher optional= matcher.times.first==0 matcher=matcher.subregs[0] end if ident return true if matcher===klass.new(ident) optional ? next : break end =begin was orlist= Reg::Or===matcher ? matcher.subregs : [matcher] orlist.map!{|m| classes=(Reg::And===m ? m.subregs : [m]).grep(Class) case classes.size when 0; return true when 1 else warn "multiple classes in matcher #{matcher}" end classes if classes.all?{|k| klass<=k } } return true if orlist.compact.flatten[0] =end return true if classes_matched_by(matcher).any?{|k| klass<=k } break unless optional } return false end
# File lib/redparse/compile.rb, line 1153 def check_for_parsealike_inputs all_patterns=all_rules.map{|r| r.patterns.map{|rp| Reg::Repeat===rp and rp=rp.subregs[0]; rp }}.flatten.uniq seen={} @identity_name_aliases={} warn "why are non_empty and after_equals params to BeginNode appearently ignored?" warn "some token identities overlap themselves?!?" warn "some overlaps are duplicated" warn ". and :: overlap => ..... surely that's not right" @inputs.map{|input| profile=all_patterns.map{|pat| Proc===pat ? pat : !!(pat===input)} if seen[profile] puts "#{input} overlaps #{seen[profile]}" @identity_name_aliases[seen[profile]]=input nil else seen[profile]=input end }.compact end
# File lib/redparse/compile.rb, line 1046
# Map each class in +classes+ (with Object prepended) to the list of
# classes in the set whose nearest listed ancestor it is.
def child_relations_among(*classes)
  classes.unshift Object
  tree={}
  classes.each{|klass| tree[klass]=[] }

  #p classes
  classes.each{|klass|
    lineage=klass.ancestors
    fail unless lineage.shift==klass
    # walk up the ancestry; attach to the first ancestor in the set
    lineage.each{|anc|
      children=tree[anc]
      if children
        children << klass
        break
      end
    }
  }

  tree
end
# File lib/redparse.rb, line 553
# Collect every Class mentioned anywhere inside +matcher+, descending
# through Reg::And / Reg::Or composites; [Object] when none are found.
def classes_matched_by(matcher)
  found=[]
  pending=[matcher]
  until pending.empty?
    item=pending.shift
    case item
    when Reg::And,Reg::Or then pending.concat item.subregs
    when Class then found<<item
    end
  end
  found.empty? ? [Object] : found
end
# File lib/redparse.rb, line 283
# Emit (as an array of Ruby source strings, cached per rule number) the
# hard-coded matching/reducing code for rule +rulenum+: walk the rule's
# matchers from right to left over @stack, then splice in the reduction
# (node creation, stack monkey call, shift, or accept/error).
# NOTE(review): reconstructed from a badly collapsed scrape — the heredoc
# boundaries and interpolations below are best-effort; verify against the
# published redparse gem before relying on exact generated text.
def coalesce rule,rulenum,klass=nil,ident=nil,klass2=nil,ident2=nil
  #last 4 params aren't actually neeeded anymore
  @coalesce_result||=[]
  result=@coalesce_result[rulenum]
  return result if result

  #dissect the rule
  Reg::Transform===rule or fail
  node_type= rule.right
  rule=rule.left.subregs.dup
  lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last)
  lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0]
  assert @rules[rulenum].right==node_type

  if klass==VarNode and klass2==KeywordToken
    #warn "can_combine2? about to fail"
  end

  needends=0
  result=["\n##{mui node_type}\n"]

  #index of data at which to start matching
  result<<"i=@stack.size-1 ##{mui node_type}\n#-1 because last element of @stack is always lookahead\n"

=begin was, but now done by expanded_RULES
  #I could call this a JIT compiler, but that's a bit grandiose....
  #more of a JIT pre-processor
  compiled_rule=@compiled_rules[rule]||=
    rule.map{|pattern|
      String|Regexp===pattern ? KW(pattern) : pattern
    }
=end
  assert(rule.grep(String|Regexp|Reg::Subseq|Reg::LookAhead|Reg::LookBack|Proc).empty?)
  compiled_rule=rule

  return if klass && !can_combine?(compiled_rule,klass,ident) #should never happen
  return if klass2 && !can_combine2?(compiled_rule,klass2,ident2,-2) #should never happen

  #what's the minimum @stack size this rule could match?
  rule_min_size=@min_sizes[compiled_rule]||=
    compiled_rule.inject(0){|sum,pattern|
      sum + pattern.itemrange.begin
    }
  if rule_min_size > 1
    needends+=1
    result<<"if i>=#{rule_min_size}\n"
    min_i=rule_min_size
  end

  has_loop=compiled_rule.find{|x| x.itemrange.last.to_f.infinite? }
  has_optional=compiled_rule.find{|x| x.itemrange.first.zero? }
  if Class===node_type and has_loop||has_optional
    result<<"matching=[]\n"
    need_matching=true
  end

  j=compiled_rule.size
  #actually try to match rule elements against each @stack element in turn
  first1=true
  compiled_rule.reverse_each{|matcher|
    j-=1
    result<<"i.zero? and fail\n" unless min_i && min_i>0 or first1
    first1=false

    #is this matcher optional? looping?
    maximum= matcher.itemrange.last
    minimum= matcher.itemrange.first
    loop= maximum.to_f.infinite?
    optional=minimum.zero?
    fail "looping matcher with finite maximum not supported" if maximum>1 and !loop
    if need_matching
      success="matching.unshift item"
      loopsuccess="target.unshift item"
      optfail="matching.unshift nil"
      result<<"matching.unshift target=[]\n" if loop
    end
    is_lookback=matcher .equal? lookback
    if loop or optional
      matcher=matcher.subregs[0]
      fail "lookback is not a scalar" if is_lookback
    end
    itemget="@stack[i-=1]"
    itemget="(item=#{itemget})" if success
    test="#{ref_to matcher,rulenum,j}===#{itemget} #try match of #{mui matcher}"
    p [:misparse_start, matcher] if node_type===MisparsedNode and j.zero?
    matcher= ~ (matcher.subregs[0]|NilClass) if Reg::Not===matcher
    if matcher===nil and j.zero?
      warn "rule ##{rulenum}(>>#{node_type}) can match nil at start; might match emptiness before start of stack"
    end
    if !loop
      fail unless maximum==1
      min_i-=1 if min_i
      result<<<<-END
        if #{test}
          #{success if !is_lookback}
      END
      optional ? result<<<<-END : needends+=1
        else #ignore optional match fail
          #but bump the data position back up, since the latest datum
          #didn't actually match anything.
          i+=1
          #{optfail}
        end
      END
    else
      min_i=nil
      if minimum<10
        needends+=minimum
        result<<<<-END*minimum
          if #{test}
            #{loopsuccess}
        END
        result<<<<-END
          while #{test}
            #{loopsuccess}
          end
          #but bump the data position back up, since the latest datum
          #didn't actually match anything.
          i+=1
        END
      else
        needends+=1
        result<<<<-END
          #{"n=#{minimum}" unless need_matching}
          while #{test}
            #{loopsuccess || "n-=1"}
          end
          if #{need_matching ? "target.size>=minimum" : "n<=0"} then
            #but bump the data position back up, since the latest datum
            #didn't actually match anything.
            i+=1
        END
      end
    end
  }

  #give lookahead matcher (if any) a chance to fail the match
  result<<case lookahead_processor
          when ::Reg::LookAhead
            action_idx=compiled_rule.size+1
            needends+=1
            "if #{ref_to lookahead_processor.subregs[0],rulenum,compiled_rule.size}===@stack.last ##{mui lookahead_processor.subregs[0] }\n"
          when Proc
            action_idx=compiled_rule.size+1
            needends+=1
            "if #{ref_to lookahead_processor,rulenum,compiled_rule.size}[self,@stack.last] ##{mui lookahead_processor}\n"
          else ''
          end

  #if there was a lookback item, don't include it in the matched set
  #result<<"matching.shift\n" if lookback and need_matching

  need_return=true
  #replace matching elements in @stack with node type found
  result<<
    case node_type
    when Class
      #if there was a lookback item, don't include it in the new node
      <<-END
        #{"i+=1" if lookback}
        matchrange= i...-1 #what elems in @stack were matched?
        #{"matching=@stack.slice! matchrange" unless need_matching}
        node=#{ref_to node_type,rulenum,action_idx||rule.size}.create(*matching) ##{mui node_type}
        node.startline||=#{need_matching ? "@stack[i]" : "matching.first"}.startline
        node.endline=@endline
        #{need_matching ? "@stack[matchrange]=[node]" : "@stack.insert i,node" }
      END
    when Proc,StackMonkey; ref_to(node_type,rulenum,action_idx||rule.size)+"[@stack] ##{mui node_type}\n"
    when :shift; need_return=false; "return 0\n"
    when :accept,:error; need_return=false; "throw :ParserDone\n"
    else fail
    end

  result<<"return true #let caller know we found a match\n" if need_return
  result<<"end;"*needends
  result<<"\n"

  return @coalesce_result[rulenum]=result
rescue Exception #=>e
  #puts "error (#{e}) while executing rule: #{rule.inspect}"
  #puts e.backtrace.join("\n")
  raise
end
# File lib/redparse.rb, line 239 def coalesce_loop(klass=nil,ident=nil,klass2=nil,ident2=nil) eligible=rules.reverse.map!{|rule| can_coalesce?(rule,klass,ident,klass2,ident2)&&rule } i=rules.size eligible.map!{|rule| i-=1 next unless rule if @size_cache @size_cache[[i,rule.right]]||=1 @size_cache[[i,rule.right]]+=1 end coalesce rule, i, klass,ident,klass2,ident2 } eligible.compact! @size_cache[klass2 ? [klass,ident,klass2,ident2] : ident ? ident : klass]= eligible.size if @size_cache @empty_reduce_withs+=1 if defined? @empty_reduce_withs and eligible.size.zero? return eligible end
# File lib/redparse.rb, line 779 def code_for_reduce_with ident, code code=coalesce_loop(*code) if Array===code ident.gsub!(/[\\']/){|x| "\\"+x} code=code.join @reduce_with_defns+=1 if name=@reduce_with_cache[code] @reduce_with_aliases+=1 "alias :'reduce_with_tos_#{ident}' :'#{name}'\n" else @reduce_with_cache[code]=name="reduce_with_tos_#{ident}" ["define_method('", name ,"') do\n", code ,"\nnil\nend\n"] end end
# File lib/redparse/compile.rb, line 1280
# Build the parse tables (or load them from a marshal cache), run several
# sanity/statistics passes over the state machine, then emit the C parser
# to stdout. Returns self.
def compile
  oldparser=Thread.current[:$RedParse_parser]
  Thread.current[:$RedParse_parser]||=self

  if File.exist?("cached_parse_tables.drb")
    #reconstitute a cached parser and delegate everything to it
    dup=Marshal.load(f=open("cached_parse_tables.drb","rb"))
    instance_variables.each{|var| remove_instance_variable var }
    extend SingleForwardable
    def_singleton_delegators(dup,public_methods+private_methods+protected_methods)
    self.inputs=enumerate_exemplars
    # NOTE(review): `states` is only assigned in the else branch below;
    # the statistics passes that follow would raise NameError on this
    # (cache-hit) path — confirm whether this path is ever taken.
  else
    @generating_parse_tables=true
    @inputs||=enumerate_exemplars
    states=all_states
    # @rules=expanded_RULES
    #dump-to-cache block below is disabled by the leading `false &&`
    false && \
    begin
      @inputs=nil #Marshal no like it
      p :dumping
      Marshal.dump(self,f=open("cached_parse_tables.drb","wb"))
      p :dump_done!
    rescue Exception
      p :dump_failed
      File.unlink "cached_parse_tables.drb"
    ensure
      @inputs=enumerate_exemplars
    end
  end
  f.close if f

  #look for unused dotted rules and actions
  #also states with drs past the end
  past_end=0
  drs=all_dotted_rules
  dr_count=Hash.new(0)
  acts=all_rules#.map{|r| r.action }.uniq
  act_count=Hash.new(0)
  states.each{|state|
    state.dotteds.each{|dr|
      dr_count[dr]+=1
      past_end+=1 if dr.pos>=dr.rule.patterns.size
    }
    sav=state.actions.values
    sav.grep(Class|StackMonkey).each{|act| act_count[act.__id__]+=1 }
    sav.grep(MultiReduce|MultiShift).each{|multi|
      multi.actions.each{|act| act_count[act.__id__]+=1}
    }
    #p state.name if state.dotteds.select{|dr| dr.rule.action==BeginNode}
  }
  puts "#{past_end} dotted rules found past the end of their rule" if past_end>0
  nevers=0
  drs.each{|dr|
    next unless dr_count[dr].zero?
    puts "never reached #{dr.name}"
    nevers+=1
  }
  puts "#{nevers} dotted rules were never reached (out of #{drs.size})"
  nevers=0
  acts.each{|act|
    next unless act_count[act.__id__].zero?
    puts "never reached #{act.name rescue act}"
    nevers+=1
  }
  puts "#{nevers} actions were never reached (out of #{acts.size})"
  p :most_popular_nontrivial_drs
  pp dr_count.reject{|(dr,n)| dr.pos.zero? or dr.pos==1 && dr.rule.lookback?} \
       .sort_by{|(dr,n)| n}[-15..-1].map{|(dr,n)| [dr.name,n] }

  #look for duplicate states
  actions2state={}
  dup_states=0
  states.each{|st|
    cache=actions2state[st.actions]
    if cache
      st.equivalent_to=cache
      dup_states+=1
    else
      actions2state[st.actions]=st
    end
  }
  puts "#{dup_states} duplicate states" if dup_states.nonzero?

  name2count={}
  states.each{|state| state.rename(name2count) }

  #divide each state's actions into sr and goto tables
  #also scan states for the most common sr and goto actions and make them default
  states.each{|state| state.make_sr_goto_tables @inputs}

  # pp states
  # pp states.size

  generate_c $stdout
  return self
ensure
  remove_instance_variable :@generating_parse_tables rescue nil
  Thread.current[:$RedParse_parser]=oldparser
end
# File lib/miniredparse.rb, line 118 def delete_monkey(index,name) DeleteMonkey.new(index,name) end
# File lib/miniredparse.rb, line 707 def dont_postpone_semi @dps||=~wants_semi_context end
# File lib/redparse/compile.rb, line 1134 def enumerate_exemplars return @@exemplars if defined? @@exemplars #dunno why this is necessary result= STACKABLE_CLASSES() \ .map{|sc| sc.enumerate_exemplars } \ .inject{|sum,sc| sum+sc} result.map!{|sc| res=sc.shift.allocate until sc.empty? eval "def res.#{sc.shift}; #{sc.shift.inspect} end" end def res.to_s; identity_name end res } return @@exemplars=result end
# File lib/redparse/compile.rb, line 1173 def enumerate_states inputs=check_for_parsealike_inputs inputs.reject!{|x| StartToken===x} result=[] todo=[start_state] seenlist = {} seenlist.default=:dunno_yet j=0 start=was=Time.now in_result={} #this should go away; obsoleted by @states state_num=-1 todo.each{|st| in_result[st]=(state_num+=1) } ps=todo.first pp [-in_result[ps], *ps.dotteds.map{|dr| dr.name }] old_todo_size=todo.size while state=todo.shift result<<state i=0 inputs.each {|input| newstate=state.evolve input,self,seenlist assert ACTION_PATTERN===newstate #newstate is ParserState|MultiShift|MultiReduce|Rule|:accept|:error state[input.identity_name]=newstate next unless newstate.respond_to? :substates #newstate.substates is just [newstate] for plain ParserStates morestates=newstate.substates.reject{|x| in_result[x]} morestates.each{|st| in_result[st]=(state_num+=1) } # p [in_result[state],:+,input.identity_name,:>>,pretty(newstate,in_result)] todo.concat morestates # pp morestates.map{|ps| # [-in_result[ps], *ps.dotteds.map{|dr| dr.name }] # } # pp pretty(newstate,in_result) unless ParserState===newstate } now=Time.now p [:*,j+=1,todo.size,todo.size-old_todo_size,now-was,j/(now-start),(100.0*j/(j+todo.size)).to_i] old_todo_size=todo.size was=now # if state.actions.values.uniq==[:error] #this can happen when the only dotted rule is for an :error #maybe this case can be optimized? # end end self.rmd_cache=nil self.oc_cache=nil self.sl2ms_cache=nil return result end
4.5 Error Recovery yacc's error recovery mechanism is rather idiosyncratic. In fact, examining two books, [LMB92] and [ASU86], and the output generated by yacc yields three different descriptions of the recovery mechanism. We have tried to be faithful to the output of yacc. Fortunately, the mechanism has few consequences for the generation of the rest of the hard-coded parser. The only change to the parser is the maintenance of the variable, yyerrorstatus. Although relatively short, the code below is very subtle, like the explanation of yacc's error recovery mechanism. The code is given only for completeness.
# File lib/redparse/generate.rb, line 323 def error_handler %[ error_handler: if (yyerrorstatus > 2){ yyerror("syntax error"); } user_error_handler: if (yyerrorstatus == 0){ huh if (la_identity == 0) YYABORT;// End of input. la_identity = yylex(&la_token); switch (OLDSTACK){ #{@states.map{|state| i=state.small_int "case #{i}: goto state_action_#{str2cname state.name};\n" } } }else{ yyerrorstatus = 0; while (stack != stack_start){ switch (OLDSTACK){ case N: goto state_M;// iff M = goto[N,error]. . . . } stack--; } YYABORT;// Empty stack. } ] end
# File lib/miniredparse.rb, line 120 def evaluate rule #dissect the rule if false rule=rule.dup lookahead_processor=(rule.pop if Proc===rule.last) node_type=rule.pop else Reg::Transform===rule or fail node_type= rule.right rule=rule.left.subregs.dup lookahead_processor=(rule.pop if Proc|::Reg::LookAhead===rule.last) lookback=rule[0]=rule[0].subregs[0] if ::Reg::LookBack===rule[0] end #index of data at which to start matching i=@stack.size-1 #-1 because last element of @stack is always lookahead #I could call this a JIT compiler, but that's a bit grandiose.... #more of a JIT pre-processor compiled_rule=@compiled_rules[rule]||= rule.map{|pattern| String|Regexp===pattern ? KW(pattern) : pattern } #what's the minimum @stack size this rule could match? rule_min_size=@min_sizes[compiled_rule]||= compiled_rule.inject(0){|sum,pattern| sum + pattern.itemrange.begin } i>=rule_min_size or return false matching=[] #actually try to match rule elements against each @stack element in turn compiled_rule.reverse_each{|matcher| i.zero? and fail target=matching #is this matcher optional? looping? loop= matcher.itemrange.last.to_f.infinite? minimum=matcher.itemrange.first optional=minimum.zero? matching.unshift target=[] if loop if loop or optional matcher=matcher.subregs[0] end begin if matcher===@stack[i-=1] #try match target.unshift @stack[i] else #if match failed, the whole rule fails #unless this match was optional, in which case, ignore it #or was looping and met its minimum #but bump the data position back up, since the latest datum #didn't actually match anything. return false unless optional or loop&&target.size>=minimum i+=1 matching.unshift nil unless loop break end end while loop } matchrange= i...-1 #what elems in @stack were matched? 
#give lookahead matcher (if any) a chance to fail the match case lookahead_processor when ::Reg::LookAhead return false unless lookahead_processor.subregs[0]===@stack.last when Proc return false unless lookahead_processor[self,@stack.last] end #if there was a lookback item, don't include it in the new node if lookback matchrange= i+1...-1 #what elems in @stack were matched? matching.shift end #replace matching elements in @stack with node type found case node_type when Class node=node_type.create(*matching) node.startline||=@stack[matchrange.first].startline node.endline=@endline @stack[matchrange]=[node] when Proc,StackMonkey; node_type[@stack] when :shift; return 0 when :accept,:error; throw :ParserDone else fail end return true #let caller know we found a match rescue Exception=>e #puts "error (#{e}) while executing rule: #{rule.inspect}" #puts e.backtrace.join("\n") raise end
# File lib/redparse/compile.rb, line 1263 def exemplars_that_match p @inputs.grep p end
inline any subsequences in RULES
right into the patterns reg should do this already, but current release does not
# File lib/miniredparse.rb, line 370 def expanded_RULES result=RULES() return result if (-[:foo, -[:bar]]).subregs.grep(Reg::Subseq).empty? result.map!{|rule| unless rule.left.subregs.grep(Reg::Subseq) then rule else right=rule.right rule=rule.left.subregs.dup (rule.size-1).downto(0){|i| if Reg::Subseq===rule[i] rule[i,1]=rule[i].subregs end } -rule>>right end } end
The case arms of the switch statement are taken directly from the goto table that was computed by the LALR(1) grammar analysis. Because this switch cannot fail, no default entry is needed. However, making the most common case arm the default is a trivial time and space optimization.
# File lib/redparse/generate.rb, line 299 def generate_c output output<< init_code output<< state_utils (0...RULES().size).each_with_index{|i,m| output<< (reduce i,m) } node_types.each{|nt| output<< (nonterminal nt) } map_with_index(all_states){|st,i| output<< (state st,i) } #output<< error_handler #disabled, i have rules for error recovery output<< "}" end
# File lib/miniredparse.rb, line 1132 def get_token(recursing=false) unless @moretokens.empty? @last_token=@moretokens.shift p @last_token if ENV['PRINT_TOKENS'] && @print_filter[@last_token] and not recursing return @last_token end rpt=ENV['RAW_PRINT_TOKENS'] begin result=@lexer.get1token or break p result if rpt and @print_filter[result] #set token's line result.startline= @endline||=1 #result.endline||=@endline if result.respond_to? :endline= if result.respond_to?(:as) and as=result.as #result=make_kw(as,result.offset) #result.originally=result.ident if OperatorToken===result #or KeywordToken===result result=result.dup result.ident=as else result2=make_kw(as,result.offset) result2.startline=result.startline result2.endline=result.endline result=result2 end result.not_real! if result.respond_to? :not_real! else case result when FileAndLineToken #so __FILE__ and __LINE__ can know what their values are @file=result.file @endline=result.line redo when OperatorToken if @unary_or_binary_op===result.ident and result.unary || result.tag==:unary result=result.dup result.ident+="@" end #more symbol table maintenance.... when KeywordToken case name=result.ident when /^(#{BINOP_KEYWORDS.join '|'})$/o #should be like this in rubylexer unless result.has_end? orig=result result=OperatorToken.new(name,result.offset) result.endline=orig.endline end when "|"; orig=result result=GoalPostToken.new(result.offset) #is this needed still? (yes) result.endline=orig.endline when "__FILE__"; #I wish rubylexer would handle this #class<<result; attr_accessor :value; end assert result.value==@file.dup when "__LINE__"; #I wish rubylexer would handle this #class<<result; attr_accessor :value; end assert result.value==@endline else result=make_kw name,result.offset if defined? 
SPECIALIZED_KEYWORDS #warning, this may discard information stored in instance vars of result end when StringToken,HerePlaceholderToken @endline=result.endline when EoiToken; break when HereBodyToken; @endline=result.endline break when AssignmentRhsListStartToken; break when AssignmentRhsListEndToken; break when IgnoreToken; redo end end end while false p result if ENV['PRINT_TOKENS'] && @print_filter[@last_token] unless recursing #ugly weak assertion assert result.endline==@endline unless result.ident==';' && result.endline-1==@endline or EoiToken===result return @last_token=result end
# File lib/redparse/compile.rb, line 1275 def identity_name_alias? name alias_=@identity_name_aliases[name] return( alias_||name ) end
3 LR-Parsing Mechanics We briefly explain the fundamentals of shift-reduce parsing (which represents the LR(1) family) without going into any more detail than necessary for subsequent exposition. LALR(1) parsers like yacc simulate, either directly or indirectly, a very simple automaton with a stack of automaton states [FL88]. (Parsers generated by yacc also maintain a semantic stack, but since that stack grows in parallel with the state stack, we only describe the use of the state stack here.) Simulating the automaton requires two mechanisms: one for determining the action, which is determined by the current input symbol and the state on the top of the stack, and one for determining state transitions based on the current top of stack and a grammar symbol. At parser-generation time LALR(1) grammar analysis builds these tables, called action and goto, respectively. (The analysis is necessary regardless of whether a table-driven or hard-coded parser is desired.) Functionally, these tables have the following signatures.
goto: state x symbol -> state action: state x token -> {shift,reduce_y,accept,error}
There are only four possible actions: reduce, shift, accept, and error. Reduce actions are parameterized by the grammar production being reduced. Actions are described below. let TOS be the state on the top of the stack, and let la_identity be the current lookahead token.
shift A shift pushes goto[TOS, la_identity] onto the stack, and updates la_identity by advancing the lexical analyzer.
reduce_y A reduction processes production Y : X -> x_1…x_n, which requires popping n states off the stack, followed by pushing goto[TOS, X]. (The semantic action of the parser relating to this production would be executed prior to popping states off the stack.)
accept An accept signals a successful parse.
error An error requires error reporting and/or recovery.
4 Simple Implementation mule creates a single parsing routine, yyparse(), that simulates the LALR(1) parser directly in ANSI C, without interpreting any tables. The routine has five simple parts: initialization, automata states, reduction actions, nonterminal transitions, and error recovery. Although very similar to the inverted table structure in [Pfa90], this structure avoids the duplication of semantic action routines. Another difference is the yacc-compatible error recovery. The structure is simple, with all code being generated from a tiny set of small, well-defined templates that directly mirror the grammar or LALR(1) automaton. Since both the state stack and the semantic stack grow in unison, we wrap the stack entries into a single structure, StackType.
4.1 Initialization The initialization phase simply sets up bookkeeping and data structures for subsequent automata simulation. It is grammar-independent.
# File lib/redparse/generate.rb, line 75 def init_code " #define YYABORT do { \\ free(start_stack);return -1; \\ } while(0) #define YYACCEPT do { \\ YYSTYPE result=SEMANTIC_STACK; \\ free(start_stack); \\ return result; \\ } while(0) /*#define yyclearin_token = yylex(&la_token)*/ #define yyerrok yyerrorstatus = 3 #define YYERROR goto user_error_handler #define YYRECOVERING() (yyerrorstatus <= 2) typedef VALUE YYSTYPE; #if 0 typedef struct stackType{ int state;// State stack element. } StackType; typedef struct { VALUE semantic; } SemanticStackType; #else typedef int StackType; typedef VALUE SemanticStackType; #end int yyparse(void){ YYSTYPE la_token;// Semantic value computed by yylex(). int la_identity; unsigned yyerrorstatus = 3;// Initialize error-recovery counter. YYSTYPE yyredval;// Variable holds semantic value of$$. VALUE semantic_stack; /*Array of Node|Token*/ // SemanticStackType *semantic_stack_start; StackType *stack_start;// Stack. unsigned i=0; unsigned stack_size=64; stack_start=realloc(NULL,sizeof(StackType)*stack_size); if (stack_start==NULL) MALLOC_ERROR(); semantic_stack=rb_ary_new(); // semantic_stack_start=realloc(NULL,sizeof(SemanticStackType)*stack_size); // if (semantic_stack_start==NULL) MALLOC_ERROR(); la_identity = yylex(&la_token); /* Get 1st token.*/ goto shift_state_#{str2cname all_states.first.name};/* Start state.*/ " end
# File lib/redparse/compile.rb, line 1417 def initial_state @states={} all_initial_dotted_rules #is this still needed? result=new_state all_rules.map{|r| DottedRule.create(r,0,self)} result.name="initial" #result.perhaps_also_allow all_rules,self #silly here result end
this is a hack, should use graphcopy to search for Deferreds and replace with double-Deferred as below
# File lib/miniredparse.rb, line 689 def item_that(*a,&b) if defined? @generating_parse_tables huh unless b #double supers, one of them in a block executed after this method returns.... #man that's weird super(*a){|ob| @saw_item_that[[super(*a,&b),ob]]=true} else super(*a,&b) #and then here's another end end
# File lib/miniredparse.rb, line 668 def left_op_higher(op,op2) KeywordToken===op2 or OperatorToken===op2 or return true rightprec=@precedence[op2.to_s] or return true rightprec+=EPSILON if @RIGHT_ASSOCIATIVE[op2.to_s] return @precedence[op.to_s]>=rightprec end
# File lib/miniredparse.rb, line 680 def lower_op return @lower_op if defined? @lower_op lower_op=item_that{|op| left_op_higher(@stack[-3],op) } lower_op=(LOWEST_OP|(~VALUELIKE_LA() & lower_op)).la lower_op.extend LowerOp_inspect @lower_op=lower_op end
# File lib/miniredparse.rb, line 601 def make_specialized_kw(name,offset) name=Punc2name[name] unless /^((?!#{LETTER_DIGIT}).)+$/o===name KW2class[name].new(offset) end
# File lib/redparse/compile.rb, line 203 def map_with_index(list) result=[] list.each_with_index{|elem,i| result<<yield(elem,i)} result end
HIER=Class::FlattenedHierarchy.new *STACKABLE_CLASSES
# File lib/miniredparse.rb, line 360 def new_disabled_reduce #@hier||=Class::FlattenedHierarchy.new *STACKABLE_CLASSES() @reducer||=Reducer.new(@rules) @reducer.reduce(@stack) end
# Build the start state from every rule that can reduce (directly) to one
# of the ultimate goal node types.
# NOTE(review): another start_state is defined in redparse/compile.rb —
# confirm which definition is live under this load order.
def start_state
  goal=ultimate_goal_nodes
  result=all_rules.select{|rule|
    rt=rule.reduces_to and
      !goal.select{|node| node>=rt}.empty?
  }
  result.map!{|rule| DottedRule.create(rule,0,parser)}
  result=ParserState.new result
  result.name="start_state"
  result
end
# File lib/redparse/compile.rb, line 1408 def new_state(drs,unruly_also=false) result=ParserState.new drs,@states.size result.perhaps_also_allow all_rules,self cache=@states[result] return cache if cache @states[result]=@states.size return result end
User actions are associated with reductions, and the code corresponding to a given production is expanded in-place. After the user code, the symbols associated with right-hand side of the production are popped, followed by copying $$ onto the semantic stack. Finally, there is a jump to the code that will compute the appropriate state given the left-hand side symbol of this production.
4.4 Nonterminal Transitions For each nonterminal, code is produced to compute (and jump to) the appropriate state given the current state. This simple switch statement is given below.
# File lib/redparse/generate.rb, line 277 def nonterminal(j) " nonterminal_#{str2cname j.name}: /*nonterminal_#{j.small_int}:*/ switch (OLDSTACK){ // Top of stack. #{ all_states.map_with_index do|state,k| %[ case #{k}: goto state_#{str2cname state.goto[j].name};\n] end } } " rescue Exception=>e backtrace.unshift("exception in node(nonterminal) #{j.name} #{e.class}:#{e}").join("\n") end
# File lib/redparse.rb, line 953 def old_reduce_loop catch(:ParserDone){ loop { #try all possible reductions next if reduce==true #no rule can match current @stack, get another token tok=get_token or break #are we done yet? #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break #shift our token onto the @stack @stack.push tok }} end
try all possible reductions
# File lib/redparse.rb, line 673 def old_slow_reduce shift=nil @rules.reverse_each{|rule| shift=evaluate(rule) and break } return shift end
# File lib/miniredparse.rb, line 260 def parse #hack, so StringToken can know what parser its called from #so it can use it to parse inclusions oldparser=Thread.current[:$RedParse_parser] Thread.current[:$RedParse_parser]||=self return @cached_result if defined? @cached_result @rules||=expanded_RULES() # @inputs||=enumerate_exemplars @stack=[StartToken.new, get_token] #last token on @stack is always implicitly the lookahead catch(:ParserDone){ loop { #try all possible reductions next if reduce==true #no rule can match current @stack, get another token tok=get_token or break #are we done yet? #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break #shift our token onto the @stack @stack.push tok }} @stack.size==2 and return result=NopNode.new #handle empty parse string #unless the @stack is 3 tokens, #with the last an Eoi, and first a StartToken #there was a parse error unless @stack.size==3 pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK'] top=MisparsedNode.new("(toplevel)", @stack[1...-1],'') raise ParseError.new(top.msg,@stack) end EoiToken===@stack.last or fail StartToken===@stack.first or fail result= @stack[1] #multiple assignment must be resolved #afterwards by walking the parse tree. #(because the relative precedences of = and , #are reversed in multiple assignment.) # result.respond_to? :fixup_multiple_assignments! and # result=result.fixup_multiple_assignments! #relative precedence of = and rescue are also inverted sometimes # result.respond_to? :fixup_rescue_assignments! and # result=result.fixup_rescue_assignments! #do something with error nodes msgs=[] result.walk{|parent,i,subi,node| if node.respond_to? :error? and node.error?(@rubyversion) msgs<< @filename+":"+node.blame.msg false else true end } if result.respond_to? :walk #hack hack result.errors=msgs unless msgs.empty? 
#other types of errors (lexer errors, exceptions in lexer or parser actions) #should be handled in the same way, but currently are not # puts msgs.join("\n") rescue Exception=>e input=@lexer if Array===input STDERR.puts "error while parsing:" STDERR.write input.pretty_inspect input=nil else input=input.original_file inputname=@lexer.filename STDERR.puts "error while parsing #@filename:#@endline: <<< #{input if input.to_s.size<=1000} >>>" end e.backtrace.each{|l| p l } raise else unless msgs.empty? pp @stack[-[15,@stack.size].min..-1] if ENV['PRINT_STACK'] raise RedParse::ParseError.new(msgs.join("\n"),@stack) end # result=NopNode.new if EoiToken===result return result ensure @write_cache.put(@input,result) if @write_cache and result and !result.errors @stack=nil Thread.current[:$RedParse_parser]=oldparser end
# File lib/redparse.rb, line 768 def parser_identity #what is the relationship between this method and #signature? #can the two be combined? result=class<<self; ancestors end.reject{|k| !k.name} result.reject!{|k| !!((::RedParse<k)..false) } result.reject!{|k| k.name[/^(?:RedParse::)?ReduceWiths/] } result.reverse! result.push @rubyversion #@rubyversion in identity is a hack; should have RedParse1_9 module instead end
# File lib/redparse/compile.rb, line 1267 def pattern_matches_nodes? p !@inputs.grep(Node&p).empty? end
# File lib/redparse/compile.rb, line 1271 def pattern_matches_tokens? p !@inputs.grep(Token&p).empty? end
# File lib/redparse/compile.rb, line 1229 def pretty(x,in_result) case x when ParserState; in_result[x] when MultiReduce pairs=x.list.dup result=[] until pairs.empty? cond,act,*pairs=*pairs cond = cond.inspect result<<[cond,pretty(act.action,in_result)] end result<<pretty(x.default,in_result) result.unshift :MultiReduce when MultiShift h={} mods=x.modifiers its=[] (0...mods.size).step(2){|i| its<<mods[i] } x.map.each_with_index{|xx,i| h[i]=pretty(xx) } [:MultiShift, its,h] when Class; x.name when StackMonkey; x.name when :accept,:error; x else fail "not a valid action: #{x}" end end
# File lib/redparse.rb, line 59 def pretty_stack max=nil target=@stack target=target[-max..-1] if max and max<target.size target.map{|n| res=n.inspect res<<"\n" unless res[-1]=="\n" res } end
# File lib/redparse.rb, line 1904 def redparse_modules_init end
try all possible reductions
# File lib/miniredparse.rb, line 252 def reduce shift=nil @rules.reverse_each{|rule| shift=evaluate(rule) and break } return shift end
#choose the reduce dispatch expression at load time: hashed reducer-method
#dispatch when available, plain #reduce otherwise
reduce_call= HASHED_REDUCER ?
  'send(@stack[-2].reducer_method(@stack))' :
  'reduce'
#define reduce_loop with the chosen dispatch baked in (eval'd so the
#HASHED_REDUCER conditional costs nothing per loop iteration)
eval <<-END,__FILE__,__LINE__
  def reduce_loop
    catch(:ParserDone){
      ( @stack.push(get_token||break) unless(#{reduce_call}==true) ) while true
    }
  end
END
# File lib/redparse.rb, line 979 def reduce_loop catch(:ParserDone){ while true #try all possible reductions #was: next if reduce==true next if send(@stack[-2].reducer_method(@stack))==true #no rule can match current @stack, get another token tok=get_token or break #are we done yet? #tok.nil? or EoiToken===tok && EoiToken===@stack.last and break #shift our token onto the @stack @stack.push tok end } end
# File lib/redparse.rb, line 1908 def reduce_withs_directory "redparse" end
# File lib/redparse.rb, line 480 def ref_to obj,i,j assert j<=0x3FF if Module===obj and obj.name return obj.name elsif ref=@@ref_to_cache_by_id[obj.__id__] || @@ref_to_cache[(i<<10)+j] return ref else @@ref_to_rules||= rules.map{|rule| rule.left.subregs.map{|pat| case pat when String,Regexp #not needed anymore...? RedParse::KW(pat) when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now pat.subregs[0] #subseqs handled already else pat end }<<rule.right } @ref_to_code||=[] name="@@ref_#{@@ref_to_idx+=1}" #eval "#{name}=obj" unless @@ref_to_rules[i][j]==obj warn "ref_to mismatch" end @ref_to_code<<"#{name}=rules[#{i}][#{j}]" @ref_to_code<<"warn_unless_equal #@@ref_to_idx,mui(#{name}),#{squote mui( obj )}" @@ref_to_cache[(i<<10)+j]=name @@ref_to_cache_by_id[obj.__id__]=name end end
The state number is stored in the stack, followed by possibly invoking the lexical analyzer. The three optional lines store the semantic value of the current token, advance the lexical analyzer, and do error-recovery bookkeeping. Incrementing the stack pointer completes the push. The case arms of the switch are determined by the action table computed by the LALR(1) analysis; for each condition met in the comments, a case arm must be generated. Default actions were developed for compressing table-driven parsers, and can be similarly employed here for generating the switch's default [FL88].
4.3 Reduction Actions One piece of code is generated for each production. Its template is given below.
# File lib/redparse/generate.rb, line 219 def repl(rule,m) repl=rule.replacement case repl when :shift,:accept #do nothing? when Class %[static VALUE repl_#{rule.name}=rb_const_lookup(rb_const_lookup(kNIL,"RedParse"),"#{repl.name});\n] when StackMonkey huh else huh end end
# File lib/redparse.rb, line 745 def rubykeywordlist; RUBYKEYWORDLIST end
# File lib/redparse.rb, line 744 def rubyoperatorlist; OPERATORS end
# Expand a rule pattern +m+ into the flat list of token/node classes it
# can match: a Class expands to itself plus all known subclasses (from
# @subclasses_of, when present); strings/regexps stand for keywords and
# expand via RedParse.KW; Reg combinators are recursed into.
def juice(m)
  case m
  when Class
    # no subclass table available? just the class itself
    return [m] unless @subclasses_of
    result=[m] # and subclasses too
    i=0
    # Breadth-first walk of the subclass table. Entries may be missing;
    # the inline rescue makes the lookup best-effort.
    # BUGFIX/cleanup: removed leftover debug print (`p item`) that wrote
    # every visited class to stdout.
    while item=result[i]
      result.concat @subclasses_of[item] rescue nil
      i += 1
    end
    result
  when String,Regexp; juice(RedParse.KW(m))
  when Reg::And; m.subregs.map{|x| juice(x).flatten.compact}.inject{|sum,rr| sum&rr}
  when Reg::Or; m.subregs.map &method(:juice)
  when Reg::Not
    m=m.subregs[0]
    if Class===m or (Reg::Or===m and m.subregs.find{|x| Class===x })
      juice(m)
    else
      []
    end
  else []
  end
end
# File lib/redparse/compile.rb, line 1109
# Like juice, but computes "shift classes": reduce a pattern to the
# token classes it could start with. Strings and regexps stand for
# keywords, so both map to KeywordToken. Wrapper combinators
# (Not/LookAhead/LookBack/Repeat) delegate to their inner pattern.
def sc_juice(m)
  case m
  #when Class; [m]
  when String,Regexp
    [KeywordToken]
  when Reg::And
    per_sub=m.subregs.map{|sub| sc_juice(sub) }
    per_sub.compact.map{|x| x.flatten.compact}.inject{|sum,rr| sum&rr }
  when Reg::Or
    m.subregs.map(&method(:sc_juice))
  when Reg::Not, Reg::LookAhead, Reg::LookBack, Reg::Repeat
    sc_juice(m.subregs[0])
  else
    []
  end
end
# File lib/miniredparse.rb, line 1060
# Identity of this parser instance: the full singleton ancestor chain,
# so parsers with the same set of mixins share cached tables.
def signature
  singleton=class<<self;self end
  RedParse.signature(singleton.ancestors)
end
# File lib/miniredparse.rb, line 116
# Convenience factory for StackMonkey parse-stack manipulators.
def stack_monkey(*args,&block)
  monkey=StackMonkey.new(*args,&block)
  monkey
end
# File lib/redparse/compile.rb, line 1428
# Build the parser's start state: evolve the initial state over a fresh
# StartToken, then widen it with any other rule that might also apply.
def start_state
  seen=Hash.new
  seen.default=:dunno_yet
  state=initial_state.evolve StartToken.new, self, seen
  state.perhaps_also_allow all_rules, self
  state.name="start"
  state
end
4.2 Hard-coded States For each automaton state, mule creates code responsible for simulating the action of that state based on the current input token. All transitions into a given state are labeled with the same grammar symbol. States labeled with a token are called shift states and they require extra code to advance the lexical analyzer. The template of this code for state N is
# File lib/redparse/generate.rb, line 180
# Emit the hard-coded C code simulating LR state +state_n+ (small
# integer id +n+): a shift entry point that fetches the next token,
# the stack update, and a switch over the lookahead token dispatching
# to the state's actions (with a default action).
def state(state_n,n)
  #n=state_n.small_int
  name=state_n.name
  "
  shift_state_#{name}:
    GET_TOKEN(); /*modifies token, la_token*/
  state_#{name}: /*state_#{n}:*/
    STACK = #{n};
    RESERVE_STACK_SLOT();
  state_action_#{name}: /* Error-recovery entry point.*/
  /*state_action_#{n}:*/
    switch (la_identity){
#{state_n.actions.map do |tok,action|
    %[      case #{str2cname(tok)}: #{action2c action}]
  end.join(%[\n])
}
      default: #{action2c state_n.actions.default}
    }
  "
rescue Exception=>e
  # BUGFIX: the rescue body referenced a bare, undefined `backtrace`;
  # the backtrace must come from the caught exception object.
  e.backtrace.unshift("exception in state #{name} #{e.class}:#{e}").join("\n")
end
# File lib/redparse/generate.rb, line 128 def state_utils " #define MALLOC_ERROR() huh #define RESERVE_STACK_SLOT() \\ if (++i >= stack_size){ \\ unsigned new_stack_size=stack_size*2; \\ stack_start=realloc(stack_start,sizeof(StackType)*new_stack_size); \\ if (stack_start==NULL) MALLOC_ERROR(); \\ //semantic_stack_start=realloc(semantic_stack_start,sizeof(SemanticStackType)*new_stack_size); \\ //if (semantic_stack_start==NULL) MALLOC_ERROR(); \\ stack_size=new_stack_size; \\ } #define GET_TOKEN() \\ do { \\ SEMANTIC_STACK_SET(la_token); /*Put lexical semantic entry on stack.*/ \\ la_identity = yylex(&la_token); /* Advance lexical analysis.*/ \\ yyerrorstatus++; /* Update error-recovery counter.*/ \\ } while(0) #define STACK stack_start[i] #define SEMANTIC_STACK rb_ary_get(semantic_stack,rb_int2fixnum(i)) #define SEMANTIC_STACK_SET(x) rb_ary_set(semantic_stack,rb_int2fixnum(i),x) #define OLDSTACK stack_start[i-1] " end
# File lib/redparse/generate.rb, line 382
# Delegate C-identifier mangling to the class-level implementation.
def str2cname(str)
  RedParse.str2cname(str)
end
irb-friendly inspect / #to_s
# File lib/miniredparse.rb, line 49 def to_s mods=class<<self;self end.ancestors-self.class.ancestors mods=mods.map{|mod| mod.name }.join('+') mods="+"<<mods unless mods.empty? "#<#{self.class.name}#{mods}: [#{@input.inspect}]>" end
# File lib/redparse/compile.rb, line 1381
# Collect the grammar's ultimate goal nodes: for every rule of the exact
# shape [StartToken, <goal pattern>, EoiToken], expand the middle
# pattern (via juice) and return the flattened list.
#
# BUGFIX: the guard previously read `rule.patterns.size==0`, which
# contradicts the first/last checks (an empty pattern list has no
# first/last and no patterns[1]), so the method always returned [].
# A Start/goal/Eoi rule has exactly three patterns.
def ultimate_goal_nodes
  result=[]
  all_rules.each{|rule|
    if rule.patterns.size==3 and
       rule.patterns.first==StartToken and
       rule.patterns.last==EoiToken
      result << juice(rule.patterns[1])
    end
  }
  result.flatten!
  return result
end
# File lib/redparse/compile.rb, line 1452 def undumpables return @undumpables if @undumpables @rules||=expanded_RULES n=-1 @undumpables={} abortable_graphwalk(@rules){|cntr,o,i,ty| !case o when StackMonkey @undumpables[o.name]=o when Reg::Deferred @undumpables[n+=1]=o class<<o attr_accessor :undump_key end o.undump_key=n end } end
# File lib/miniredparse.rb, line 1225
# Push a single token back so it is the next one the parser sees.
def unget_token(token)
  @moretokens.insert 0,token
end
# File lib/miniredparse.rb, line 1221
# Push several tokens back; they will be consumed in the given order,
# ahead of anything already pending.
def unget_tokens(*tokens)
  @moretokens=tokens + @moretokens
end
# File lib/redparse/compile.rb, line 1122
# Rules that can't be handled by the normal table machinery (as reported
# by Rule#unruly?). Memoized; `defined?` is used so that even an empty
# result is cached.
def unruly_rules
  return @unruly_rules if defined? @unruly_rules
  # BUGFIX/cleanup: removed leftover debug output (`p :unruly_rules` and
  # a `pp` of the rule names) that polluted stdout on first call.
  @unruly_rules = all_rules.select{|rule| rule.unruly? }
end
# File lib/miniredparse.rb, line 398 def vertices; self.class.constants.grep(Node|Token) end
# File lib/miniredparse.rb, line 704
# Matcher for token contexts after which a newline is significant
# (i.e. where a semicolon would be wanted): the operators <<, =>, .,
# and ::, or any keyword listed in WANTS_SEMI.
def wants_semi_context
  keyword_alts=WANTS_SEMI.map{|ws| Regexp.quote ws }.join('|')
  Op(/^(<<|=>|\.|::)$/) | KW(/^(#{keyword_alts})$/)
end
# File lib/redparse.rb, line 795
# Generate and write this parser's "reduce_withs" support file: a Ruby
# module of pre-compiled reducer glue, keyed by the classes (and
# keyword/operator parameters) that can appear on top of the parse
# stack. The result is cached in @@rules_compile_cache and written to
# +path+ (default: the redparse/ directory on the load path).
# Statistics about the generated coalescences are warned to stderr.
def write_reduce_withs path=nil
  return unless HASHED_REDUCER
  start=Time.now
  @size_cache={}
  identity=parser_identity
  @reduce_with_cache={}
  @reduce_with_aliases=0
  @empty_reduce_withs=@reduce_with_defns=0
  expanded_RULES()
  shortnames=[] #[[],[]]
  # Every Node/Token class known to the parser and its helper containers.
  list=[self.class,*addl_node_containers].map{|mod|
    mod.constants.select{|k| /(?:Node|Token)$/===k.to_s }.map{|k| mod.const_get k }
  }.flatten.grep(Class).uniq
  #list=STACKABLE_CLASSES()
  # Abstract bases and specially-handled tokens are excluded.
  list -= [KeywordToken,ImplicitParamListStartToken,ImplicitParamListEndToken,
    Token,WToken,NewlineToken,DecoratorToken,Node,ValueNode]
  list.reject!{|x| IgnoreToken>=x and not /(^|:)AssignmentRhs/===x.name}
  exprclasses,list=list.partition{|k| k<=ValueNode }
  fail unless list.include? StartToken
  # Per-class glue: ensure every stackable class answers
  # reducer_method/reducer_ident (only if not already defined).
  indexcode=list.map{|klass|
    shortname=klass.to_s[/[^:]+$/]
    warn "empty reducer_ident for ::#{klass}" if shortname.empty?
    <<-END
      class ::#{klass}
        def reducer_method(stack)
          :reduce_with_tos_#{shortname}
        end if instance_methods(false).&(["reducer_method",:reducer_method]).empty?
        def reducer_ident
          :#{shortname}
        end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
      end
    END
  }.concat(exprclasses.map{|exprclass|
    shec=exprclass.name[/[^:]+$/]
    warn "empty reducer_ident for ::#{exprclass}" if shec.empty?
    <<-END
      class ::#{exprclass}
        def reducer_ident
          :#{shec}
        end if instance_methods(false).&(["reducer_ident",:reducer_ident]).empty?
      end
    END
  })
  # The generated file proper: header comment, then one reducer per
  # stackable class, per keyword, and per implicit-paren token.
  ruby=["#Copyright (C) #{Time.now.year} #{ENV['COPYRIGHT_OWNER']||'Caleb Clausen'}\n"+
    "#Generated with ruby v#{RUBY_VERSION}\n"
  ].concat list.map{|klass|
    shortname=klass.to_s[/[^:]+$/]
    shortnames<<[shortname,klass,nil]
    code_for_reduce_with( shortname, [klass] )
  }.concat(rubykeywordlist.map{|kw|
    shortname="KeywordToken_#{kw}"
    shortnames<<[shortname,KeywordToken,kw]
    code_for_reduce_with( shortname, [KeywordToken, kw] )
  }).concat({ImplicitParamListStartToken=>'(',ImplicitParamListEndToken=>')'}.map{|(k,v)|
    shortnames<<[k.name,k,v]
    code_for_reduce_with k.name, [k,v]
  })
  shortnames.delete ["OperatorToken",OperatorToken,nil]
  record=shortnames.dup
  # Two-symbol coalescences: <anything seen so far> then <expr class>.
  ruby.concat(exprclasses.map{|exprclass|
    shec=exprclass.name[/[^:]+$/]
    shortnames.map{|(sn,snclass,snparam)|
      warn "empty shortname for #{snclass}" if sn.empty?
      record<<["#{sn}_then_#{shec}", exprclass, nil, snclass, snparam]
      code_for_reduce_with "#{sn}_then_#{shec}", [exprclass, nil, snclass, snparam]
    }
  })
  # Operator-then-expression and doubled literal/var coalescences.
  ruby.concat(exprclasses.map{|exprclass|
    shec=exprclass.name[/[^:]+$/]
    rubyoperatorlist.map{|op|
      record<<["OperatorToken_#{op}_then_#{shec}", exprclass, nil, OperatorToken, op]
      code_for_reduce_with "OperatorToken_#{op}_then_#{shec}", [exprclass, nil, OperatorToken, op]
    }
  }).concat([LiteralNode,VarNode].map{|k|
    shec=k.name[/[^:]+$/]
    record<<["#{shec}_then_#{shec}", k, nil, k, nil]
    code_for_reduce_with "#{shec}_then_#{shec}", [k, nil, k, nil]
  })
  modname="ReduceWithsFor_#{parser_identity.join('_').tr(':.','_')}"
  size_cache,rule_popularity=@size_cache.partition{|((i,action),size)| Integer===i }
  # Prepend generation statistics and the module-init boilerplate that
  # rebuilds the @@ref_NN references (see ref_to) at load time.
  ruby.unshift [<<-END,@ref_to_code.join("\n"),<<-END2]
    #number of coalescences: #{size_cache.size}
    #empty coalescences: #@empty_reduce_withs
    #duplicate coalescences: #@reduce_with_aliases
    #nonduplicate coalescences: #{@reduce_with_cache.size}
    #reduce_with_defns: #@reduce_with_defns
    extend RedParse::ReduceWithUtils
    def self.redparse_modules_init(parser)
      return if defined? @@ref_0
      rules=parser.rules.map{|rule|
        rule.left.subregs.map{|pat|
          case pat
          when String,Regexp #not needed anymore...?
            RedParse::KW(pat)
          when Reg::LookBack,Reg::LookAhead,Reg::Repeat #Reg::Repeat should be handled already by now
            pat.subregs[0] #subseqs handled already
          else pat
          end
        }<<rule.right
      }
  END
    end
    def redparse_modules_init
      ::RedParse::#{modname}.redparse_modules_init(self)
      super
    end
  END2
  ruby.unshift( "#15 largest coalescences:\n",
    *size_cache.sort_by{|(k,size)| size}[-15..-1].map{ \
      |(k,size)| "##{k.inspect}=#{size}\n" })
  ruby.unshift("#10 most popular rules:\n",
    *rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].map{ \
      |((i,action),pop)| "##{i} #{action.inspect}=#{pop}\n" })
  warn "15 largest coalescences:"
  size_cache.sort_by{|(klass,size)| size}[-15..-1].each{ \
    |(klass,size)| warn "#{klass.inspect}=#{size}" }
  warn "10 most popular rules:"
  rule_popularity.sort_by{|(rule,pop)| pop}[-10..-1].each{ \
    |((i,action),pop)| warn "#{i} #{action.inspect}=#{pop}" }
  @ref_to_code=nil
  ruby=["module RedParse::#{modname}\n",ruby,"\nend\n",indexcode]
  @@rules_compile_cache[identity]=ruby
  path ||= $LOAD_PATH.find{|d| File.exist? File.join(d,"redparse.rb") }+"/redparse/"
  #should use reduce_withs_directory here somehow instead...
  path += modname+".rb" if path[-1]==?/
  File.open(path,"wb") {|f|
    ruby.flatten.each{|frag| f.write frag }
  }
  #warn "actual write_reduce_withs writing took #{Time.now-start}s"
  warn "size of #{path}: #{File.size path}"
ensure
  warn "write_reduce_withs took #{Time.now-start}s" if start
  @reduce_with_cache=nil if @reduce_with_cache
  @size_cache=nil if @size_cache
end