import 'MycoCharacterClasses.my'
import 'MycoTokens.my'
MycoGrammar: Pegleromyces::Grammar {
# Short aliases for the character-class and token grammars;
# the rules below refer to them as C.* and T.*.
C: MycoCharacterClasses
T: MycoTokens

# Category marker: the declarations that follow are the grammar rules.
[rules]
##
# Toplevel Terminal Categorizations

# Entry rule: matches declobj_expr_body, builds a declfile node from it
# (the body node is passed as both arguments), and captures the result as :root.
rule root:
(r(declobj_expr_body[:n0]) { ast.declfile(n0, n0) })[:root]
# Declarations
rule decl:
  declobj
/ declstr
/ copen

# Expressions allowable inside object declarations
rule declobj_expr:
  category
/ declobj_expr_not_category

# Expressions allowable inside object declarations that are not categories
rule declobj_expr_not_category:
  decl
/ cdefn
/ meme
# Expressions allowable inside memes
rule meme_expr:
  arg_expr

# Expressions allowable as function arguments.
# Ordered choice: earlier alternatives are tried first.
rule arg_expr:
  assignment
/ left_chained_operations
/ string_compose
/ symbol_compose
/ left_chained_piping
/ expr_atom
# TODO: make expr_atom not redundant with below rules
# Expression atoms
rule expr_atom:
  decl
/ left_chained_invocations
/ lit_string
/ lit_symstr
/ unary_operation
/ paren_expr
/ constant
/ lit_simple
/ lit_other
/ lit_array
/ invoke

# Expression atoms that are not invocation chains
rule expr_atom_not_chained:
  decl
/ lit_string
/ lit_symstr
/ unary_operation
/ paren_expr
/ constant
/ lit_simple
/ lit_other
/ lit_array
/ invoke

# Expression atoms that are not strings
rule expr_atom_not_string:
  decl
/ left_chained_invocations
/ unary_operation
/ paren_expr
/ constant
/ lit_simple
/ lit_other
/ lit_array
/ invoke
##
# Simple literals

# Float, integer and symbol literals.
# Every alternative must capture its token as t0, because the action block
# reads t0 both for the node location and for the literal's value.
rule lit_simple:
  r(T.float[:t0])   { ast.numeric(t0, t0.float) }
/ r(T.integer[:t0]) { ast.numeric(t0, t0.integer) }
/ r(T.symbol[:t0])  { ast.symbol(t0, t0.text.slice(Range.new(1,-1)).to_sym) } # TODO: more succinct here
##
# Other literals

lit_other:
  lit_block

# A T.op_toproc token followed by a block body; yields a blklit node.
rule lit_block:
  r(T.op_toproc[:to] + block_body[:n1])
    { ast.blklit(to, n1) }

# Optional parameter list followed by an enclosed expression body.
# The body node doubles as the location argument of the block node.
rule block_body:
  r(C.spc_nl.* + param_list.-[:np] + C.spc_nl.* + meme_enclosed_expr_body[:nb])
    { ast.block(nb, np, nb) }

# Like block_body, but the body is an inline expression list.
rule open_block_body:
  r(C.spc_nl.* + param_list.-[:np] + C.spc_nl.* + meme_inline_expr_body[:nb])
    { ast.block(nb, np, nb) }
##
# Enclosed literals

# String literal in either quoting style (string_* or sstring_* tokens).
# Produces a string node from the escape-processed body text; the
# symbol-producing variant is the separate rule lit_string_as_symbol.
rule lit_string:
  r((T.string_begin + T.string_body[:tb] + T.string_end) / (T.sstring_begin + T.sstring_body[:tb] + T.sstring_end))
    { ast.str(tb, encode_escapes(tb.text)) }
# String literal (either quoting style) whose escape-processed text is
# converted to a symbol.
rule lit_string_as_symbol:
  r((T.string_begin + T.string_body[:tb] + T.string_end) / (T.sstring_begin + T.sstring_body[:tb] + T.sstring_end))
    { ast.symbol(tb, encode_escapes(tb.text).to_sym) }

# Symbol literal written with string-style delimiters (symstr_begin ... string_end).
rule lit_symstr:
  r(T.symstr_begin + T.string_body[:tb] + T.string_end)
    { ast.symbol(tb, encode_escapes(tb.text).to_sym) }

# Name of a category, delimited by the catgry_begin / catgry_end tokens.
rule category_name:
  r(T.catgry_begin + T.catgry_body[:tb] + T.catgry_end)
    { ast.symbol(tb, encode_escapes(tb.text).to_sym) }
##
# String interpolations / juxtapositions

# One non-string atom followed by a string literal, as a two-element array.
rule string_compose_part:
  r(C.spc.* + expr_atom_not_string[:n0] + C.spc.* + lit_string[:n1])
    { [n0,n1] }

# One or more parts, flattened into a single list of nodes.
rule string_compose_parts:
  r(string_compose_part.+[:nlist])
    { nlist.flatten }

# A leading string literal followed by composed parts.
rule string_compose:
  r(lit_string[:n0] + string_compose_parts[:nrest])
    { ast.string_compose(n0, [n0] + nrest) }

# A leading symbol-string literal followed by composed parts.
rule symbol_compose:
  r(lit_symstr[:n0] + string_compose_parts[:nrest])
    { ast.symbol_compose(n0, [n0] + nrest) }
##
# Constants

# A scope token followed by a constant token; yields the constant token.
rule colon_const:
  r(T.scope + T.constant[:tc])
    { tc }

# Optional leading scope, a constant, then any number of scoped constants.
# The second argument to ast.const is true when the leading scope was present.
rule constant:
  r(T.scope.-[:ts] + T.constant[:tc] + colon_const.*[:trest])
    { ast.const((ts||tc), !!ts, [tc.sym, *trest.map(&:sym)]) }

rule const_sep:
  (C.spc_nl.* + T.const_sep + C.spc_nl.*).+

rule sepd_constant:
  r(const_sep + constant[:n0])
    { n0 }

# One or more constants joined by const_sep, collected into an array node.
rule constant_list:
  r(constant[:n0] + sepd_constant.*[:nrest])
    { ast.array(n0, [n0, *nrest]) }
##
# Bare identifiers

# Used in contexts where a bare identifier is a symbol
rule id_as_symbol:
  r(T.identifier[:t0]) { ast.symbol(t0, t0.sym) }
##
# Object declarations

rule declobj_sepd_expr:
  r(arg_sep + declobj_expr[:n0]) { n0 }

# One or more declaration expressions joined by arg_sep, with optional
# trailing separators.
rule declobj_sepd_exprs:
  r(declobj_expr[:n0] + declobj_sepd_expr.*[:nrest] + arg_sep_opt) { [n0, *nrest] }

# Contents of an object declaration up to and including the closing token.
# An empty body yields a null node located at the closing token, so that
# token must be captured as te in the second alternative too.
rule declobj_expr_body:
  r(arg_sep_opt + declobj_sepd_exprs[:nlist] + T.declare_end[:te])
    { ast.sequence(nlist.first, nlist) }
/ r(arg_sep_opt + T.declare_end[:te])
    { ast.null(te) }
# A constant list, the declare_begin token, then the declaration body.
rule declobj:
  r(constant_list[:n0] + C.spc_nl.* + T.declare_begin[:tb] + declobj_expr_body[:n1])
    { ast.declobj(tb, n0, n1) }

rule category_expr:
  declobj_expr_not_category

rule category_sepd_expr:
  r(arg_sep + category_expr[:n0])
    { n0 }

# A separator-led list of category expressions (note the leading arg_sep).
rule category_sepd_exprs:
  r(arg_sep + category_expr[:n0] + category_sepd_expr.*[:nrest])
    { [n0, *nrest] }

# A category name, its optional contents, and a trailing check that either
# another category or the end of the declaration follows.
# With no contents, the body is a null node located at the name.
rule category:
  r(category_name[:n0] + category_sepd_exprs.-[:nlist] + !!(arg_sep_opt + (T.catgry_begin / T.declare_end)))
    { ast.category(n0, n0.value, (nlist &? ast.sequence(nlist.first, nlist) ?? ast.null(n0))) }
# constant, T.reopen, T.declare_begin, then a declaration body.
rule copen:
  r(constant[:n0] + C.spc_nl.* + T.reopen[:tb] + C.spc_nl.* + T.declare_begin + declobj_expr_body[:n1])
    { ast.copen(tb, n0, n1) }

# constant, T.define, then a full object declaration.
rule cdefn:
  r(constant[:n0] + C.spc_nl.* + T.define[:tb] + C.spc_nl.* + declobj[:n1])
    { ast.cdefn(tb, n0, n1) }
##
# String object declarations

# declstr_begin, a newline, the raw body text, then declstr_end.
# The body text is used as-is for the string node.
rule declstr_body:
  r(T.declstr_begin[:tb] + C.spc.* + C.nl + T.declstr_body[:ts] + C.spc_nl.* + T.declstr_end)
    { ast.str(tb, ts.text) }

# A constant list, at least one space, then a string declaration body.
rule declstr:
  r(constant_list[:nc] + C.spc.+ + declstr_body[:nb])
    { ast.declstr(nc, nc, nb) }
##
# Assignment

rule assignment:
  local_assignment
/ invoke_assignment

rule assign_rhs:
  arg_expr

# identifier, T.assign, right-hand side; yields a local-assignment node.
rule local_assignment:
  r(T.identifier[:ti] + C.spc_nl.* + T.assign[:to] + C.spc_nl.* + assign_rhs[:rhs])
    { ast.lasgn(to, ti.sym, rhs) }

# Things that may appear on the left of an invocation-style assignment.
rule invoke_assignment_lhs:
  left_chained_invocations
/ invoke
# Assignment through an invocation on the left-hand side.
# The parsed invocation node is rewritten in place: its name gets "=" appended
# and the right-hand side is added to its arguments. For the :"[]=" form the
# right-hand side goes after the original arguments; otherwise it goes first.
rule invoke_assignment:
  r(invoke_assignment_lhs[:lhs] + C.spc_nl.* + T.assign[:to] + C.spc_nl.* + assign_rhs[:rhs]) {
    lhs.name = (""lhs.name"=").to_sym
    orig_arguments = lhs.arguments &? lhs.arguments.body ?? []
    arg_order = lhs.name==:"[]=" &? [*orig_arguments, rhs] ?? [rhs, *orig_arguments]
    lhs.arguments = ast.args(rhs, arg_order)
    lhs
  }
##
# Invoke - Results in a :lambig, :call, or :iter with a :call within

# Optional argument list / optional block body.
# The inner match is captured as n so the action block can return it.
opt_arg_list:   (r(C.spc.* + arg_list[:n]) { n }).-
opt_block_body: (r(C.spc_nl.* + block_body[:n]) { n }).-
# An identifier with optional arguments and an optional block.
rule invoke:
  r(T.identifier[:tn] + opt_arg_list[:na] + opt_block_body[:nb])
    { ast.invoke(tn, null, tn.sym, na, nb) }

# An identifier with a mandatory argument list and no block.
rule invoke_classic_form:
  r(T.identifier[:tn] + C.spc.* + arg_list[:na])
    { ast.invoke(tn, null, tn.sym, na, null) }

# Allow some binary operators to be invoked with a dot
rule op_invoke:
  r(op_invoke_id[:tn] + opt_arg_list[:na] + opt_block_body[:nb])
    { ast.invoke(tn, null, tn.sym, na, nb) }

# An array literal used as an element access: invokes :"[]" with the
# array's elements as arguments.
rule elem_invoke:
  r(lit_array[:na] + opt_block_body[:nb])
    { ast.invoke(na, null, :"[]", ast.args(na, na.body), nb) }

rule op_invoke_id:
  left_op_normal
##
# Argument lists

# Mandatory separator (one or more) and optional separator/space run.
arg_sep:     (C.spc.* + T.arg_sep + C.spc.*).+
arg_sep_opt: (C.spc / T.arg_sep).*
# A positional argument: a splat, or an expression that is not followed by
# the keyword-argument marker. The expression is captured as n0 because the
# action block returns it.
rule in_arg_normal:
  in_arg_splat
/ r(arg_expr[:n0] + !in_arg_kwarg_mark) { n0 }
rule in_arg_sepd_normal:
  r(arg_sep + in_arg_normal[:n0])
    { n0 }

# One or more positional arguments joined by arg_sep.
rule in_arg_normals:
  r(in_arg_normal[:n0] + in_arg_sepd_normal.*[:nrest])
    { [n0,*nrest] }

rule in_arg_sepd_kwarg:
  r(arg_sep + in_arg_kwarg[:n0])
    { n0 }

# One or more keyword arguments, flattened into a single hash node.
rule in_arg_kwargs:
  r(in_arg_kwarg[:n0] + in_arg_sepd_kwarg.*[:nrest])
    { ast.hash(n0.first, [n0,*nrest].flatten) }

rule in_arg_kwarg_mark:
  C.spc_nl.* + T.meme_mark

# A bare identifier (as a symbol), the marker, then the value expression;
# yields a [key, value] pair.
rule in_arg_kwarg:
  r(id_as_symbol[:n0] + in_arg_kwarg_mark + C.spc_nl.* + arg_expr[:n1])
    { [n0, n1] }

rule in_arg_splat:
  r(T.op_mult[:to] + expr_atom[:n0])
    { ast.splat(to, n0) }

rule in_arg_block:
  r(T.op_toproc[:to] + expr_atom[:n0])
    { ast.blkarg(to, n0) }
# Every accepted combination of positional arguments, keyword arguments and
# a block argument, longest combination first. Each alternative captures its
# own parts, since the action blocks refer to them by name.
rule in_arg_list:
  r(in_arg_normals[:n0] + arg_sep + in_arg_kwargs[:n1] + arg_sep + in_arg_block[:n2]) { [*n0,n1,n2] }
/ r(in_arg_normals[:n0] + arg_sep + in_arg_kwargs[:n1]) { [*n0,n1] }
/ r(in_arg_normals[:n0] + arg_sep + in_arg_block[:n1])  { [*n0,n1] }
/ r(in_arg_kwargs[:n0] + arg_sep + in_arg_block[:n1])   { [n0,n1] }
/ r(in_arg_normals[:n0]) { [*n0] }
/ r(in_arg_kwargs[:n0])  { [n0] }
/ r(in_arg_block[:n0])   { [n0] }
# Delimited argument list; an absent inner list yields an empty args node.
rule arg_list:
  r(T.args_begin[:tb] + arg_sep_opt + in_arg_list.-[:nlist] + arg_sep_opt + T.args_end)
    { ast.args(tb, (nlist || [])) }

# Array literal; shares the inner list syntax with arg_list.
rule lit_array:
  r(T.array_begin[:tb] + arg_sep_opt + in_arg_list.-[:nlist] + arg_sep_opt + T.array_end)
    { ast.array(tb, (nlist || [])) }
##
# Parameter lists

# A parameter separator: explicit separators, or optional spaces directly
# before the closing params_end token (checked without consuming it).
param_sep:     (C.spc.* + T.arg_sep + C.spc.*).+ / (C.spc.* + !!T.params_end)
param_sep_opt: (C.spc / T.arg_sep).*

# Wraps pattern x so that it must be followed by param_sep; x is captured
# as n0 because the action block returns it.
param_sepd: |x| r(x[:n0] + param_sep) { n0 }
# Plain identifier that is directly followed by a parameter separator.
rule req_param:
  r(T.identifier[:ti] + !!param_sep)
    { ast.reqprm(ti, ti.sym) }

# identifier, T.assign, default-value expression.
rule opt_param:
  r(T.identifier[:ti] + C.spc_nl.* + T.assign[:to] + C.spc_nl.* + arg_expr[:nv])
    { ast.optprm(ti, ti.sym, nv) }

# T.op_mult followed by an identifier.
rule rest_param:
  r(T.op_mult + C.spc_nl.* + T.identifier[:ti])
    { ast.restprm(ti, ti.sym) }

# identifier, T.meme_mark, default-value expression.
rule kwopt_param:
  r(T.identifier[:ti] + C.spc_nl.* + T.meme_mark[:to] + C.spc_nl.* + arg_expr[:nv])
    { ast.kwoptprm(ti, ti.sym, nv) }

# identifier and T.meme_mark, directly followed by a parameter separator.
rule kwreq_param:
  r(T.identifier[:ti] + C.spc_nl.* + T.meme_mark[:to] + !!param_sep)
    { ast.kwreqprm(ti, ti.sym) }

# T.op_exp followed by an identifier.
rule kwrest_param:
  r(T.op_exp + C.spc_nl.* + T.identifier[:ti])
    { ast.kwrestprm(ti, ti.sym) }

# T.op_toproc followed by an identifier.
rule block_param:
  r(T.op_toproc + C.spc_nl.* + T.identifier[:ti])
    { ast.blkprm(ti, ti.sym) }
# Full parameter list. Parameters are accepted in a fixed order: required,
# optional, rest, post-required, keyword (required or optional), keyword
# rest, block. Keyword parameters are split by node type before the params
# node is built.
rule param_list:
  r(T.params_begin[:tb] + param_sep_opt + param_sepd(req_param).*[:required] + param_sepd(opt_param).*[:optional] + param_sepd(rest_param).-[:rest] + param_sepd(req_param).*[:post] + param_sepd(kwreq_param / kwopt_param).*[:kw] + param_sepd(kwrest_param).-[:kwrest] + param_sepd(block_param).-[:block] + T.params_end) {
    kwrequired = kw.select |x| { x.node_type == :kwreqprm }
    kwoptional = kw.select |x| { x.node_type == :kwoptprm }

    # TODO: the ternary clauses should not be necessary
    ast.params(tb, required, optional, (rest.is_a?(Array) &? rest.first ?? rest), post, kwrequired, kwoptional, (kwrest.is_a?(Array) &? kwrest.first ?? kwrest), (block.is_a?(Array) &? block.first ?? block))
  }
##
# Two-term operators

# Operators outside the branching group below.
left_op_normal:
  T.op_exp
/ T.op_mult
/ T.op_div
/ T.op_mod
/ T.op_plus
/ T.op_minus
/ T.op_compare
/ T.op_r_pipe

# Branching operators (handled via ast.branch_op in left_chained_operations).
left_op_branch:
  T.op_and
/ T.op_or
/ T.op_and_q
/ T.op_or_q
/ T.op_void_q

rule left_op:
  left_op_normal
/ left_op_branch
# Achieve left-associativity through iteration.
#
# PEG parsers get tripped up by left recursion
# (in contrast to LALR parsers, which prefer left recursion).
# This is a well-understood limitation, but refer to:
#   www.dalnefre.com/wp/2011/05/parsing-expression-grammars-part-4/
# for an easy-to-understand explanation of this problem and this solution.
#
rule sepd_chained_operation:
  r(C.spc_nl.* + left_op[:to] + C.spc_nl.* + expr_atom[:n1]) { [to, n1] }
# A chain of binary operations. The flat [atom, op, atom, op, ...] list is
# reduced by successive collapse calls, one operator group at a time in the
# order listed, until a single node remains.
rule left_chained_operations:
  r(expr_atom[:n0] + sepd_chained_operation.+[:nlist]) {
    nlist.unshift(n0)
    nlist.flatten!

    collapse(nlist, :t_op_exp)
    collapse(nlist, :t_op_mult, :t_op_div, :t_op_mod)
    collapse(nlist, :t_op_plus, :t_op_minus)
    collapse(nlist, :t_op_compare)
    collapse(nlist, :t_op_and, :t_op_or, :t_op_and_q, :t_op_or_q, :t_op_void_q) |n0,op,n1| { ast.branch_op(op, op.sym, n0, n1) }
    collapse(nlist, :t_op_r_pipe) |n0,op,n1| { ast.pipe_op(op, n0, n1) }

    # There should only be one resulting node left
    (nlist.count == 1) || raise("Failed to fully collapse left_chained_operations: "nlist"")

    nlist.first
  }
##
# Invocations and Quests (soft-failing invocations)

# The two operators that can link an invocation chain.
rule left_invoke_op:
  T.quest
/ T.dot
# Achieve left-associativity through iteration.
# (see left_chained_operations).
#
# The second alternative covers element access: optional spaces are turned
# into a :t_dot token (captured as t0) in front of an elem_invoke (captured
# as n1), so it reduces the same way as an explicit dot invocation.
rule sepd_chained_invocation:
  r(C.spc_nl.* + left_invoke_op[:t0] + C.spc_nl.* + (invoke / op_invoke)[:n1])
    { [t0, n1] }
/ r(C.spc.*.token(:t_dot)[:t0] + elem_invoke[:n1])
    { [t0, n1] }
# A chain of invocations on a non-chained atom. For a :t_dot link the
# left node becomes the receiver of the right node; otherwise the pair is
# wrapped in a quest node.
rule left_chained_invocations:
  r(expr_atom_not_chained[:n0] + sepd_chained_invocation.+[:nlist]) {
    nlist.unshift(n0)
    nlist.flatten!

    collapse(nlist, :t_dot, :t_quest) |n0,op,n1| { (op.type == :t_dot) &? (n1.receiver=n0; n1) ?? ast.quest(op, n0, n1) }

    # There should only be one resulting node left
    (nlist.count == 1) || raise("Failed to fully collapse left_chained_invocations: "nlist"")

    nlist.first
  }
##
# Piping invocations

pipeable:
  left_chained_invocations
/ invoke

# One or more spaces, turned into a :t_op_r_pipe token, then a pipeable.
rule sepd_chained_piping:
  r(C.spc.+.token(:t_op_r_pipe)[:to] + pipeable[:n1])
    { [to, n1] }
# A chain of piped invocations, reduced left to right into pipe_op nodes
# until a single node remains.
rule left_chained_piping:
  r(expr_atom[:n0] + sepd_chained_piping.+[:nlist]) {
    nlist.unshift(n0)
    nlist.flatten!

    collapse(nlist, :t_op_r_pipe) |n0,op,n1| { ast.pipe_op(op, n0, n1) }

    # There should only be one resulting node left
    (nlist.count == 1) || raise("Failed to fully collapse left_chained_piping: "nlist"")

    nlist.first
  }
##
# Unary operators

# T.op_not applied to an atom; becomes an invocation of :"!" on that atom.
rule unary_operation:
  r(T.op_not[:to] + expr_atom[:n0])
    { ast.invoke(to, n0, :"!", null) }
##
# Memes and etc..

# TODO: this should work without the call to 'inner'
# Currently, without this call, an unbalanced tidx_stack occurs
# in the Processor due to an extra :t_start capture symbol emitted
# by the String::Parser with no matching :t_end to clear it.
rule t_inln_sep: !T.arg_sep.inner + T.expr_sep

# Inline separators: built on t_inln_sep (an expr_sep that is not an arg_sep).
rule inln_sep:     (C.spc.* + t_inln_sep + C.spc.*).+
rule inln_sep_opt: (C.spc / t_inln_sep).*

# General expression separators.
rule expr_sep:     (C.spc.* + T.expr_sep + C.spc.*).+
rule expr_sep_opt: (C.spc / T.expr_sep).*
rule meme_inline_sepd_expr:
  r(inln_sep + meme_expr[:n])
    { n }

# Expressions joined by inline separators, with optional trailing separators.
rule meme_inline_sepd_exprs:
  r(meme_expr[:n0] + meme_inline_sepd_expr.*[:nrest] + inln_sep_opt)
    { [n0, *nrest] }

rule meme_sepd_expr:
  r(expr_sep + meme_expr[:n])
    { n }

# Expressions joined by expression separators, with optional trailing separators.
rule meme_sepd_exprs:
  r(meme_expr[:n0] + meme_sepd_expr.*[:nrest] + expr_sep_opt)
    { [n0, *nrest] }

# An inline expression list wrapped in a sequence node.
rule meme_inline_expr_body:
  r(inln_sep_opt + meme_inline_sepd_exprs[:nlist])
    { ast.sequence(nlist.first, nlist) }
# Expression list up to and including the meme_end token.
# An empty body yields a null node located at the closing token, so that
# token must be captured as te in the second alternative too.
rule meme_expr_body:
  r(expr_sep_opt + meme_sepd_exprs[:nlist] + T.meme_end[:te])
    { ast.sequence(nlist.first, nlist) }
/ r(expr_sep_opt + T.meme_end[:te])
    { ast.null(te) }

# Expression list up to and including the paren_end token.
# A single expression is returned unwrapped; several become a sequence;
# none becomes a null node located at the closing token.
rule paren_expr_body:
  r(expr_sep_opt + meme_sepd_exprs[:nlist] + T.paren_end[:te])
    { nlist.count==1 &? nlist.first ?? ast.sequence(nlist.first, nlist) }
/ r(expr_sep_opt + T.paren_end[:te])
    { ast.null(te) }
rule paren_expr:
  r(T.paren_begin + paren_expr_body[:n0])
    { n0 }

rule meme_enclosed_expr_body:
  r(T.meme_begin + meme_expr_body[:n0])
    { n0 }

# Either body form, wrapped in a block-literal node.
rule meme_either_body_as_block:
  r((block_body / open_block_body)[:n0])
    { ast.blklit(n0, n0) }

# A single decoration term.
rule decoration:
  invoke_classic_form
/ id_as_symbol
/ lit_string_as_symbol
/ constant

rule sepd_decoration:
  r(C.spc.* + decoration[:n])
    { n }

# One or more decorations separated by optional spaces.
rule sepd_decorations:
  r(decoration[:n0] + sepd_decoration.*[:nrest])
    { [n0, *nrest] }
# NOTE(review): an older variant of this rule, kept for reference only.
# It relies on decorators_and_meme_name and meme_either_body, which are not
# defined in the visible part of this grammar, and it is superseded by the
# definition below. Its second alternative has been reconstructed from the
# orphaned action block - confirm before reviving.
#
# rule meme:
#   r(decorators_and_meme_name[:nd] + C.spc.* + T.meme_mark[:tm] + (r(C.spc_nl.* + param_list.-[:n]) { n })[:np] + C.spc_nl.* + meme_either_body[:nb])
#     { ast.meme(tm, nd.body.shift.name, nd, np, nb) }
# / r(decorators_and_meme_name[:nd])
#     { ast.meme(nd, nd.body.shift.name, nd, null, null) }

# A meme: a list of decorations, optionally followed by the meme mark and a
# body. Without a body, the third argument to ast.meme is null.
rule meme:
  r(sepd_decorations[:nlist] + C.spc.* + T.meme_mark[:tm] + C.spc_nl.* + meme_either_body_as_block[:nb])
    { ast.meme(nlist.first, nlist, nb) }
/ r(sepd_decorations[:nlist])
    { ast.meme(nlist.first, nlist, null) }
}