class Metasm::C::Compiler

each CPU defines a subclass of this one

Attributes

auto_label_list[RW]

list of unique labels generated (to recognize user-defined ones)

curexpr[RW]
exeformat[RW]

an ExeFormat (mostly used for unique label creation, and cpu.check_reserved_name)

label_oldname[RW]

map asm name -> original C name (for exports etc)

parser[RW]

the C Parser (destroyed by compilation)

source[RW]

an array of assembler statements (strings)

Public Class Methods

new(parser, exeformat=nil, source=[]) click to toggle source

creates a new CCompiler from an ExeFormat and a C Parser

# File metasm/compile_c.rb, line 39
# Sets up a compiler instance.
#
# parser::    the C parser whose toplevel will be compiled
# exeformat:: the ExeFormat to use; a fresh ExeFormat is created when nil
# source::    the array that receives the generated asm statements
def initialize(parser, exeformat=nil, source=[])
        @parser = parser
        @exeformat = exeformat || ExeFormat.new
        @source = source
        # both tables start empty and are filled during compilation
        @auto_label_list = {}
        @label_oldname = {}
end

Public Instance Methods

c_block(blk) click to toggle source
# File metasm/compile_c.rb, line 176
# Compiles every statement of the block +blk+ in order, dispatching on the
# class of the statement. The whole run is bracketed by the c_block_enter
# and c_block_exit hooks. Nested Blocks are compiled recursively.
#
# Raises a RuntimeError naming the statement when its class is not one of
# the handled ones (a bare +raise+ here would only report the generic
# "unhandled exception", which gives no clue about the culprit).
def c_block(blk)
        c_block_enter(blk)
        blk.statements.each { |stmt|
                case stmt
                when CExpression; c_cexpr(stmt)
                when Declaration; c_decl(stmt.var)
                when If; c_ifgoto(stmt.test, stmt.bthen.target)
                when Goto; c_goto(stmt.target)
                when Label; c_label(stmt.name)
                when Return; c_return(stmt.value)
                when Asm; c_asm(stmt)
                when Block; c_block(stmt)
                else raise "unhandled statement in block: #{stmt.inspect}"
                end
        }
        c_block_exit(blk)
end
c_block_enter(blk) click to toggle source
# File metasm/compile_c.rb, line 194
# Hook called by c_block before the statements of +blk+ are compiled.
# Does nothing here; presumably meant to be overridden by the CPU-specific
# subclasses.
def c_block_enter(blk)
        nil
end
c_block_exit(blk) click to toggle source
# File metasm/compile_c.rb, line 197
# Hook called by c_block after the statements of +blk+ have been compiled.
# Does nothing here; presumably meant to be overridden by the CPU-specific
# subclasses.
def c_block_exit(blk)
        nil
end
c_function(func) click to toggle source

compiles a C function func to asm source, appended to the array of strings (source). In a first pass the stack variable offsets are computed, then each statement is compiled in turn.

# File metasm/compile_c.rb, line 153
# Compiles the function +func+ and appends the result to @source:
# an empty line, the label(s) of the function, the prolog, the compiled
# body, the epilog and a final empty line.
def c_function(func)
        # must wait the Declaration to run the CExpr for dynamic auto offsets,
        # and must run those statements once only
        # TODO alloc a stack variable to maintain the size for each dynamic array
        # TODO offset of arguments
        # TODO nested function
        c_init_state(func)

        # the original C name, when there is one, is emitted as an extra label
        oldname = @label_oldname[func.name]
        @source << ''
        @source << "#{oldname}:" if oldname
        @source << "#{func.name}:"

        # compile the body into a scratch array, so that the prolog can be
        # emitted after the body has been compiled (saves 1 pass)
        outer = @source
        @source = []
        c_block(func.initializer)
        body = @source
        @source = outer

        c_prolog
        @source.concat body
        c_epilog
        @source << ''
end
c_idata(data, align) click to toggle source

compiles a C static data definition into an asm string. Returns the new alignment value.

# File metasm/compile_c.rb, line 251
# Emits the asm definition of the initialized static variable +data+.
#
# An ".align" directive is emitted first when the alignment of the type is
# larger than +align+, then the original C name label (if any), then the
# variable name followed by its data (written by c_idata_inner).
#
# Returns the length written modulo the type alignment, or the type
# alignment itself when that remainder is zero.
def c_idata(data, align)
        type_align = data.type.align(@parser)
        @source << ".align #{type_align}" if type_align > align

        oldname = @label_oldname[data.name]
        @source << "#{oldname}:" if oldname

        # dup: c_idata_inner appends to this line in place
        @source << data.name.dup
        remainder = c_idata_inner(data.type, data.initializer) % type_align
        remainder == 0 ? type_align : remainder
end
c_idata_inner(type, value) click to toggle source

dumps an anonymous variable definition, appending to the last line of source. source.last is a label name or is empty before calling here. Returns the length of the data written.

# File metasm/compile_c.rb, line 265
# Writes the data for one (possibly nested) initialized object of type
# +type+ with initializer +value+, appending to the last line of @source
# and pushing new lines as needed. @source.last is expected to hold a label
# name or to be empty on entry.
#
# type::  a BaseType, Struct, Union or Array
# value:: the initializer; nil means zero / empty initializer
#
# Returns the number of bytes described by the emitted directives,
# including the padding emitted here.
# Raises a RuntimeError on an unknown base type or on an initializer whose
# shape does not match the type.
def c_idata_inner(type, value)
        case type
        when BaseType
                value ||= 0

                if type.name == :void
                        @source.last << ':' if not @source.last.empty?
                        return 0
                end

                @source.last <<
                case type.name
                when :__int8;  ' db '
                when :__int16; ' dw '
                when :__int32; ' dd '
                when :__int64; ' dq '
                when :ptr; " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} "
                # floats are dumped as raw bytes, the value itself ends up in a trailing comment
                when :float;   ' db ' + [value].pack(@parser.endianness == :little ? 'e' : 'g').unpack('C*').join(', ') + ' // '
                when :double;  ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // '
                when :longdouble; ' db ' + [value].pack(@parser.endianness == :little ? 'E' : 'G').unpack('C*').join(', ') + ' // ' # XXX same as :double
                else raise "unknown idata type #{type.inspect} #{value.inspect}"
                end

                @source.last << c_idata_inner_cexpr(value)

                @parser.typesize[type.name]

        when Struct
                value ||= []
                @source.last << ':' if not @source.last.empty?
                # could .align here, but if there is our label name just before, it should have been .aligned too..
                raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array
                sz = 0
                type.members.zip(value).each { |m, v|
                        if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz
                                @source << "db #{wsz-sz} dup(?)"
                                # the padding is part of what was written: without this the
                                # next members are padded again and the total is too small
                                sz = wsz
                        end
                        @source << ''
                        sz += c_idata_inner(m.type, v)
                }

                sz

        when Union
                value ||= []
                @source.last << ':' if not @source.last.empty?
                len = sizeof(nil, type)
                raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array
                # initialize through the last member having a value, or the first one
                idx = value.rindex(value.compact.last) || 0
                wlen = c_idata_inner(type.members[idx].type, value[idx])
                # pad up to the size of the union with one zero byte per missing byte
                @source << "db #{(['0'] * (len - wlen)).join(', ')}" if wlen < len

                len

        when Array
                value ||= []
                if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? ::String
                        # string literal initializer
                        elen = sizeof(nil, value.type.type)
                        @source.last <<
                        case elen
                        when 1; ' db '
                        when 2; ' dw '
                        else raise 'bad char* type ' + value.inspect
                        end << value.rexpr.inspect

                        # without a declared length, room is made for the terminating 0
                        len = type.length || (value.rexpr.length+1)
                        if len > value.rexpr.length
                                @source.last << (', 0' * (len - value.rexpr.length))
                        end

                        elen * len

                elsif value.kind_of? ::Array
                        @source.last << ':' if not @source.last.empty?
                        total = type.length || value.length
                        value.each { |v|
                                @source << ''
                                c_idata_inner(type.type, v)
                        }
                        # zero-fill the elements that have no initializer
                        missing = total - value.length
                        if missing > 0
                                @source << " db #{missing * sizeof(nil, type.type)} dup(0)"
                        end

                        # every element written counts, not only the zero-filled tail
                        sizeof(nil, type.type) * [total, value.length].max

                else raise "unknown static array initializer #{value.inspect}"
                end
        end
end
c_idata_inner_cexpr(expr) click to toggle source
# File metasm/compile_c.rb, line 358
# Renders a static initializer expression as asm source text.
#
# expr:: an Integer, another Numeric, a Variable or a CExpression
#        (CExpressions are reduced against the parser first)
#
# Returns the text as a String.
# Raises a RuntimeError for anything that cannot be rendered.
def c_idata_inner_cexpr(expr)
        expr = expr.reduce(@parser) if expr.kind_of? CExpression

        # restricts +text+ to the byte size of expr with an and-mask
        clamp = lambda { |text| "(#{text} & 0#{'ff'*sizeof(expr)}h)" }

        case expr
        when ::Integer
                # large values read better in hexadecimal
                if expr < 4096
                        expr.to_s
                else
                        '0x%X' % expr
                end
        when ::Numeric
                expr.to_s
        when Variable
                if expr.type.kind_of? Array
                        # an array variable stands for its own address
                        expr.name
                else
                        c_idata_inner_cexpr(expr.initializer)
                end
        when CExpression
                if expr.lexpr
                        # binary operation
                        #when :'.'
                        #when :'->'
                        #when :'[]'
                        binops = [:+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^]
                        raise 'unhandled initializer expr ' + expr.inspect if not binops.include?(expr.op)

                        text = "(#{c_idata_inner_cexpr(expr.lexpr)}#{expr.op}#{c_idata_inner_cexpr(expr.rexpr)})"
                        # db are unsigned
                        expr.type.integral? ? clamp[text] : text
                else
                        # unary operation, or plain wrapper when there is no operator
                        case expr.op
                        when :&
                                if not expr.rexpr.kind_of? Variable
                                        raise 'unhandled addrof in initializer ' + expr.rexpr.inspect
                                end
                                expr.rexpr.name
                        #when :*
                        when :+
                                c_idata_inner_cexpr(expr.rexpr)
                        when :-
                                ' -' + c_idata_inner_cexpr(expr.rexpr)
                        when nil
                                inner = c_idata_inner_cexpr(expr.rexpr)
                                expr.rexpr.kind_of?(CExpression) ? clamp[inner] : inner
                        else
                                raise 'unhandled initializer expr ' + expr.inspect
                        end
                end
        else
                raise 'unhandled initializer ' + expr.inspect
        end
end
c_label(name) click to toggle source
# File metasm/compile_c.rb, line 200
# Appends the label definition "name:" to @source.
def c_label(name)
        @source.push("#{name}:")
end
c_program_epilog() click to toggle source

here you can add things like stubs for PIC code

# File metasm/compile_c.rb, line 246
# Hook called by compile once all the functions have been compiled.
# Does nothing here.
def c_program_epilog
        nil
end
c_reserve_stack(block, off = 0) click to toggle source

fills @state.offset (empty hash) with automatic variable => stack offset (recursive). offset is an ::Integer or a CExpression (dynamic array). Assumes offset 0 is a ptr-size-aligned address. TODO registerize automatic variables

# File metasm/compile_c.rb, line 209
# Records in @state.offset the stack offset of every variable declared in
# +block+ and, recursively, in its nested blocks. Function declarations
# are skipped.
#
# block:: the block whose statements are scanned
# off::   the offset already in use when entering the block
def c_reserve_stack(block, off = 0)
        block.statements.each do |stmt|
                if stmt.kind_of? Declaration
                        var = stmt.var
                        unless var.type.kind_of? Function
                                off = c_reserve_stack_var(var, off)
                                @state.offset[var] = off
                        end
                elsif stmt.kind_of? Block
                        # off is deliberately left untouched: sub-blocks that are not
                        # nested in one another can share the same stack space
                        c_reserve_stack(stmt, off)
                end
        end
end
c_reserve_stack_var(var, off) click to toggle source

computes the new stack offset for var off is either an offset from stack start (:ptr-size-aligned) or

a CExpression [[[expr, +, 7], &, -7], +, off]
# File metasm/compile_c.rb, line 226
# Computes the stack offset to use once +var+ has been allocated.
#
# var:: the variable to allocate
# off:: the current offset: an Integer, or a CExpression as soon as a
#       dynamic array has been allocated
#
# For a dynamic array (Array type whose length is a CExpression) the
# result is the CExpression ((size + off + 7) & -8) + 0; the trailing
# "+ 0" is where the fixed sizes of the following variables get added.
# Otherwise the size of the variable is added to +off+ (to off.rexpr when
# +off+ is a CExpression) and the sum is rounded up to the alignment of
# the type.
def c_reserve_stack_var(var, off)
        if (arr_type = var.type).kind_of? Array and (arr_sz = arr_type.length).kind_of? CExpression
                # dynamic array !
                arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type),
                               BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil)
                off = CExpression.new(arr_sz, :+, off, arr_sz.type)
                # round up to a multiple of 8: (x + 7) & ~7, and ~7 is -8
                # (-7 only clears bits 1 and 2, eg 9 & -7 == 9)
                off = CExpression.new(off, :+,  7, off.type)
                off = CExpression.new(off, :&, -8, off.type)
                CExpression.new(off, :+,  0, off.type)
        else
                al = var.type.align(@parser)
                sz = sizeof(var)
                case off
                when CExpression; CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type)
                else (off + sz + al - 1) / al * al
                end
        end
end
c_udata(data, align) click to toggle source
# File metasm/compile_c.rb, line 407
# Emits the asm definition of the uninitialized static variable +data+:
# the original C name label (if any), then the variable name followed by a
# directive reserving its size.
#
# Returns the size modulo +align+, or +align+ itself when that remainder
# is zero.
def c_udata(data, align)
        oldname = @label_oldname[data.name]
        @source << "#{oldname}:" if oldname

        line = "#{data.name} "
        @source << line

        type = data.type
        if type.kind_of? BaseType
                len = @parser.typesize[type.name]
                # the integer types have a dedicated directive
                scalar = { :__int8 => 'db ?', :__int16 => 'dw ?', :__int32 => 'dd ?', :__int64 => 'dq ?' }
                line << scalar.fetch(type.name) { "db #{len} dup(?)" }
        else
                len = sizeof(data)
                line << "db #{len} dup(?)"
        end

        remainder = len % align
        remainder == 0 ? align : remainder
end
check_reserved_name(var) click to toggle source

returns non-nil if the variable name is unsuitable to appear as is in the asm listing, e.g. to filter out asm instruction names

# File metasm/compile_c.rb, line 431
# Tells whether the name of +var+ cannot be used as is in the asm listing:
# true when the cpu of the exeformat (if there is one) rejects it,
# otherwise whether it is one of the data directives db/dw/dd/dq.
def check_reserved_name(var)
        if (cpu = @exeformat.cpu) and cpu.check_reserved_name(var.name)
                true
        else
                %w[db dw dd dq].include?(var.name)
        end
end
compile() click to toggle source

compiles the c parser toplevel to assembler statements in self.source (::Array of ::String)

starts by precompiling parser.toplevel (destructively): static symbols are converted to toplevel ones, as are nested functions; uses an ExeFormat (the argument) to create unique label/variable names

remove typedefs/enums CExpressions: all expr types are converted to __int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void

struct member dereference/array indexes are converted to *(ptr + off)
commas are converted to 2 statements, ?: are converted to If
:|| and :&& are converted to If + assignment to temporary
immediate quotedstrings/floats are converted to references to const static toplevel
postincrements are replaced by a temporary (XXX arglist)
compound statements are unnested

Asm are kept (TODO precompile clobber types). Declarations: initializers are converted to separate assignment CExpressions. Blocks are kept unless empty. Structure dereferences/array indexing are converted to *(ptr + offset). While/For/DoWhile/Switch are converted to If/Goto. Continue/Break are converted to Goto. Cases are converted to Labels during Switch conversion. Label statements are removed. Return: 'return <foo>;' => 'return <foo>; goto <end_of_func>;', 'return;' => 'goto <eof>;'. If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:'

&& and || in condition are expanded to multiple If

functions returning struct are precompiled (in Declaration/CExpression/Return)

in a second phase, unused labels are removed from functions, as are noop gotos (goto x; x:). Dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO)

after that, toplevel is no longer valid C (bad types, blocks moved…)

then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source

returns the asm source in a single string

# File metasm/compile_c.rb, line 91
# Compiles the toplevel of @parser into asm statements appended to @source.
#
# The toplevel is precompiled first, then its statements are sorted into
# functions, initialized data, initialized const data and uninitialized
# data, each group being emitted under its own section.
#
# Returns all of @source joined with newlines.
# Raises a RuntimeError when an auto-generated label is already known to
# the exeformat, or when a toplevel statement is neither an Asm nor a
# Declaration.
def compile
        # the labels generated so far must not already exist in the exeformat;
        # they are registered there for the duration of the compilation
        cf = @exeformat.unique_labels_cache.keys & @auto_label_list.keys
        raise "compile_c name conflict: #{cf.inspect}" if not cf.empty?
        @exeformat.unique_labels_cache.update @auto_label_list

        @parser.toplevel.precompile(self)

        # reorder statements (arrays of Variables) following exe section typical order
        funcs, rwdata, rodata, udata = [], [], [], []
        @parser.toplevel.statements.each { |st|
                # toplevel asm is copied to the output right away, before any section
                if st.kind_of? Asm
                        @source << st.body
                        next
                end
                raise 'non-declaration at toplevel! ' + st.inspect if not st.kind_of? Declaration
                v = st.var
                if v.type.kind_of? Function
                        funcs << v if v.initializer        # no initializer == storage :extern
                elsif v.storage == :extern
                        # extern variable: nothing is emitted
                elsif v.initializer
                        # const objects and arrays of const elements are kept apart
                        if v.type.qualifier.to_a.include?(:const) or
                        (v.type.kind_of? Array and v.type.type.qualifier.to_a.include?(:const))
                                rodata << v
                        else
                                rwdata << v
                        end
                else
                        udata << v
                end
        }

        if not funcs.empty?
                @exeformat.compile_setsection @source, '.text'
                funcs.each { |func| c_function(func) }
                c_program_epilog
        end

        # the value returned for one variable is passed as +align+ for the next
        # one, and carries over from one data section to the following one
        align = 1
        if not rwdata.empty?
                @exeformat.compile_setsection @source, '.data'
                rwdata.each { |data| align = c_idata(data, align) }
        end

        if not rodata.empty?
                @exeformat.compile_setsection @source, '.rodata'
                rodata.each { |data| align = c_idata(data, align) }
        end

        if not udata.empty?
                @exeformat.compile_setsection @source, '.bss'
                udata.each  { |data| align = c_udata(data, align) }
        end

        # needed to allow asm parser to use our autogenerated label names
        @exeformat.unique_labels_cache.delete_if { |k, v| @auto_label_list[k] }

        @source.join("\n")
end
exception(msg='EOF unexpected') click to toggle source

allows 'raise self' (eg struct.offsetof)

# File metasm/compile_c.rb, line 34
# Builds (without raising it) a ParseError whose message is +msg+ prefixed
# with the expression currently held in @curexpr.
def exception(msg='EOF unexpected')
        location = @curexpr
        ParseError.new("near #{location}: #{msg}")
end
new_label(base='') click to toggle source
# File metasm/compile_c.rb, line 46
# Asks the exeformat for a new label derived from +base+, records it in
# @auto_label_list as generated by the compiler, and returns it.
def new_label(base='')
        fresh = @exeformat.new_label(base)
        @auto_label_list.store(fresh, true)
        fresh
end
sizeof(*a) click to toggle source
# File metasm/compile_c.rb, line 54
# Shortcut for @parser.sizeof, all the arguments are passed through.
def sizeof(*a)
        @parser.sizeof(*a)
end
toplevel() click to toggle source
# File metasm/compile_c.rb, line 52
# Shortcut for @parser.toplevel.
def toplevel
        @parser.toplevel
end
typesize() click to toggle source
# File metasm/compile_c.rb, line 53
# Shortcut for @parser.typesize.
def typesize
        @parser.typesize
end