class Metasm::Decompiler

Constants

AssignOp

TODO add methods to C::CExpr

Attributes

c_parser[RW]
dasm[RW]
forbid_decompile_ifwhile[RW]
forbid_decompile_types[RW]
forbid_optimize_code[RW]
forbid_optimize_dataflow[RW]
forbid_optimize_labels[RW]
recurse[RW]

recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done

Public Class Methods

new(dasm, cp = dasm.c_parser) click to toggle source
# File metasm/decompile.rb, line 30
def initialize(dasm, cp = dasm.c_parser)
        @dasm = dasm
        @recurse = 1/0.0      # Infinity
        @c_parser = cp || @dasm.cpu.new_cparser
end

Public Instance Methods

backtrace_target(expr, addr) click to toggle source

backtraces an expression from addr returns an integer, a label name, or an Expression XXX '(GetProcAddr(“foo”))()' should not decompile to 'foo()'

# File metasm/decompile.rb, line 315
def backtrace_target(expr, addr)
        if n = @dasm.backtrace(expr, addr).first
                return expr if n == Expression::Unknown
                n = Expression[n].reduce_rec
                n = @dasm.get_label_at(n) || n
                n = $1 if n.kind_of? ::String and n =~ /^thunk_(.*)/
                n
        else
                expr
        end
end
c_to_graph(st) click to toggle source
# File metasm/decompile.rb, line 1884
def c_to_graph(st)
        g = CGraph.new
        g.exprs = {}  # label => [exprs]
        g.to = {}     # label => [labels]
        g.block = {}  # label => is label in a block? (vs If#test)
        anon_label = 0        # when no label is there, use anon_label++
        # converts C code to a graph of codepath of cexprs
        to_graph = lambda { |stmt, l_cur, l_after, l_cont, l_break|
                case stmt
                when C::Label; g.to[l_cur] = [stmt.name] ; g.to[stmt.name] = [l_after]
                when C::Goto; g.to[l_cur] = [stmt.target]
                when C::Continue; g.to[l_cur] = [l_cont]
                when C::Break; g.to[l_cur] = [l_break]
                when C::CExpression
                        g.exprs[l_cur] = [stmt]
                        g.to[l_cur] = [l_after]
                when C::Return
                        g.exprs[l_cur] = [stmt.value] if stmt.value
                        g.to[l_cur] = []
                when C::Block
                        to_graph[stmt.statements, l_cur, l_after, l_cont, l_break]
                when ::Array
                        g.exprs[l_cur] = []
                        g.block[l_cur] = true
                        stmt.each_with_index { |s, i|
                                case s
                                when C::Declaration
                                when C::CExpression
                                        g.exprs[l_cur] << s
                                else
                                        l = anon_label += 1
                                        ll = anon_label += 1
                                        g.to[l_cur] = [l]
                                        g.block[l_cur] = true
                                        to_graph[stmt[i], l, ll, l_cont, l_break]
                                        l_cur = ll
                                        g.exprs[l_cur] = []
                                end
                        }
                        g.to[l_cur] = [l_after].compact
                when C::If
                        g.exprs[l_cur] = [stmt.test]
                        lt = anon_label += 1
                        to_graph[stmt.bthen, lt, l_after, l_cont, l_break]
                        le = anon_label += 1
                        to_graph[stmt.belse, le, l_after, l_cont, l_break]
                        g.to[l_cur] = [lt, le]
                when C::While, C::DoWhile
                        la = anon_label += 1
                        if stmt.kind_of? C::DoWhile
                                lt, lb = la, l_cur
                        else
                                lt, lb = l_cur, la
                        end
                        g.exprs[lt] = [stmt.test]
                        g.to[lt] = [lb, l_after]
                        to_graph[stmt.body, lb, lt, lt, l_after]
                when C::Asm, nil; g.to[l_cur] = [l_after]
                else puts "to_graph unhandled #{stmt.class}: #{stmt}" if $VERBOSE
                end
        }

        g.start = anon_label
        to_graph[st, g.start, nil, nil, nil]

        # optimize graph
        g.to_optim = {}
        g.to.each { |k, v| g.to_optim[k] = v.uniq }
        g.exprs.delete_if { |k, v| v == [] }
        g.to_optim.delete_if { |k, v|
                if v.length == 1 and not g.exprs[k] and v != [k]
                        g.to_optim.each_value { |t| if i = t.index(k) ; t[i] = v.first ; end }
                        true
                elsif v.length == 0 and not g.exprs[k]
                        g.to_optim.each_value { |t| t.delete k }
                        true
                end
        }

        g.from_optim = {}
        g.to_optim.each { |k, v| v.each { |t| (g.from_optim[t] ||= []) << k } }

        g
end
ce_patch(exprs, oldce, newce) click to toggle source

patches a set of exprs, replacing oldce by newce

# File metasm/decompile.rb, line 938
def ce_patch(exprs, oldce, newce)
        walk_ce(exprs) { |ce|
                case ce.op
                when :funcall
                        ce.lexpr = newce if ce.lexpr == oldce
                        ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce }
                else
                        ce.lexpr = newce if ce.lexpr == oldce
                        ce.rexpr = newce if ce.rexpr == oldce
                end
        }
end
ce_read(ce_, var) click to toggle source

checks if expr reads var

# File metasm/decompile.rb, line 917
def ce_read(ce_, var)
        isvar(ce_, var) or
        walk_ce(ce_) { |ce|
                case ce.op
                when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) }
                when :'='; break true if isvar(ce.rexpr, var)
                        break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var)        # *&var = 2
                else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var)
                end
        }
end
ce_write(ce_, var) click to toggle source

checks if expr writes var

# File metasm/decompile.rb, line 930
def ce_write(ce_, var)
        walk_ce(ce_) { |ce|
                break true if AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or
                        (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var))))
        }
end
cleanup_var_decl(scope, func) click to toggle source

reorder statements to put decl first, move assignments to decl, move args to func prototype

# File metasm/decompile.rb, line 2504
def cleanup_var_decl(scope, func)
        scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? }

        args = func.type.args
        decl = []
        scope.statements.delete_if { |sm|
                next if not sm.kind_of? C::Declaration
                if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/    # aliased vars: use 1st domain only
                        args << sm.var
                else
                        decl << sm
                end
                true
        }

        # move trivial affectations to initialiser
        # XXX a = 1 ; b = a ; a = 2
        go = true     # break from delete_if does not delete..
        scope.statements.delete_if { |st|
                if go and st.kind_of? C::CExpression and st.op == :'=' and st.rexpr.kind_of? C::CExpression and not st.rexpr.op and
                                st.rexpr.rexpr.kind_of? ::Integer and st.lexpr.kind_of? C::Variable and scope.symbol[st.lexpr.name]
                        st.lexpr.initializer = st.rexpr
                else
                        go = false
                end
        }

        # reorder declarations
        scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] }

        # ensure arglist has no hole (create&add unreferenced args)
        func.type.args = []
        argoff = @c_parser.typesize[:ptr]
        args.sort_by { |sm| sm.stackoff.to_i }.each { |a|
                # XXX misalignment ?
                if not curoff = a.stackoff
                        func.type.args << a # __fastcall
                        next
                end
                while curoff > argoff
                        wantarg = C::Variable.new
                        wantarg.name = scope.decompdata[:stackoff_name][argoff] || stackoff_to_varname(argoff)
                        wantarg.type = C::BaseType.new(:int)
                        wantarg.attributes = ['unused']
                        func.type.args << wantarg
                        scope.symbol[wantarg.name] = wantarg
                        argoff += @c_parser.typesize[:ptr]
                end
                func.type.args << a
                argoff += @c_parser.typesize[:ptr]
        }
end
decompile(*entry) click to toggle source

decompile recursively function from an entrypoint, then perform global optimisation (static vars, …) should be called once after everything is decompiled (global optimizations may bring bad results otherwise) use #decompile_func for incremental decompilation returns the #c_parser

# File metasm/decompile.rb, line 40
def decompile(*entry)
        entry.each { |f| decompile_func(f) }
        finalize
        @c_parser
end
decompile_c_types(scope) click to toggle source

assign type to vars (regs, stack & global) types are found by subfunction argument types & indirections, and propagated through assignments etc TODO when updating the type of a var, update the type of all cexprs where it appears

# File metasm/decompile.rb, line 1194
def decompile_c_types(scope)
        return if forbid_decompile_types

        # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct

        # name => type
        types = {}

        pscopevar = lambda { |e|
                e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.rexpr.kind_of? C::CExpression
                if e.kind_of? C::CExpression and e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable
                        e.rexpr.name if scope.symbol[e.rexpr.name]
                end
        }
        scopevar = lambda { |e|
                e = e.rexpr if e.kind_of? C::CExpression and not e.op
                if e.kind_of? C::Variable and scope.symbol[e.name]
                        e.name
                elsif e.kind_of? C::CExpression and e.op == :* and not e.lexpr
                        pscopevar[e.rexpr]
                end
        }
        globalvar = lambda { |e|
                e = e.rexpr if e.kind_of? C::CExpression and not e.op
                if e.kind_of? ::Integer and @dasm.get_section_at(e)
                        e
                elsif e.kind_of? C::Variable and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name)
                        e.name
                end
        }

        # check if a newly found type for o is better than current type
        # order: foo* > void* > foo
        better_type = lambda { |t0, t1|
                t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of? C::BaseType) or t0.untypedef.kind_of? C::Union or
                (t0.kind_of? C::BaseType and t1.kind_of? C::BaseType and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or
                (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed])
        }

        update_global_type = lambda { |e, t|
                if ne = new_global_var(e, t, scope)
                        ne.type = t if better_type[t, ne.type]      # TODO patch existing scopes using ne
                                                                # TODO rename (dword_xx -> byte_xx etc)
                        e = scope.symbol_ancestors[e] || e if e.kind_of? String     # exe reloc
                        walk_ce(scope) { |ce|
                                ce.lexpr = ne if ce.lexpr == e
                                ce.rexpr = ne if ce.rexpr == e
                                if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of? C::Union
                                        # *struct -> struct->bla
                                        ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type))
                                elsif ce.lexpr == ne or ce.rexpr == ne
                                        # set ce type according to l/r
                                        # TODO set ce.parent type etc
                                        ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type
                                end
                        }
                end
        }

        propagate_type = nil  # fwd declaration
        propagating = []      # recursion guard (x = &x)
        # check if need to change the type of a var
        # propagate_type if type is updated
        update_type = lambda { |n, t|
                next if propagating.include? n
                o = scope.symbol[n].stackoff
                next if not o and t.untypedef.kind_of? C::Union
                next if o and scope.decompdata[:stackoff_type][o] and t != scope.decompdata[:stackoff_type][o]
                next if t0 = types[n] and not better_type[t, t0]
                next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned
                types[n] = t
                next if t == t0
                propagating << n
                propagate_type[n, t]
                propagating.delete n
                next if not o
                t = t.untypedef
                if t.kind_of? C::Struct
                        t.members.to_a.each { |m|
                                mo = t.offsetof(@c_parser, m.name)
                                next if mo == 0
                                scope.symbol.each { |vn, vv|
                                        update_type[vn, m.type] if vv.stackoff == o+mo
                                }
                        }
                end
        }

        # try to update the type of a var from knowing the type of an expr (through dereferences etc)
        known_type = lambda { |e, t|
                loop do
                        e = e.rexpr while e.kind_of? C::CExpression and not e.op and e.type == t
                        if o = scopevar[e]
                                update_type[o, t]
                        elsif o = globalvar[e]
                                update_global_type[o, t]
                        elsif not e.kind_of? C::CExpression
                        elsif o = pscopevar[e] and t.pointer?
                                update_type[o, t.pointed]
                        elsif e.op == :* and not e.lexpr
                                e = e.rexpr
                                t = C::Pointer.new(t)
                                next
                        elsif t.pointer? and e.op == :+ and e.lexpr.kind_of? C::CExpression and e.lexpr.type.integral? and e.rexpr.kind_of? C::Variable
                                e.lexpr, e.rexpr = e.rexpr, e.lexpr
                                next
                        elsif e.op == :+ and e.lexpr and e.rexpr.kind_of? C::CExpression
                                if not e.rexpr.op and e.rexpr.rexpr.kind_of? ::Integer
                                        if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0     # XXX relocatable + base=0..
                                                e = e.lexpr      # (int)*(x+2) === (int) *x
                                                next
                                        elsif globalvar[e.rexpr.rexpr]
                                                known_type[e.lexpr, C::BaseType.new(:int)]
                                                e = e.rexpr
                                                next
                                        end
                                elsif t.pointer? and (e.lexpr.kind_of? C::CExpression and e.lexpr.lexpr and [:<<, :>>, :*, :&].include? e.lexpr.op) or
                                                (o = scopevar[e.lexpr] and types[o] and types[o].integral? and
                                                 !(o = scopevar[e.rexpr] and types[o] and types[o].integral?))
                                        e.lexpr, e.rexpr = e.rexpr, e.lexpr       # swap
                                        e = e.lexpr
                                        next
                                elsif t.pointer? and ((e.rexpr.kind_of? C::CExpression and e.rexpr.lexpr and [:<<, :>>, :*, :&].include? e.rexpr.op) or
                                                (o = scopevar[e.rexpr] and types[o] and types[o].integral? and
                                                !(o = scopevar[e.lexpr] and types[o] and types[o].integral?)))
                                        e = e.lexpr
                                        next
                                end
                        end
                        break
                end
        }

        # we found a type for a var, propagate it through affectations
        propagate_type = lambda { |var, type|
                walk_ce(scope) { |ce|
                        next if ce.op != :'='

                        if ce.lexpr.kind_of? C::Variable and ce.lexpr.name == var
                                known_type[ce.rexpr, type]
                                next
                        end
                        if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == var
                                known_type[ce.lexpr, type]
                                next
                        end

                        # int **x; y = **x  =>  int y
                        t = type
                        l = ce.lexpr
                        while l.kind_of? C::CExpression and l.op == :* and not l.lexpr
                                if var == pscopevar[l.rexpr]
                                        known_type[ce.rexpr, t]
                                        break
                                elsif t.pointer?
                                        l = l.rexpr
                                        t = t.pointed
                                else break
                                end
                        end

                        # int **x; **x = y  =>  int y
                        t = type
                        r = ce.rexpr
                        while r.kind_of? C::CExpression and r.op == :* and not r.lexpr
                                if var == pscopevar[r.rexpr]
                                        known_type[ce.lexpr, t]
                                        break
                                elsif t.pointer?
                                        r = r.rexpr
                                        t = t.pointed
                                else break
                                end
                        end

                        # TODO int *x; *x = *y; ?
                }
        }

        # put all those macros in use
        # use user-defined types first
        scope.symbol.each_value { |v|
                next if not v.kind_of? C::Variable or not v.stackoff or not t = scope.decompdata[:stackoff_type][v.stackoff]
                known_type[v, t]
        }

        # try to infer types from C semantics
        later = []
        walk_ce(scope) { |ce|
                if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of? ::Integer and
                                ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of? C::CExpression or ce.lexpr.op != :'*' or ce.lexpr.lexpr)))
                        # var = int
                        known_type[ce.lexpr, ce.rexpr.type]
                elsif ce.op == :funcall
                        f = ce.lexpr.type
                        f = f.pointed if f.pointer?
                        next if not f.kind_of? C::Function
                        # cast func args to arg prototypes
                        f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
                elsif ce.op == :* and not ce.lexpr
                        if e = ce.rexpr and e.kind_of? C::CExpression and not e.op and e = e.rexpr and e.kind_of? C::CExpression and
                                        e.op == :& and not e.lexpr and e.rexpr.kind_of? C::Variable and e.rexpr.stackoff
                                # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything
                                later << [ce.rexpr, C::Pointer.new(ce.type)]
                                next
                        end
                        known_type[ce.rexpr, C::Pointer.new(ce.type)]
                elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of? C::Function
                        # cast to fptr: must be a fptr
                        known_type[ce.rexpr, ce.type]
                end
        }

        later.each { |ce, t| known_type[ce, t] }

        # offsets have types now
        types.each { |v, t|
                # keep var type qualifiers
                q = scope.symbol[v].type.qualifier
                scope.symbol[v].type = t
                t.qualifier = q if q
        }


        # remove offsets to struct members
        # XXX this defeats antialiasing
        # off => [structoff, membername, membertype]
        memb = {}
        types.dup.each { |n, t|
                v = scope.symbol[n]
                next if not o = v.stackoff
                t = t.untypedef
                if t.kind_of? C::Struct
                        t.members.to_a.each { |tm|
                                moff = t.offsetof(@c_parser, tm.name)
                                next if moff == 0
                                types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff }
                                memb[o+moff] = [v, tm.name, tm.type]
                        }
                end
        }

        # patch local variables into the CExprs, incl unknown offsets
        varat = lambda { |n|
                v = scope.symbol[n]
                if s = memb[v.stackoff]
                        v = C::CExpression[s[0], :'.', s[1], s[2]]
                else
                        v.type = types[n] || C::BaseType.new(:int)
                end
                v
        }

        maycast = lambda { |v, e|
                if sizeof(v) != sizeof(e)
                        v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]]
                end
                v
        }
        maycast_p = lambda { |v, e|
                if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed)
                        C::CExpression[[:&, v], e.type]
                else
                        C::CExpression[:&, v]
                end
        }

        walk_ce(scope, true) { |ce|
                case
                when ce.op == :funcall
                        ce.rexpr.map! { |re|
                                if o = scopevar[re]; C::CExpression[maycast[varat[o], re]]
                                elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]]
                                else re
                                end
                        }
                when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr]
                when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr]
                        ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of? C::Variable
                when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr]
                when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr]
                when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]]
                when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]]
                end
        }

        fix_type_overlap(scope)
        fix_pointer_arithmetic(scope)

        # if int32 var_4 is always var_4 & 255, change type to int8
        varuse = Hash.new(0)
        varandff = Hash.new(0)
        varandffff = Hash.new(0)
        walk_ce(scope) { |ce|
                if ce.op == :& and ce.lexpr.kind_of? C::Variable and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
                        case ce.rexpr.rexpr
                        when 0xff; varandff[ce.lexpr.name] += 1
                        when 0xffff; varandffff[ce.lexpr.name] += 1
                        end
                end
                varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
                varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
        }
        varandff.each { |k, v|
                scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v
        }
        varandffff.each { |k, v|
                scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v
        }

        # propagate types to cexprs
        walk_ce(scope, true) { |ce|
                if ce.op
                        ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next
                        if ce.op == :'=' and ce.rexpr.kind_of? C::Typed and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?)
                                known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of? C::Function       # localvar = &struct with fptr
                                ce.rexpr = C::CExpression[[ce.rexpr], ce.type]
                        end
                elsif ce.type.pointer? and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed)
                        ce.type = ce.rexpr.type
                end
        }
end
decompile_cexpr(e, scope, itype=nil) click to toggle source

turns an Expression to a CExpression, create+declares needed variables in scope

# File metasm/decompile.rb, line 391
def decompile_cexpr(e, scope, itype=nil)
        case e
        when Expression
                if e.op == :'=' and e.lexpr.kind_of? ::String and e.lexpr =~ /^dummy_metasm_/
                        decompile_cexpr(e.rexpr, scope, itype)
                elsif e.op == :+ and e.rexpr.kind_of? ::Integer and e.rexpr < 0
                        decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype)
                elsif e.lexpr
                        a = decompile_cexpr(e.lexpr, scope, itype)
                        C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)]
                elsif e.op == :+
                        decompile_cexpr(e.rexpr, scope, itype)
                else
                        a = decompile_cexpr(e.rexpr, scope, itype)
                        C::CExpression[e.op, a]
                end
        when Indirection
                case e.len
                when 1, 2, 4, 8
                        bt = C::BaseType.new("__int#{e.len*8}".to_sym)
                else
                        bt = C::Struct.new
                        bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))]
                end
                itype = C::Pointer.new(bt)
                p = decompile_cexpr(e.target, scope, itype)
                p = C::CExpression[[p], itype] if not p.type.kind_of? C::Pointer
                C::CExpression[:*, p]
        when ::Integer
                C::CExpression[e]
        when C::CExpression
                e
        else
                name = e.to_s
                if not s = scope.symbol_ancestors[name]
                        s = C::Variable.new
                        s.type = C::BaseType.new(:__int32)
                        case e
                        when ::String       # edata relocation (rel.length = size of pointer)
                                return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope)
                        when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})")
                        else s.type.qualifier = [:volatile]
                                puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
                        end
                        s.name = name
                        scope.symbol[s.name] = s
                        scope.statements << C::Declaration.new(s)
                end
                s
        end
end
decompile_controlseq(scope) click to toggle source

changes ifgoto, goto to while/ifelse..

# File metasm/decompile.rb, line 525
def decompile_controlseq(scope)
        # TODO replace all this crap by a method using the graph representation
        scope.statements = decompile_cseq_if(scope.statements, scope)
        remove_labels(scope)
        scope.statements = decompile_cseq_if(scope.statements, scope)
        remove_labels(scope)
        # TODO harmonize _if/_while api (if returns a replacement, while patches)
        decompile_cseq_while(scope.statements, scope)
        decompile_cseq_switch(scope)
end
decompile_cseq_if(ary, scope) click to toggle source

ifgoto => ifthen ary is an array of statements where we try to find if () {} [else {}] recurses to then/else content

# File metasm/decompile.rb, line 654
def decompile_cseq_if(ary, scope)
        return ary if forbid_decompile_ifwhile
        # the array of decompiled statements to use as replacement
        ret = []
        # list of labels appearing in ary
        inner_labels = ary.grep(C::Label).map { |l| l.name }
        while s = ary.shift
                # recurse if it's not the first run
                if s.kind_of? C::If
                        s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
                        s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
                end

                # if (a) goto x; if (b) goto x; => if (a || b) goto x;
                while s.kind_of? C::If and s.bthen.kind_of? C::Goto and not s.belse and ary.first.kind_of? C::If and ary.first.bthen.kind_of? C::Goto and
                                not ary.first.belse and s.bthen.target == ary.first.bthen.target
                        s.test = C::CExpression[s.test, :'||', ary.shift.test]
                end

                # if (a) goto x; b; x:  => if (!a) { b; }
                if s.kind_of? C::If and s.bthen.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target }
                        # if {goto l;} a; l: => if (!) {a;}
                        s.test = C::CExpression.negate s.test
                        s.bthen = C::Block.new(scope)
                        s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen)
                        s.bthen.statements.pop      # remove l: from bthen, it is in ary (was needed in bthen for inner ifs)
                        ary[0...ary.index(l)] = []
                end

                if s.kind_of? C::If and (s.bthen.kind_of? C::Block or s.bthen.kind_of? C::Goto)
                        s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of? C::Goto

                        bts = s.bthen.statements

                        # if (a) if (b) { c; }  => if (a && b) { c; }
                        if bts.length == 1 and bts.first.kind_of? C::If and not bts.first.belse
                                s.test = C::CExpression[s.test, :'&&', bts.first.test]
                                bts = bts.first.bthen
                                bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts]
                        end

                        # if (a) { if (b) goto c; d; } c:  => if (a && !b) { d; }
                        if bts.first.kind_of? C::If and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of? C::Goto and ary[0].kind_of? C::Label and l.target == ary[0].name
                                s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)]
                                if e = bts.shift.belse
                                        bts.unshift e
                                end
                        end

                        # if () { goto a; } a:
                        if bts.last.kind_of? C::Goto and ary[0].kind_of? C::Label and bts.last.target == ary[0].name
                                bts.pop
                        end

                        # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer;
                        if bts.last.kind_of? C::Goto and not inner_labels.include? bts.last.target and g = ary.find { |ss| ss.kind_of? C::Goto or ss.kind_of? C::Return } and g.kind_of? C::Return
                                s.test = C::CExpression.negate s.test
                                ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)]
                        end

                        # if { a; goto l; } b; l: => if {a;} else {b;}
                        if bts.last.kind_of? C::Goto and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target }
                                s.belse = C::Block.new(scope)
                                s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse)
                                ary[0...ary.index(l)] = []
                                bts.pop
                        end

                        # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any;
                        if not s.belse and (bts.last.kind_of? C::Goto or bts.last.kind_of? C::Return) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target }
                                s.belse = C::Block.new(scope)
                                s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse)
                                ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], []
                        end

                        # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c;
                        if s.belse
                                bes = s.belse.statements
                                while not bts.empty?
                                        if bts.last.kind_of? C::Label; ary.unshift bts.pop
                                        elsif bes.last.kind_of? C::Label; ary.unshift bes.pop
                                        elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop
                                        else break
                                        end
                                end

                                # if () { a; } else { b; } => if () { a; } else b;
                                # if () { a; } else {} => if () { a; }
                                case bes.length
                                when 0; s.belse = nil
                                #when 1; s.belse = bes.first
                                end
                        end

                        # if () {} else { a; } => if (!) { a; }
                        # if () { a; } => if () a;
                        case bts.length
                        when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse
                        #when 1; s.bthen = bts.first        # later (allows simpler handling in _while)
                        end
                end

                # l1: l2: if () goto l1; goto l2;  =>  if(!) goto l2; goto l1;
                if s.kind_of? C::If
                        ls = s.bthen
                        ls = ls.statements.last if ls.kind_of? C::Block
                        if ls.kind_of? C::Goto
                                if li = inner_labels.index(ls.target)
                                        table = inner_labels
                                else
                                        table = ary.map { |st| st.name if st.kind_of? C::Label }.compact.reverse
                                        li = table.index(ls.target) || table.length
                                end
                                g = ary.find { |ss|
                                        break if ss.kind_of? C::Return
                                        next if not ss.kind_of? C::Goto
                                        table.index(ss.target).to_i > li
                                }
                                if g
                                        s.test = C::CExpression.negate s.test
                                        if not s.bthen.kind_of? C::Block
                                                ls = C::Block.new(scope)
                                                ls.statements << s.bthen
                                                s.bthen = ls
                                        end
                                        ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope)
                                end
                        end
                end

                ret << s
        end
        ret
end
decompile_cseq_switch(scope) click to toggle source

TODO

# File metasm/decompile.rb, line 844
def decompile_cseq_switch(scope)
        uncast = lambda { |e| e = e.rexpr while e.kind_of? C::CExpression and not e.op ; e }
        walk(scope) { |s|
                # XXX pfff...
                next if not s.kind_of? C::If
                # if (v < 12) return ((void(*)())(tableaddr+4*v))();
                t = s.bthen
                t = t.statements.first if t.kind_of? C::Block and t.statements.length == 1
                next if not t.kind_of? C::Return or not t.respond_to? :from_instr
                next if t.from_instr.comment.to_a.include? 'switch'
                next if not t.value.kind_of? C::CExpression or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of? C::CExpression or t.value.lexpr.op
                p = uncast[t.value.lexpr.rexpr]
                next if not p.kind_of? C::CExpression or p.op != :* or p.lexpr
                p = uncast[p.rexpr]
                next if not p.kind_of? C::CExpression or p.op != :+
                r, l = uncast[p.rexpr], uncast[p.lexpr]
                r, l = l, r if r.kind_of? C::CExpression
                next if not r.kind_of? ::Integer or not l.kind_of? C::CExpression or l.op != :* or not l.lexpr
                lr, ll = uncast[l.rexpr], uncast[l.lexpr]
                lr, ll = ll, lr if not ll.kind_of? ::Integer
                next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void)))
                base, index = r, lr
                if s.test.kind_of? C::CExpression and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and
                                s.test.rexpr.kind_of? C::CExpression and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of? ::Integer
                        t.from_instr.add_comment 'switch'
                        sup = s.test.rexpr.rexpr
                        rng = ((s.test.op == :<) ? (0...sup) : (0..sup))
                        from = t.from_instr.address
                        rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) }
                        @dasm.disassemble
                        throw :restart, :restart
                end
                puts "unhandled switch() at #{t.from_instr}" if $VERBOSE
        }
end
decompile_cseq_while(ary, scope) click to toggle source
# File metasm/decompile.rb, line 789
def decompile_cseq_while(ary, scope)
        return if forbid_decompile_ifwhile

        # find the next instruction that is not a label
        ni = lambda { |l| ary[ary.index(l)..-1].find { |s| not s.kind_of? C::Label } }

        # TODO XXX get rid of #index
        finished = false ; while not finished ; finished = true # 1.9 does not support 'retry'
        ary.each { |s|
                case s
                when C::Label
                        if ss = ni[s] and ss.kind_of? C::If and not ss.belse and ss.bthen.kind_of? C::Block
                                if ss.bthen.statements.last.kind_of? C::Goto and ss.bthen.statements.last.target == s.name
                                        ss.bthen.statements.pop
                                        if l = ary[ary.index(ss)+1] and l.kind_of? C::Label
                                                ss.bthen.statements.grep(C::If).each { |i|
                                                        i.bthen = C::Break.new if i.bthen.kind_of? C::Goto and i.bthen.target == l.name
                                                }
                                        end
                                        ary[ary.index(ss)] = C::While.new(ss.test, ss.bthen)
                                elsif ss.bthen.statements.last.kind_of? C::Return and g = ary[ary.index(s)+1..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name }
                                        wb = C::Block.new(scope)
                                        wb.statements = decompile_cseq_while(ary[ary.index(ss)+1...ary.index(g)], wb)
                                        w = C::While.new(C::CExpression.negate(ss.test), wb)
                                        ary[ary.index(ss)..ary.index(g)] = [w, *ss.bthen.statements]
                                        finished = false ; break  #retry
                                end
                        end
                        if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::Goto and _s.target == s.name }
                                wb = C::Block.new(scope)
                                wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb)
                                w = C::While.new(C::CExpression[1], wb)
                                ary[ary.index(s)..ary.index(g)] = [w]
                                finished = false ; break   #retry
                        end
                        if g = ary[ary.index(s)..-1].reverse.find { |_s| _s.kind_of? C::If and not _s.belse and gt = _s.bthen and
                                        (gt = gt.kind_of?(C::Block) && gt.statements.length == 1 ? gt.statements.first : gt) and gt.kind_of? C::Goto and gt.target == s.name }
                                wb = C::Block.new(scope)
                                wb.statements = decompile_cseq_while(ary[ary.index(s)...ary.index(g)], wb)
                                w = C::DoWhile.new(g.test, wb)
                                ary[ary.index(s)..ary.index(g)] = [w]
                                finished = false ; break   #retry
                        end
                when C::If
                        decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of? C::Block
                        decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of? C::Block
                when C::While, C::DoWhile
                        decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of? C::Block
                end
        }
        end
        ary
end
decompile_func(entry) click to toggle source

decompile a function, decompiling subfunctions as needed may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc)

# File metasm/decompile.rb, line 48
def decompile_func(entry)
        return if @recurse < 0
        entry = @dasm.normalize entry
        return if not @dasm.decoded[entry]

        # create a new toplevel function to hold our code
        func = C::Variable.new
        func.name = @dasm.auto_label_at(entry, 'func')
        if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type]
                rettype = f.decompdata[:return_type]
        else
                rettype = C::BaseType.new(:int)
        end
        func.type = C::Function.new rettype, []
        if @c_parser.toplevel.symbol[func.name]
                return if @recurse == 0
                if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
                        # recursive dependency: declare prototype
                        puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE
                        pre_recurse = @recurse
                        @recurse = 0
                        @c_parser.toplevel.symbol.delete func.name
                        decompile_func(entry)
                        @recurse = pre_recurse
                        if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
                                @c_parser.toplevel.statements << C::Declaration.new(func)
                        end
                end
                return
        end
        @c_parser.toplevel.symbol[func.name] = func
        puts "decompiling #{func.name}" if $VERBOSE

        while catch(:restart) { do_decompile_func(entry, func) } == :restart
                retval = :restart
        end

        @c_parser.toplevel.symbol[func.name] = func   # recursive func prototype could have overwritten us
        @c_parser.toplevel.statements << C::Declaration.new(func)

        puts " decompiled #{func.name}" if $VERBOSE

        retval
end
decompile_func_rec(entry) click to toggle source

calls #decompile_func with recurse -= 1 (internal use)

# File metasm/decompile.rb, line 94
def decompile_func_rec(entry)
        @recurse -= 1
        decompile_func(entry)
ensure
        @recurse += 1
end
do_decompile_func(entry, func) click to toggle source
# File metasm/decompile.rb, line 101
def do_decompile_func(entry, func)
        # find decodedinstruction graph of the function, decompile subfuncs
        myblocks = listblocks_func(entry)

        # [esp+8] => [:frameptr-12]
        makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }

        # find registry dependencies between blocks
        deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)

        scope = func.initializer = C::Block.new(@c_parser.toplevel)
        if df = @dasm.function[entry]
                scope.decompdata = df.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
        else
                scope.decompdata ||= {:stackoff_type => {}, :stackoff_name => {}}
        end

        # di blocks => raw c statements, declare variables
        @dasm.cpu.decompile_blocks(self, myblocks, deps, func)

        simplify_goto(scope)
        namestackvars(scope)
        unalias_vars(scope, func)
        decompile_c_types(scope)
        optimize(scope)
        remove_unreferenced_vars(scope)
        cleanup_var_decl(scope, func)
        if @recurse > 0
                decompile_controlseq(scope)
                optimize_vars(scope)
                optimize_ctrl(scope)
                optimize_vars(scope)
                remove_unreferenced_vars(scope)
                simplify_varname_noalias(scope)
                rename_variables(scope)
        end
        @dasm.cpu.decompile_check_abi(self, entry, func)

        case ret = scope.statements.last
        when C::CExpression; puts "no return at end of func" if $VERBOSE
        when C::Return
                if not ret.value
                        scope.statements.pop
                else
                        v = ret.value
                        v = v.rexpr if v.kind_of? C::CExpression and not v.op and v.rexpr.kind_of? C::Typed
                        func.type.type = v.type
                end
        end

        if @recurse == 0
                # we need only the prototype
                func.initializer = nil
        end
end
finalize() click to toggle source
# File metasm/decompile.rb, line 2412
def finalize
        optimize_global
        true
end
fix_pointer_arithmetic(scope) click to toggle source

fix pointer arithmetic (eg int foo += 4 => int* foo += 1) use struct member access (eg *(structptr+8) => structptr->bla) must be run only once, right after type setting

# File metasm/decompile.rb, line 1562
def fix_pointer_arithmetic(scope)
        walk_ce(scope, true) { |ce|
                if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include? ce.op
                        ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)]
                end

                if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Union))
                        ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
                end

                if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of? C::Struct
                        s = ce.rexpr.type.pointed.untypedef
                        m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
                        if sizeof(m) != sizeof(ce)
                                ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)]
                                next
                        end
                        # *structptr => structptr->member
                        ce.lexpr = ce.rexpr
                        ce.op = :'->'
                        ce.rexpr = m.name
                        ce.type = m.type
                        next
                elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of? C::Struct
                        s = ce.lexpr.type.untypedef
                        m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
                        ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type)
                        ce.type = m.type
                        next
                end

                if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer?
                        ce.type = ce.lexpr.type
                end

                if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr
                        ce.replace C::CExpression[ce.rexpr.rexpr]
                end

                next if not ce.lexpr or not ce.lexpr.type.pointer?
                if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of? C::Union and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and
                                ce.rexpr.rexpr.kind_of? ::Integer and o = ce.rexpr.rexpr
                        # structptr + 4 => &structptr->member
                        ce.replace structoffset(s, ce.lexpr, o, nil)
                elsif [:+, :-, :'+=', :'-='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or
                                (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of? C::CExpression and not i.op and i = i.rexpr) or true))) and
                                i.kind_of? ::Integer and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0
                        # ptr += 4 => ptr += 1
                        if not ce.rexpr.op
                                ce.rexpr.rexpr /= psz
                        else
                                ce.rexpr.lexpr.rexpr /= psz
                                if ce.rexpr.lexpr.rexpr == 1
                                        ce.rexpr = ce.rexpr.rexpr
                                end
                        end
                        ce.type = ce.lexpr.type

                elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1
                        # ptr+x => (ptrtype*)(((__int8*)ptr)+x)
                        # XXX create struct ?
                        ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral?
                        if sizeof(nil, ce.lexpr.type.pointed) != 1
                                ptype = ce.lexpr.type
                                p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))]
                                ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype]
                        end
                end
        }
end
fix_type_overlap(scope) click to toggle source

handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) must be done before #fix_pointer_arithmetic

# File metasm/decompile.rb, line 1635
def fix_type_overlap(scope)
        varinfo = {}
        scope.symbol.each_value { |var|
                next if not off = var.stackoff
                len = sizeof(var)
                varinfo[var] = [off, len]
        }

        varinfo.each { |v1, (o1, l1)|
                next if not v1.type.integral?
                varinfo.each { |v2, (o2, l2)|
                        # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;)
                        # TODO should check stuff with aliasing domains
                        next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1)
                        # v1 => *(&v2+delta)
                        p = C::CExpression[:&, v2]
                        p = C::CExpression[p, :+,  [o1-o2]]
                        p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type
                        p = C::CExpression[:*, p]
                        walk_ce(scope) { |ce|
                                ce.lexpr = p if ce.lexpr == v1
                                ce.rexpr = p if ce.rexpr == v1
                        }
                }
        }
end
isvar(ce, var) click to toggle source

checks if expr is a var (var or *&var)

# File metasm/decompile.rb, line 905
def isvar(ce, var)
        if var.stackoff and ce.kind_of? C::CExpression
                return unless ce.op == :* and not ce.lexpr
                ce = ce.rexpr
                ce = ce.rexpr while ce.kind_of? C::CExpression and not ce.op
                return unless ce.kind_of? C::CExpression and ce.op == :& and not ce.lexpr
                ce = ce.rexpr
        end
        ce == var
end
listblocks_func(entry) click to toggle source

return an array of [address of block start, list of block to]] decompile subfunctions

# File metasm/decompile.rb, line 275
def listblocks_func(entry)
        @autofuncs ||= []
        blocks = []
        entry = dasm.normalize entry
        todo = [entry]
        while a = todo.pop
                next if blocks.find { |aa, at| aa == a }
                next if not di = @dasm.di_at(a)
                blocks << [a, []]
                di.block.each_to { |ta, type|
                        next if type == :indirect
                        ta = dasm.normalize ta
                        if type != :subfuncret and not @dasm.function[ta] and
                                        (not @dasm.function[entry] or @autofuncs.include? entry) and
                                        di.block.list.last.opcode.props[:saveip]
                                # possible noreturn function
                                # XXX call $+5; pop eax
                                @autofuncs << ta
                                @dasm.function[ta] = DecodedFunction.new
                                puts "autofunc #{Expression[ta]}" if $VERBOSE
                        end

                        if @dasm.function[ta] and type != :subfuncret
                                f = dasm.auto_label_at(ta, 'func')
                                ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
                                ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret)
                                throw :restart, :restart if ret == :restart
                        else
                                @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta }
                                blocks.last[1] |= [ta]
                                todo << ta
                        end
                }
        end
        blocks
end
makestackvars(funcstart, blocks) click to toggle source

patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off

# File metasm/decompile.rb, line 328
def makestackvars(funcstart, blocks)
        blockstart = nil
        cache_di = nil
        cache = {}    # [i_s, e, type] => backtrace
        tovar = lambda { |di, e, i_s|
                case e
                when Expression; Expression[tovar[di, e.lexpr, i_s], e.op, tovar[di, e.rexpr, i_s]].reduce
                when Indirection; Indirection[tovar[di, e.target, i_s], e.len, e.origin]
                when :frameptr; e
                when ::Symbol
                        cache.clear if cache_di != di ; cache_di = di
                        vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart,
                                        :include_start => i_s, :no_check => true, :terminals => [:frameptr])
                        # backtrace only to blockstart first
                        if vals.length == 1 and ee = vals.first and ee.kind_of? Expression and (ee == Expression[:frameptr] or
                                        (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer) or
                                        (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of? Indirection and eep = ee.rexpr.pointer and
                                        (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of? ::Integer))))
                                ee
                        else
                        # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding..
                        vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart,
                                        :include_start => i_s, :no_check => true, :terminals => [:frameptr])
                        if vals.length == 1 and ee = vals.first and (ee.kind_of? Expression and (ee == Expression[:frameptr] or
                                        (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of? ::Integer)))
                                ee
                        else e
                        end
                        end
                else e
                end
        }

        # must not change bt_bindings until everything is backtracked
        repl_bind = {}        # di => bt_bd

        @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
                block.list.each { |di|
                        bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
                        newbd = repl_bind[di] = {}
                        bd.each { |k, v|
                                k = tovar[di, k, true] if k.kind_of? Indirection
                                next if k == Expression[:frameptr] or (k.kind_of? Expression and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of? ::Integer)
                                newbd[k] = tovar[di, v, false]
                        }
                }
        }

        repl_bind.each { |di, bd| di.backtrace_binding = bd }
end
namestackvars(scope) click to toggle source

patch scope to transform :frameoff-x into &var_x

# File metasm/decompile.rb, line 1163
def namestackvars(scope)
        off2var = {}
        newvar = lambda { |o, n|
                if not v = off2var[o]
                        v = off2var[o] = C::Variable.new
                        v.type = C::BaseType.new(:void)
                        v.name = n
                        v.stackoff = o
                        scope.symbol[v.name] = v
                        scope.statements << C::Declaration.new(v)
                end
                v
        }

        scope.decompdata[:stackoff_name].each { |o, n| newvar[o, n] }
        scope.decompdata[:stackoff_type].each { |o, t| newvar[o, stackoff_to_varname(o)] }

        walk_ce(scope) { |e|
                next if e.op != :+ and e.op != :-
                next if not e.lexpr.kind_of? C::Variable or e.lexpr.name != 'frameptr'
                next if not e.rexpr.kind_of? C::CExpression or e.rexpr.op or not e.rexpr.rexpr.kind_of? ::Integer
                off = e.rexpr.rexpr
                off = -off if e.op == :-
                v = newvar[off, stackoff_to_varname(off)]
                e.replace C::CExpression[:&, v]
        }
end
new_global_var(addr, type, scope=nil) click to toggle source
# File metasm/decompile.rb, line 180
def new_global_var(addr, type, scope=nil)
        addr = @dasm.normalize(addr)

        # (almost) NULL ptr
        return if addr.kind_of? Integer and addr >= 0 and addr < 32

        # check preceding structure we're hitting
        # TODO check what we step over when defining a new static struct
        0x100.times { |i_|
                next if not n = @dasm.get_label_at(addr-i_)
                next if not v = @c_parser.toplevel.symbol[n]
                next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of? C::Union
                break if i_ == 0     # XXX it crashes later if we dont break here
                next if sizeof(v.type.pointed) <= i_
                return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil)
        }

        ptype = type.pointed.untypedef if type.pointer?
        if ptype.kind_of? C::Function
                name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk')
                if @dasm.get_section_at(addr) and @recurse > 0
                        puts "found function pointer to #{name}" if $VERBOSE
                        @dasm.disassemble(addr) if not @dasm.decoded[addr]  # TODO disassemble_fast ?
                        f = @dasm.function[addr] ||= DecodedFunction.new
                        # TODO detect thunks (__noreturn)
                        f.decompdata ||= { :stackoff_type => {}, :stackoff_name => {} }
                        if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of? C::Function
                                os = @c_parser.toplevel.symbol.delete name
                                @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of? C::Declaration and ts.var.name == name }
                                aoff = 1
                                ptype.args.to_a.each { |a|
                                        aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
                                        f.decompdata[:stackoff_type][aoff] ||= a.type
                                        f.decompdata[:stackoff_name][aoff] ||= a.name if a.name
                                        aoff += sizeof(a) # ary ?
                                }
                                decompile_func_rec(addr)
                                s = @c_parser.toplevel.symbol[name]
                                walk_ce([@c_parser.toplevel, scope]) { |ce|
                                        ce.lexpr = s if ce.lexpr == os
                                        ce.rexpr = s if ce.rexpr == os
                                } if os and s      # update existing references to old instance
                        # else redecompile with new prototye ?
                        end
                end
        end

        name = case (type.pointer? && tsz = sizeof(nil, ptype))
        when 1; 'byte'
        when 2; 'word'
        when 4; 'dword'
        else 'unk'
        end
        name = 'stru' if ptype.kind_of? C::Union
        name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru')

        if not var = @c_parser.toplevel.symbol[name]
                var = C::Variable.new
                var.name = name
                var.type = type.pointer? ? C::Array.new(ptype) : type
                @c_parser.toplevel.symbol[var.name] = var
                @c_parser.toplevel.statements << C::Declaration.new(var)
        end
        if ptype.kind_of? C::Union and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length
                # TODO struct init, array, fptrs..
        elsif type.pointer? and not type.pointed.untypedef.kind_of? C::Function and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and
                        [1, 2, 4].include? tsz and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer)
                # TODO do not overlap other statics (but labels may refer to elements of the array...)
                data = (0..256).map {
                        v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness)
                        v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of? Expression # relocation
                        v
                }
                var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty?
                if (tsz == 1 or tsz == 2) and eos = data.index(0) and (0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } # printable str
                        # XXX 0x80 with ruby1.9...
                        var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil
                end
                if var.initializer.kind_of? ::Array and i = var.initializer.first and i.kind_of? C::CExpression and not i.op and i.rexpr.kind_of? C::Variable and
                                i.rexpr.type.kind_of? C::Function and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name))      # iat_ExternalFunc
                        i.type = i.rexpr.type
                        type = var.type = C::Array.new(C::Pointer.new(i.type))
                        var.initializer = [i]
                end
                var.initializer = nil if var.initializer.kind_of? ::Array and not type.untypedef.kind_of? C::Array
        end

        # TODO patch existing references to addr ? (or would they have already triggered new_global_var?)

        # return the object to use to replace the raw addr
        var
end
optimize(scope) click to toggle source

to be run with scope = function body with only CExpr/Decl/Label/Goto/IfGoto/Return, with correct variables types will transform += 1 to ++, inline them to prev/next statement ('++x; if (x)..' => 'if (++x)..') remove useless variables ('int i;', i never used or 'i = 1; j = i;', i never read after => 'j = 1;') remove useless casts ('(int)i' with 'int i;' => 'i')

# File metasm/decompile.rb, line 1666
def optimize(scope)
        optimize_code(scope)
        optimize_vars(scope)
        optimize_vars(scope)  # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i)
end
optimize_code(scope) click to toggle source

simplify cexpressions (char & 255, redundant casts, etc)

# File metasm/decompile.rb, line 1673
def optimize_code(scope)
        return if forbid_optimize_code

        sametype = lambda { |t1, t2|
                t1 = t1.untypedef
                t2 = t2.untypedef
                t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of? C::Function
                t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of? C::Function
                t1 == t2 or
                (t1.kind_of? C::Function and t2.kind_of? C::Function and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
                         t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
                (t1.kind_of? C::BaseType and t1.integral? and t2.kind_of? C::BaseType and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
                (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
        }

        # most of this is a CExpr#reduce
        future_array = []
        walk_ce(scope, true) { |ce|
                # (whatever)0 => 0
                if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.replace ce.rexpr
                end

                # *&bla => bla if types ok
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type]
                        ce.replace C::CExpression[ce.rexpr.rexpr]
                end

                # int x + 0xffffffff -> x-1
                if ce.lexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include? ce.op and
                                ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1
                        ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op]
                        ce.rexpr.rexpr = 1
                end

                # int *ptr; *(ptr + 4) => ptr[4]
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
                        future_array << var.name
                end

                # char x; x & 255 => x
                if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of? C::CExpression and
                                not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and m = (1 << (8*sizeof(ce.lexpr))) - 1 and
                                ce.rexpr.rexpr & m == m
                        ce.replace C::CExpression[ce.lexpr]
                end

                # a + -b  =>  a - b
                if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :- and not ce.rexpr.lexpr
                        ce.op, ce.rexpr = :-, ce.rexpr.rexpr
                end

                # (((int) i >> 31) & 1)  =>  i < 0
                if ce.op == :& and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1 and
                                ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of? C::CExpression and
                                not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1
                        ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]]
                end

                # a-b == 0  =>  a == b
                if ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include? ce.op and
                                ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :- and ce.lexpr.lexpr
                        ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
                end

                # (a > 0) != 0
                if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
                                [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
                        ce.replace ce.lexpr
                end

                # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] )  =>  jl
                # a<b  =>  true if !r  =>  a<0 == b<0  or a>=0  =>  a>=0 or b>=0
                # a>=b =>  true if  r  =>  a<0 == b>=0 and a<0  =>  a<0 and b>=0

                # x != (a && (b != x))  =>  [x && (!a || b)] || [!x && !(!a || b)]
                if ce.op == :'!=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :< and ce.rexpr.kind_of? C::CExpression and
                                ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.op == :'!=' and
                                ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
                        x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr
                        ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||',  [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ]
                        optimize_code(ce)
                end
                # (a != b) || a  =>  a || b
                if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
                        ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr
                        optimize_code(ce)
                end
                # (a<b) && !(a>=0 && b<0)  ||  (a>=b) && (a>=0 && b<0)  =>  (signed)a < (signed)b
                if ce.op == :'||' and ce.lexpr.kind_of? C::CExpression and ce.rexpr.kind_of? C::CExpression and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and
                                ce.lexpr.lexpr.kind_of? C::CExpression and ce.lexpr.lexpr.op == :<
                        a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr
                        if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and
                                        ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate
                                ce.replace C::CExpression[a, :'<', b]
                        end
                end
                # a && 1
                if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
                        if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0))
                                ce.replace C::CExpression[ce.lexpr]
                        elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall }      # cannot wipe if sideeffect
                                ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]]
                        end
                end
                # (b < c || b >= c)
                if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr]
                        ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]]
                end

                # (a < b) | (a == b)  =>  a <= b
                if ce.op == :| and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :== and ce.lexpr.kind_of? C::CExpression and
                                (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr
                        ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op]
                        ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
                end

                # a == 0  =>  !a
                if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr
                end

                if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer
                        ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]]
                end

                # !(bool) => bool
                if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include? ce.rexpr.op
                        ce.replace ce.rexpr.negate
                end

                # (foo)(bar)x => (foo)x
                if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
                        ce.rexpr = ce.rexpr.rexpr
                end

                # &struct.1stmember => &struct
                if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and
                                s.kind_of? C::Union and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0
                        ce.rexpr = ce.rexpr.lexpr
                        ce.type = C::Pointer.new(ce.rexpr.type)
                end

                # (1stmember*)structptr => &structptr->1stmember
                if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of? C::Typed and ce.rexpr.type.pointer? and
                                s = ce.rexpr.type.pointed.untypedef and s.kind_of? C::Union and ce.type.pointed.untypedef != s
                        ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))]
                        #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of? C::Function or (ce.rexpr.type.pointer? and
                        #ce.rexpr.type.pointed.untypedef.kind_of? C::Function)      # XXX ugly
                        # int32* v1 = (int32*)pstruct;
                        # z = v1+4  if v1 is not cast, the + is invalid (sizeof pointed changes)
                        # TODO when finding type of pstruct, set type of v1 accordingly
                end

                # (&foo)->bar => foo.bar
                if ce.op == :'->' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :& and not ce.lexpr.lexpr
                        ce.lexpr = ce.lexpr.rexpr
                        ce.op = :'.'
                end

                # (foo)bla => bla if bla of type foo
                if not ce.op and ce.rexpr.kind_of? C::Typed and sametype[ce.type, ce.rexpr.type]
                        ce.replace C::CExpression[ce.rexpr]
                end
                if ce.lexpr.kind_of? C::CExpression and not ce.lexpr.op and ce.lexpr.rexpr.kind_of? C::Variable and ce.lexpr.type == ce.lexpr.rexpr.type
                        ce.lexpr = ce.lexpr.rexpr
                end

                if ce.op == :'=' and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == :* and not ce.lexpr.lexpr and ce.lexpr.rexpr.kind_of? C::CExpression and
                                not ce.lexpr.rexpr.op and ce.lexpr.rexpr.type.pointer? and ce.lexpr.rexpr.type.pointed != ce.rexpr.type
                        ce.lexpr.rexpr.type = C::Pointer.new(ce.rexpr.type)
                        optimize_code(ce.lexpr)
                end
        }

        # if there is a ptr[4], change all *ptr to ptr[0] for consistency
        # do this after the first pass, which may change &*ptr to ptr
        walk_ce(scope) { |ce|
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::Variable and future_array.include? ce.rexpr.name
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0]
                end
        } if not future_array.empty?

        # if (x != 0) => if (x)
        walk(scope) { |st|
                if st.kind_of? C::If and st.test.kind_of? C::CExpression and st.test.op == :'!=' and
                                st.test.rexpr.kind_of? C::CExpression and not st.test.rexpr.op and st.test.rexpr.rexpr == 0
                        st.test = C::CExpression[st.test.lexpr]
                end
        }
end
optimize_ctrl(scope) click to toggle source

optimize if() { a; } to if() a;

# File metasm/decompile.rb, line 537
def optimize_ctrl(scope)
        simplify_goto(scope, true)

        # break/continue
        # XXX if (foo) while (bar) goto bla; bla:  should => break
        walk = lambda { |e, brk, cnt|
                case e
                when C::Block
                        walk[e.statements, brk, cnt]
                        e
                when ::Array
                        e.each_with_index { |st, i|
                                case st
                                when C::While, C::DoWhile
                                        l1 = (e[i+1].name if e[i+1].kind_of? C::Label)
                                        l2 = (e[i-1].name if e[i-1].kind_of? C::Label)
                                        e[i].body = walk[st.body, l1, l2]
                                else
                                        e[i] = walk[st, brk, cnt]
                                end
                        }
                        e
                when C::If
                        e.bthen = walk[e.bthen, brk, cnt] if e.bthen
                        e.belse = walk[e.belse, brk, cnt] if e.belse
                        e
                when C::While, C::DoWhile
                        e.body = walk[e.body, nil, nil]
                        e
                when C::Goto
                        if e.target == brk
                                C::Break.new
                        elsif e.target == cnt
                                C::Continue.new
                        else e
                        end
                else e
                end
        }
        walk[scope, nil, nil]

        remove_labels(scope)

        # while (1) { a; if(b) { c; return; }; d; }  =>  while (1) { a; if (b) break; d; } c;
        while st = scope.statements.last and st.kind_of? C::While and st.test.kind_of? C::CExpression and
                        not st.test.op and st.test.rexpr == 1 and st.body.kind_of? C::Block
                break if not i = st.body.statements.find { |ist|
                        ist.kind_of? C::If and not ist.belse and ist.bthen.kind_of? C::Block and ist.bthen.statements.last.kind_of? C::Return
                }
                walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of? C::Block and sst.outer == i.bthen }
                scope.statements.concat i.bthen.statements
                i.bthen = C::Break.new
        end

        patch_test = lambda { |ce|
                ce = ce.rexpr if ce.kind_of? C::CExpression and ce.op == :'!'
                # if (a+1)  =>  if (a != -1)
                if ce.kind_of? C::CExpression and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and ce.lexpr
                        ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+
                        ce.op = :'!='
                end
        }

        walk(scope) { |ce|
                case ce
                when C::If
                        patch_test[ce.test]
                        if ce.bthen.kind_of? C::Block
                                case ce.bthen.statements.length
                                when 1
                                        walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of? C::Block and sst.outer == ce.bthen }
                                        ce.bthen = ce.bthen.statements.first
                                when 0
                                        if not ce.belse and i = ce.bthen.outer.statements.index(ce)
                                                ce.bthen.outer.statements[i] = ce.test   # TODO remove sideeffectless parts
                                        end
                                end
                        end
                        if ce.belse.kind_of? C::Block and ce.belse.statements.length == 1
                                walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of? C::Block and sst.outer == ce.belse }
                                ce.belse = ce.belse.statements.first
                        end
                when C::While, C::DoWhile
                        patch_test[ce.test]
                        if ce.body.kind_of? C::Block
                               case ce.body.statements.length
                               when 1
                                       walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of? C::Block and sst.outer == ce.body }
                                       ce.body = ce.body.statements.first
                               when 0
                                       if ce.kind_of? C::DoWhile and i = ce.body.outer.statements.index(ce)
                                              ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
                                       end
                                       ce.body = nil
                               end
                        end
                end
        }
        walk(scope) { |ce|
                next if not ce.kind_of? C::Block
                st = ce.statements
                st.length.times { |n|
                        while st[n].kind_of? C::If and st[n+1].kind_of? C::If and not st[n].belse and not st[n+1].belse and (
                                        (st[n].bthen.kind_of? C::Return and st[n+1].bthen.kind_of? C::Return and st[n].bthen.value == st[n+1].bthen.value) or
                                        (st[n].bthen.kind_of? C::Break and st[n+1].bthen.kind_of? C::Break) or
                                        (st[n].bthen.kind_of? C::Continue and st[n+1].bthen.kind_of? C::Continue))
                                # if (a) return x; if (b) return x; => if (a || b) return x;
                                st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test]
                                st.delete_at(n+1)
                        end
                }
        }
end
optimize_global() click to toggle source
# File metasm/decompile.rb, line 2417
def optimize_global
        # check all global vars (pointers to global data)
        tl = @c_parser.toplevel
        vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of? C::Variable and not tl.symbol[k].type.kind_of? C::Function }
        countref = Hash.new(0)

        walk_ce(tl) { |ce|
                # XXX int foo; void bar() { int foo; }  =>  false negative
                countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
                countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
        }

        vars.delete_if { |v| countref[v] == 0 }
        countref.delete_if { |k, v| not vars.include? k }

        # by default globals are C::Arrays
        # if all references are *foo, dereference the var type
        # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc)
        countderef = Hash.new(0)
        walk_ce(tl) { |ce|
                if ce.op == :* and not ce.lexpr
                        r = ce.rexpr
                elsif ce.op == :'->'
                        r = C::CExpression[ce.lexpr]
                else next
                end
                # compare type.type cause var is an Array and the cast is a Pointer
                countderef[r.rexpr.name] += 1 if r.kind_of? C::CExpression and not r.op and r.rexpr.kind_of? C::Variable and
                                sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
        }
        vars.each { |n|
                if countref[n] == countderef[n]
                        v = tl.symbol[n]
                        target = C::CExpression[:*, [v]]
                        v.type = v.type.type
                        v.initializer = v.initializer.first if v.initializer.kind_of? ::Array
                        walk_ce(tl) { |ce|
                                if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
                                        ce.op = :'.'
                                elsif ce.lexpr == target
                                        ce.lexpr = v
                                end
                                ce.rexpr = v if ce.rexpr == target
                                ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target
                        }
                end
        }

        # if a global var appears only in one function, make it a static variable
        tl.statements.each { |st|
                next if not st.kind_of? C::Declaration or not st.var.type.kind_of? C::Function or not scope = st.var.initializer
                localcountref = Hash.new(0)
                walk_ce(scope) { |ce|
                        localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of? C::Variable
                        localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of? C::Variable
                }

                vars.delete_if { |n|
                        next if scope.symbol[n]
                        next if localcountref[n] != countref[n]
                        v = tl.symbol.delete(n)
                        tl.statements.delete_if { |d| d.kind_of? C::Declaration and d.var.name == n }

                        if countref[n] == 1 and v.initializer.kind_of? C::CExpression and v.initializer.rexpr.kind_of? String
                                walk_ce(scope) { |ce|
                                        if ce.rexpr.kind_of? C::Variable and ce.rexpr.name == n
                                                if not ce.op
                                                        ce.replace v.initializer
                                                else
                                                        ce.rexpr = v.initializer
                                                end
                                        elsif ce.lexpr.kind_of? C::Variable and ce.lexpr.name == n
                                                ce.lexpr = v.initializer
                                        end
                                }
                        else
                                v.storage = :static
                                scope.symbol[v.name] = v
                                scope.statements.unshift C::Declaration.new(v)
                        end

                        true
                }
        }
end
optimize_vars(scope) click to toggle source

dataflow optimization condenses expressions (++x; if (x) => if (++x)) remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc)

# File metasm/decompile.rb, line 1972
def optimize_vars(scope)
        return if forbid_optimize_dataflow

        g = c_to_graph(scope)

        # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..)
        # returns the first subexpr to read var in ce
        # returns :write if var is rewritten
        # returns nil if var not read
        # may return a cexpr var += 2
        find_next_read_ce = lambda { |ce_, var|
                walk_ce(ce_, true) { |ce|
                        case ce.op
                        when :funcall
                                break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var }
                        when :'='
                                # a=a  /  a=a+1  => yield a, not :write
                                break ce if ce.rexpr == var
                                break :write if ce.lexpr == var
                        else
                                break ce if ce.lexpr == var or ce.rexpr == var
                        end
                }
        }

        # badlabels is a list of labels that may be reached without passing through the first invocation block
        find_next_read_rec = lambda { |label, idx, var, done, badlabels|
                next if done.include? label
                done << label if idx == 0

                idx += 1 while ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
                next ret if ret

                to = g.to_optim[label].to_a.map { |t|
                        break [:split] if badlabels.include? t
                        find_next_read_rec[t, 0, var, done, badlabels]
                }.compact

                tw = to - [:write]
                if to.include? :split or tw.length > 1
                        :split
                elsif tw.length == 1
                        tw.first
                elsif to.include? :write
                        :write
                end
        }
        # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason
        find_next_read = nil
        find_prev_read_rec = lambda { |label, idx, var, done|
                next if done.include? label
                done << label if idx == g.exprs[label].length-1

                idx -= 1 while idx >= 0 and ce = g.exprs[label].to_a[idx] and not ret = find_next_read_ce[ce, var]
                if ret.kind_of? C::CExpression
                        fwchk = find_next_read[label, idx+1, var]
                        ret = fwchk if not fwchk.kind_of? C::CExpression
                end
                next ret if ret

                from = g.from_optim[label].to_a.map { |f|
                        find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done]
                }.compact

                next :split if from.include? :split
                fw = from - [:write]
                if fw.length == 1
                        fw.first
                elsif fw.length > 1
                        :split
                elsif from.include? :write
                        :write
                end
        }

        # list of labels reachable without using a label
        badlab = {}
        build_badlabel = lambda { |label|
                next if badlab[label]
                badlab[label] = []
                todo = [g.start]
                while l = todo.pop
                        next if l == label or badlab[label].include? l
                        badlab[label] << l
                        todo.concat g.to_optim[l].to_a
                end
        }

        # returns the next subexpr where var is read
        # returns :write if var is written before being read
        # returns :split if the codepath splits with both subpath reading or codepath merges with another
        # returns nil if var is never read
        # idx is the index of the first cexpr at g.exprs[label] to look at
        find_next_read = lambda { |label, idx, var|
                find_next_read_rec[label, idx, var, [], []]
        }
        find_prev_read = lambda { |label, idx, var|
                find_prev_read_rec[label, idx, var, []]
        }
        # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label
        find_next_read_bl = lambda { |label, idx, var|
                build_badlabel[label]
                find_next_read_rec[label, idx, var, [], badlab[label]]
        }

        # walk each node, optimize data accesses there
        # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later.
        g.exprs.each { |label, exprs|
                next if not g.block[label]
                i = 0
                while i < exprs.length
                        e = exprs[i]
                        i += 1

                        # TODO x = x + 1  =>  x += 1  =>  ++x       here, move all other optimizations after (in optim_code)
                        # needs also int & 0xffffffff -> int, *&var  etc (decomp_type? optim_type?)
                        if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of? C::Variable and
                                        scope.symbol[v.name] and not v.type.qualifier.to_a.include? :volatile
                                next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of? C::CExpression) and
                                        !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of? C::CExpression)
                                next if oe.op == :& and not oe.lexpr       # no &(++eax)

                                # merge pre/postincrement into next/prev var usage
                                # find_prev_read must fwd check when it finds something, to avoid
                                #  while(x) x++; return x; to be converted to while(x++); return x;  (return wrong value)
                                case oe.op
                                when e.op
                                        # bla(i--); --i   bla(--i); --i   ++i; bla(i++)  =>  ignore
                                        next if pos == :pre or oe.lexpr
                                        # ++i; bla(++i)  =>  bla(i += 2)
                                        oe.lexpr = oe.rexpr
                                        oe.op = ((oe.op == :'++') ? :'+=' : :'-=')
                                        oe.rexpr = C::CExpression[2]

                                when :'++', :'--'  # opposite of e.op
                                        if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr)
                                                # ++i; bla(--i)  =>  bla(i)
                                                # bla(i--); ++i  =>  bla(i)
                                                oe.op = nil
                                        elsif pos == :post
                                                # ++i; bla(i--)  =>  bla(i+1)
                                                oe.op = ((oe.op == :'++') ? :- : :+)
                                                oe.rexpr = C::CExpression[1]
                                        elsif pos == :pre
                                                # bla(--i); ++i  =>  bla(i-1)
                                                oe.lexpr = oe.rexpr
                                                oe.op = ((oe.op == :'++') ? :+ : :-)
                                                oe.rexpr = C::CExpression[1]
                                        end
                                when :'+=', :'-='
                                        # TODO i++; i += 4  =>  i += 5
                                        next
                                when *AssignOp
                                        next      # ++i; i |= 4  =>  ignore
                                else
                                        if    pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v]
                                        elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v]
                                        elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op]
                                        elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op]
                                        else raise 'foobar'       # find_dir_read failed
                                        end
                                end

                                i -= 1
                                exprs.delete_at(i)
                                e.lexpr = e.op = e.rexpr = nil


                        elsif e.op == :'=' and v = e.lexpr and v.kind_of? C::Variable and scope.symbol[v.name] and
                                        not v.type.qualifier.to_a.include? :volatile and not find_next_read_ce[e.rexpr, v]

                                # reduce trivial static assignments
                                if (e.rexpr.kind_of? C::CExpression and iv = e.rexpr.reduce(@c_parser) and iv.kind_of? ::Integer) or
                                   (e.rexpr.kind_of? C::CExpression and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of? C::Variable) or
                                   (e.rexpr.kind_of? C::Variable and e.rexpr.type.kind_of? C::Array)
                                        rewritten = false
                                        readers = []
                                        discard = [e]
                                        g.exprs.each { |l, el|
                                                el.each_with_index { |ce, ci|
                                                        if ce_write(ce, v) and [label, i-1] != [l, ci]
                                                                if ce == e
                                                                        discard << ce
                                                                else
                                                                        rewritten = true
                                                                        break
                                                                end
                                                        elsif ce_read(ce, v)
                                                                if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v }
                                                                        # i = 2 ; j = &i  =!>  j = &2
                                                                        rewritten = true
                                                                        break
                                                                end
                                                                readers << ce
                                                        end
                                                } if not rewritten
                                        }
                                        if not rewritten
                                                ce_patch(readers, v, C::CExpression[iv || e.rexpr])
                                                discard.each { |d| d.lexpr = d.op = d.rexpr = nil }
                                                next
                                        end
                                end

                                case nr = find_next_read[label, i, v]
                                when C::CExpression
                                        # read in one place only, try to patch rexpr in there
                                        r = e.rexpr

                                        # must check for conflicts (x = y; y += 1; foo(x)  =!>  foo(y))
                                        # XXX x = a[1]; *(a+1) = 28; foo(x)...
                                        isfunc = false
                                        depend_vars = []
                                        walk_ce(C::CExpression[r]) { |ce|
                                                isfunc = true if ce.op == :func and (not ce.lexpr.kind_of? C::Variable or
                                                                not ce.lexpr.has_attribute('pure'))    # XXX is there a C attr for func depending only on staticvars+param ?
                                                depend_vars << ce.lexpr if ce.lexpr.kind_of? C::Variable
                                                depend_vars << ce.rexpr if ce.rexpr.kind_of? C::Variable and (ce.lexpr or ce.op != :&)   # a = &v; v = 12; func(a) => func(&v)
                                                depend_vars << ce if ce.lvalue?
                                                depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of? ::Array
                                        }
                                        depend_vars.uniq!

                                        # XXX x = 1; if () { x = 2; } foo(x)  =!>  foo(1)  (find_next_read will return this)
                                        #     we'll just redo a find_next_read like
                                        # XXX b = &a; a = 1; *b = 2; foo(a)  unhandled & generate bad C
                                        l_l = label
                                        l_i = i
                                        while g.exprs[l_l].to_a.each_with_index { |ce_, n_i|
                                                next if n_i < l_i
                                                # count occurences of read v in ce_
                                                cnt = 0
                                                bad = false
                                                walk_ce(ce_) { |ce|
                                                        case ce.op
                                                        when :funcall
                                                                bad = true if isfunc
                                                                ce.rexpr.each { |a| cnt += 1 if a == v }
                                                                cnt += 1 if ce.lexpr == v
                                                        when :'='
                                                                bad = true if depend_vars.include? ce.lexpr
                                                                cnt += 1 if ce.rexpr == v
                                                        else
                                                                bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include? ce.rexpr
                                                                bad = true if AssignOp.include? ce.op and depend_vars.include? ce.lexpr
                                                                cnt += 1 if ce.lexpr == v
                                                                cnt += 1 if ce.rexpr == v
                                                        end
                                                }
                                                case cnt
                                                when 0
                                                        break if bad
                                                        next
                                                when 1   # good
                                                        break if e.complexity > 10 and ce_.complexity > 3       # try to keep the C readable
                                                        # x = 1; y = x; z = x;  =>  cannot suppress x
                                                        nr = find_next_read[l_l, n_i+1, v]
                                                        break if (nr.kind_of? C::CExpression or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v }
                                                else break       # a = 1; b = a + a  => fail
                                                end

                                                # TODO XXX x = 1; y = x; z = x;
                                                res = walk_ce(ce_, true) { |ce|
                                                        case ce.op
                                                        when :funcall
                                                                if ce.rexpr.to_a.each_with_index { |a,i_|
                                                                        next if a != v
                                                                        ce.rexpr[i_] = r
                                                                        break :done
                                                                } == :done
                                                                        break :done
                                                                elsif ce.lexpr == v
                                                                        ce.lexpr = r
                                                                        break :done
                                                                elsif isfunc
                                                                        break :fail
                                                                end
                                                        when *AssignOp
                                                                break :fail if not ce.lexpr and depend_vars.include? ce.rexpr  # ++depend
                                                                if ce.rexpr == v
                                                                        ce.rexpr = r
                                                                        break :done
                                                                elsif ce.lexpr == v or depend_vars.include? ce.lexpr
                                                                        break :fail
                                                                end
                                                        else
                                                                break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v
                                                                if ce.lexpr == v
                                                                        ce.lexpr = r
                                                                        break :done
                                                                elsif ce.rexpr == v
                                                                        ce_.type = r.type if not ce_.op and ce_.rexpr == v    # return (int32)eax
                                                                        ce.rexpr = r
                                                                        break :done
                                                                end
                                                        end
                                                }
                                                case res
                                                when :done
                                                        i -= 1
                                                        exprs.delete_at(i)
                                                        e.lexpr = e.op = e.rexpr = nil
                                                        break
                                                when :fail
                                                        break
                                                end
                                        }
                                                # ignore branches that will never reuse v
                                                may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of? C::CExpression }
                                                if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l]
                                                        l_i = 0
                                                        l_l = to
                                                else break
                                                end
                                        end

                                when nil, :write
                                        # useless assignment (value never read later)
                                        # XXX foo = &bar; bar = 12; baz(*foo)
                                        e.replace(C::CExpression[e.rexpr])
                                        # remove sideeffectless subexprs
                                        loop do
                                                case e.op
                                                when :funcall, *AssignOp
                                                else
                                                        l = (e.lexpr.kind_of? C::CExpression and sideeffect(e.lexpr))
                                                        r = (e.rexpr.kind_of? C::CExpression and sideeffect(e.rexpr))
                                                        if l and r      # could split...
                                                        elsif l
                                                                e.replace(e.lexpr)
                                                                next
                                                        elsif r
                                                                e.replace(e.rexpr)
                                                                next
                                                        else # remove the assignment altogether
                                                                i -= 1
                                                                exprs.delete_at(i)
                                                                e.lexpr = e.op = e.rexpr = nil
                                                        end
                                                end
                                                break
                                        end
                                end
                        end
                end
        }

        # wipe cexprs marked in the previous step
        walk(scope) { |st|
                next if not st.kind_of? C::Block
                st.statements.delete_if { |e| e.kind_of? C::CExpression and not e.lexpr and not e.op and not e.rexpr }
        }

        # reoptimize cexprs
        walk_ce(scope, true) { |ce|
                # redo some simplification that may become available after variable propagation
                # int8 & 255  =>  int8
                if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1
                        ce.replace C::CExpression[ce.lexpr]
                end

                # int *ptr; *(ptr + 4) => ptr[4]
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of? C::Variable and var.type.pointer?
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
                end

                # useless casts
                if not ce.op and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of? C::CExpression or
                                (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of? C::Union))    # keep ((struct*)0)->memb
                        ce.rexpr = ce.rexpr.rexpr
                end
                if not ce.op and ce.rexpr.kind_of? C::CExpression and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?))
                        ce.replace ce.rexpr
                end
                # useless casts (type)*((oeua)Ptype)
                if not ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of? C::CExpression and not ce.rexpr.rexpr.op and
                                p = ce.rexpr.rexpr.rexpr and p.kind_of? C::Typed and p.type.pointer? and ce.type == p.type.pointed
                        ce.op = ce.rexpr.op
                        ce.rexpr = ce.rexpr.rexpr.rexpr
                end
                # (a > 0) != 0
                if ce.op == :'!=' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of? C::CExpression and
                                [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include? ce.lexpr.op
                        ce.replace ce.lexpr
                end
                # a == 0  =>  !a
                if ce.op == :== and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.replace C::CExpression[:'!', ce.lexpr]
                end
                # !(int)a  =>  !a
                if ce.op == :'!' and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? C::CExpression
                        ce.rexpr = ce.rexpr.rexpr
                end
                # (int)a < (int)b  =>  a < b TODO uint <-> int
                if [:<, :<=, :>, :>=].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.lexpr.kind_of? C::CExpression and not ce.rexpr.op and not ce.lexpr.op and
                        ce.rexpr.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.type.pointer?
                        ce.rexpr = ce.rexpr.rexpr
                        ce.lexpr = ce.lexpr.rexpr
                end

                # a & 3 & 1
                while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr.kind_of? ::Integer and
                                ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == ce.op and ce.lexpr.lexpr and
                                ce.lexpr.rexpr.kind_of? C::CExpression and ce.lexpr.rexpr.rexpr.kind_of? ::Integer
                        ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
                end

                # x = x | 4 => x |= 4
                if ce.op == :'=' and ce.rexpr.kind_of? C::CExpression and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include? ce.rexpr.op and ce.rexpr.lexpr == ce.lexpr
                        ce.op = (ce.rexpr.op.to_s + '=').to_sym
                        ce.rexpr = ce.rexpr.rexpr
                end

                # x += 1 => ++x
                if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
                        ce.lexpr, ce.op, ce.rexpr = nil, {:'+=' => :'++', :'-=' => :'--'}[ce.op], ce.lexpr
                end

                # --x+1 => x--
                if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of? C::CExpression and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and
                                ce.lexpr.rexpr and ce.rexpr.kind_of? C::CExpression and not ce.rexpr.op and ce.rexpr.rexpr == 1
                        ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil
                end
        }
end
redecompile(name) click to toggle source

redecompile a function, redecompiles functions calling it if its prototype changed

# File metasm/decompile.rb, line 158
def redecompile(name)
        @c_parser.toplevel.statements.delete_if { |st| st.kind_of? C::Declaration and st.var.name == name }
        oldvar = @c_parser.toplevel.symbol.delete name

        decompile_func(name)

        if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of? C::Function and newvar.type.kind_of? C::Function
                o, n = oldvar.type, newvar.type
                if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type }
                        # XXX a may depend on b and c, and b may depend on c -> redecompile c twice
                        # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs..
                        @c_parser.toplevel.statements.dup.each { |st|
                                next if not st.kind_of? C::Declaration
                                next if not st.var.initializer
                                next if st.var.name == name
                                next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of? C::Variable and ce.lexpr.name == name }
                                redecompile(st.var.name)
                        }
                end
        end
end
remove_labels(scope) click to toggle source

remove unused labels

# File metasm/decompile.rb, line 881
def remove_labels(scope)
        return if forbid_optimize_labels

        used = []
        walk(scope) { |ss|
                used |= [ss.target] if ss.kind_of? C::Goto
        }
        walk(scope) { |s|
                next if not s.kind_of? C::Block
                s.statements.delete_if { |l|
                        l.kind_of? C::Label and not used.include? l.name
                }
        }

        # remove implicit continue; at end of loop
        walk(scope) { |s|
                next if not s.kind_of? C::While
                if s.body.kind_of? C::Block and s.body.statements.last.kind_of? C::Continue
                        s.body.statements.pop
                end
        }
end
remove_unreferenced_vars(scope) click to toggle source
# File metasm/decompile.rb, line 2398
def remove_unreferenced_vars(scope)
        used = {}
        walk_ce(scope) { |ce|
                # remove unreferenced local vars
                used[ce.rexpr.name] = true if ce.rexpr.kind_of? C::Variable
                used[ce.lexpr.name] = true if ce.lexpr.kind_of? C::Variable
                ce.rexpr.each { |v| used[v.name] = true if v.kind_of? C::Variable } if ce.rexpr.kind_of?(::Array)
        }
        unused = scope.symbol.keys.find_all { |n| not used[n] }
        unused.each { |v| scope.symbol[v].add_attribute 'unused' }    # fastcall args need it
        scope.statements.delete_if { |sm| sm.kind_of? C::Declaration and unused.include? sm.var.name }
        scope.symbol.delete_if { |n, v| unused.include? n }
end
rename_variables(scope) click to toggle source

rename local variables from subfunc arg names

# File metasm/decompile.rb, line 2558
def rename_variables(scope)
        funcs = []
        cntrs = []
        cmpi = []

        walk_ce(scope) { |ce|
                funcs << ce if ce.op == :funcall
                cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++'
                cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include? ce.op and ce.rexpr.kind_of? C::CExpression and ce.rexpr.rexpr.kind_of? ::Integer
        }

        rename = lambda { |var, name|
                var = var.rexpr if var.kind_of? C::CExpression and not var.op
                next if not var.kind_of? C::Variable or not scope.symbol[var.name] or not name
                next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
                s = scope.symbol_ancestors
                n = name
                i = 0
                n = name + "#{i+=1}" while s[n]
                scope.symbol[n] = scope.symbol.delete(var.name)
                var.name = n
        }

        funcs.each { |ce|
                next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function
                ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa }
        }
        funcs.each { |ce|
                next if not ce.lexpr.kind_of? C::Variable or not ce.lexpr.type.kind_of? C::Function
                ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa|
                        next if not a.kind_of? C::CExpression or a.op != :& or a.lexpr
                        next if not fa or not fa.name
                        rename[a.rexpr, fa.name.sub(/^l?p/, '')]
                }
        }
        (cntrs & cmpi).each { |v| rename[v, 'cntr'] }
end
sideeffect(exp, scope=nil) click to toggle source

checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified)

# File metasm/decompile.rb, line 1867
def sideeffect(exp, scope=nil)
        case exp
        when nil, ::Numeric, ::String; false
        when ::Array; exp.any? { |_e| sideeffect _e, scope }
        when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include? :volatile
        when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or AssignOp.include?(exp.op) or
                        sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
        else true     # failsafe
        end
end
simplify_goto(scope, keepret = false) click to toggle source

simplify goto -> goto / goto -> return

# File metasm/decompile.rb, line 444
def simplify_goto(scope, keepret = false)
        if not keepret and scope.statements[-1].kind_of? C::Return and not scope.statements[-2].kind_of? C::Label
                scope.statements.insert(-2, C::Label.new("ret_label"))
        end

        jumpto = {}
        walk(scope) { |s|
                next if not s.kind_of? C::Block
                s.statements.each_with_index { |ss, i|
                        case ss
                        when C::Goto, C::Return
                                while l = s.statements[i -= 1] and l.kind_of? C::Label
                                        jumpto[l.name] = ss
                                end
                        end
                }
        }

        simpler = lambda { |s|
                case s
                when C::Goto
                        if jumpto[s.target]
                                r = jumpto[s.target].dup
                                r.value = r.value.deep_dup if r.kind_of? C::Return and r.value.kind_of? C::CExpression
                                r
                        end
                when C::Return
                        if not keepret and scope.statements[-1].kind_of? C::Return and s.value == scope.statements[-1].value and s != scope.statements[-1]
                                C::Goto.new(scope.statements[-2].name)
                        end
                end
        }

        walk(scope) { |s|
                case s
                when C::Block
                        s.statements.each_with_index { |ss, i|
                                if sp = simpler[ss]
                                        ss = s.statements[i] = sp
                                end
                        }
                when C::If
                        if sp = simpler[s.bthen]
                                s.bthen = sp
                        end
                end
        }

        # remove unreferenced labels
        remove_labels(scope)

        walk(scope) { |s|
                next if not s.kind_of? C::Block
                del = false
                # remove dead code  goto a; goto b; if (0) { z: bla; }  => rm goto b
                s.statements.delete_if { |st|
                        case st
                        when C::Goto, C::Return
                                olddel = del
                                del = true
                                olddel
                        else
                                del = false
                        end
                }
                # if () { goto x; } x:
                s.statements.each_with_index { |ss, i|
                        if ss.kind_of? C::If
                                t = ss.bthen
                                t = t.statements.first if t.kind_of? C::Block
                                if t.kind_of? C::Goto and s.statements[i+1].kind_of? C::Label and s.statements[i+1].name == t.target
                                        ss.bthen = C::Block.new(scope)
                                end
                        end
                }
        }

        remove_labels(scope)
end
simplify_varname_noalias(scope) click to toggle source

revert the unaliasing namechange of vars where no alias subsists

# File metasm/decompile.rb, line 1150
def simplify_varname_noalias(scope)
        names = scope.symbol.keys
        names.delete_if { |k|
                next if not b = k[/^(.*)_a\d+$/, 1]
                next if scope.symbol[k].stackoff.to_i > 0
                if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) }
                        scope.symbol[b] = scope.symbol.delete(k)
                        scope.symbol[b].name = b
                end
        }
end
sizeof(var, type=nil) click to toggle source

forwards to @c_parser, handles cast to Array (these should not happen btw…)

# File metasm/decompile.rb, line 2651
def sizeof(var, type=nil)
        var, type = nil, var if var.kind_of? C::Type and not type
        type ||= var.type
        return @c_parser.typesize[:ptr] if type.kind_of? C::Array and not var.kind_of? C::Variable
        @c_parser.sizeof(var, type) rescue -1
end
stackoff_to_varname(off) click to toggle source

give a name to a stackoffset (relative to start of func) 4 => :arg_0, -8 => :var_4 etc

# File metasm/decompile.rb, line 381
def stackoff_to_varname(off)
        if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr])        #  4 => arg_0,  8 => arg_4..
        elsif off > 0; 'arg_0%X' % off
        elsif off == 0; 'retaddr'
        elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8)    # -4 => var_0, -8 => var_4..
        else 'var_0%X' % -off
        end
end
structoffset(st, ptr, off, msz) click to toggle source

struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored)

# File metasm/decompile.rb, line 1520
def structoffset(st, ptr, off, msz)
        tabidx = off / sizeof(st)
        off -= tabidx * sizeof(st)
        ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of? C::Array
        return ptr if off == 0 and (not msz or        # avoid infinite recursion with eg chained list
                        (ptr.kind_of? C::CExpression and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
                        not s.type.untypedef.kind_of? C::Union))

        m_ptr = lambda { |m|
                if ptr.kind_of? C::CExpression and ptr.op == :& and not ptr.lexpr
                       C::CExpression[ptr.rexpr, :'.', m.name]
                else
                       C::CExpression[ptr, :'->', m.name]
                end
        }

        # recursive proc to list all named members, including in anonymous substructs
        submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil }
        mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact
        mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) }

        if  sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } ||
                 mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off }
                off -= mo[sm]
                sst = sm.type.untypedef
                #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st     # TODO fix infinite recursion on mutually recursive ptrs
                ptr = C::CExpression[:&, m_ptr[sm]]
                if sst.kind_of? C::Union
                        return structoffset(sst, ptr, off, msz)
                end
        end

        if off != 0
                C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]]
        else
                ptr
        end
end
unalias_var(var, scope, g = c_to_graph(scope)) click to toggle source

duplicates a var per domain value

# File metasm/decompile.rb, line 968
def unalias_var(var, scope, g = c_to_graph(scope))
        # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
        read = {}
        write = {}
        ro = {}
        wo = {}

        # list of [l, i] for which domain is not known
        unchecked = []

        # mark all exprs of the graph
        # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write
        r = var.has_attribute_var('register')
        g.exprs.each { |label, exprs|
                exprs.each_with_index { |ce, i|
                        if ce_read(ce, var)
                                if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
                                   (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
                                        (ro[label] ||= []) << i
                                        (wo[label] ||= []) << i
                                        unchecked << [label, i, :up] << [label, i, :down]
                                else
                                        (read[label] ||= []) << i
                                        unchecked << [label, i]
                                end
                        elsif ce_write(ce, var)
                                (write[label] ||= []) << i
                                unchecked << [label, i]
                        end
                }
        }

        # stuff when filling the domain (flood algorithm)
        dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil

        # flood by walking the graph up from [l, i] (excluded)
        # marks stuff do walk down
        walk_up = lambda { |l,  i|
                todo_w = [[l, i-1]]
                done_w = []
                while o = todo_w.pop
                        next if done_w.include? o
                        done_w << o
                        l, i = o
                        loop do
                                if read[l].to_a.include? i
                                        # XXX not optimal (should mark only the uppest read)
                                        todo_down |= [[l, i]] if not dom.include? [l, i]
                                        dom |= [[l, i]]
                                elsif write[l].to_a.include? i
                                        todo_down |= [[l, i]] if not dom.include? [l, i]
                                        dom |= [[l, i]]
                                        break
                                elsif wo[l].to_a.include? i
                                        todo_down |= [[l, i]] if not dom_wo.include? [l, i, :down]
                                        dom_wo |= [[l, i, :down]]
                                        break
                                end
                                i -= 1
                                if i < 0
                                        g.from_optim[l].to_a.each { |ll|
                                                todo_w << [ll, g.exprs[ll].to_a.length-1]
                                        }
                                        func_top = true if g.from_optim[l].to_a.empty?
                                        break
                                end
                        end
                end
        }

        # flood by walking the graph down from [l, i] (excluded)
        # malks stuff to walk up
        walk_down = lambda { |l, i|
                todo_w = [[l, i+1]]
                done_w = []
                while o = todo_w.pop
                        next if done_w.include? o
                        done_w << o
                        l, i = o
                        loop do
                                if read[l].to_a.include? i
                                        todo_up |= [[l, i]] if not dom.include? [l, i]
                                        dom |= [[l, i]]
                                elsif write[l].to_a.include? i
                                        break
                                elsif ro[l].to_a.include? i
                                        todo_up |= [[l, i]] if not dom_ro.include? [l, i, :up]
                                        dom_ro |= [[l, i, :up]]
                                        break
                                end
                                i += 1
                                if i >= g.exprs[l].to_a.length
                                        g.to_optim[l].to_a.each { |ll|
                                                todo_w << [ll, 0]
                                        }
                                        break
                                end
                        end
                end
        }

        # check it out
        while o = unchecked.shift
                dom = []
                dom_ro = []
                dom_wo = []
                func_top = false

                todo_up = []
                todo_down = []

                # init
                if read[o[0]].to_a.include? o[1]
                        todo_up << o
                        todo_down << o
                        dom << o
                elsif write[o[0]].to_a.include? o[1]
                        todo_down << o
                        dom << o
                elsif o[2] == :up
                        todo_up << o
                        dom_ro << o
                elsif o[2] == :down
                        todo_down << o
                        dom_wo << o
                else raise
                end

                # loop
                while todo_up.first or todo_down.first
                        todo_up.each { |oo| walk_up[oo[0], oo[1]] }
                        todo_up.clear

                        todo_down.each { |oo| walk_down[oo[0], oo[1]] }
                        todo_down.clear
                end

                unchecked -= dom + dom_wo + dom_ro

                next if func_top

                # patch
                n_i = 0
                n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]

                nv = var.dup
                nv.storage = :register if nv.has_attribute_var('register')
                nv.attributes = nv.attributes.dup if nv.attributes
                nv.name = newvarname
                scope.statements << C::Declaration.new(nv)
                scope.symbol[nv.name] = nv

                dom.each { |oo| ce_patch(g.exprs[oo[0]][oo[1]], var, nv) }
                dom_ro.each { |oo|
                        ce = g.exprs[oo[0]][oo[1]]
                        if ce.op == :funcall or ce.rexpr.kind_of? C::CExpression
                                ce_patch(ce.rexpr, var, nv)
                        else
                                ce.rexpr = nv
                        end
                }
                dom_wo.each { |oo|
                        ce = g.exprs[oo[0]][oo[1]]
                        if ce.op == :funcall
                        elsif ce.lexpr.kind_of? C::CExpression
                                ce_patch(ce.lexpr, var, nv)
                        else
                                ce.lexpr = nv
                        end
                }

                # check if the var is only used as an __out__ parameter
                if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO
                                arg.has_attribute('out') and not arg.has_attribute('in')
                        # *(int32*)&var_10 = &var_4;
                        # set_pointed_value(*(int32*)&var_10);  =>  writeonly var_4, may start a new domain
                        nv.add_attribute('out')
                end
        end
end
unalias_vars(scope, func) click to toggle source

duplicate vars per domain value eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1);

eax = 1; if (bla) eax = 2; foo(eax);   =>  no change
# File metasm/decompile.rb, line 955
def unalias_vars(scope, func)
        g = c_to_graph(scope)

        # unalias func args first, they may include __attr__((out)) needed by the others
        funcalls = []
        walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall }
        vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 }

        # find the domains of var aliases
        vars.each { |var| unalias_var(var, scope, g) }
end
walk(scope, post=false) { |scope| ... } click to toggle source

yields each statement (recursive)

# File metasm/decompile.rb, line 2627
def walk(scope, post=false, &b)
        case scope
        when ::Array; scope.each { |s| walk(s, post, &b) }
        when C::Statement
                yield scope if not post
                case scope
                when C::Block; walk(scope.statements, post, &b)
                when C::If
                        yield scope.test
                        walk(scope.bthen, post, &b)
                        walk(scope.belse, post, &b) if scope.belse
                when C::While, C::DoWhile
                        yield scope.test
                        walk(scope.body, post, &b)
                when C::Return
                        yield scope.value
                end
                yield scope if post
        when C::Declaration
                walk(scope.var.initializer, post, &b) if scope.var.initializer
        end
end
walk_ce(ce, post=false) { |ce| ... } click to toggle source

yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post)) if given a non-CExpr, walks it until it finds a CExpr to yield

# File metasm/decompile.rb, line 2598
def walk_ce(ce, post=false, &b)
        case ce
        when C::CExpression
                yield ce if not post
                walk_ce(ce.lexpr, post, &b)
                walk_ce(ce.rexpr, post, &b)
                yield ce if post
        when ::Array
                ce.each { |ce_| walk_ce(ce_, post, &b) }
        when C::Statement
                case ce
                when C::Block; walk_ce(ce.statements, post, &b)
                when C::If
                        walk_ce(ce.test, post, &b)
                        walk_ce(ce.bthen, post, &b)
                        walk_ce(ce.belse, post, &b) if ce.belse
                when C::While, C::DoWhile
                        walk_ce(ce.test, post, &b)
                        walk_ce(ce.body, post, &b)
                when C::Return
                        walk_ce(ce.value, post, &b) if ce.value
                end
        when C::Declaration
                walk_ce(ce.var.initializer, post, &b) if ce.var.initializer
        end
        nil
end