module PEdump::SigParser
Constants
- DATA_ROOT
- SPECIAL_PARSE_OPTIONS
- TEXT_SIGS_FILES
Public Class Methods
_add_sig(sigs, sig, args = {})
click to toggle source
XXX "B\xE9rczi G\xE1bor".force_encoding('binary').to_yaml: RuntimeError: expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS
# File lib/pedump/sig_parser.rb, line 134
#
# Normalizes one text signature and stores it in +sigs+, unless it is
# rejected as junk or as a duplicate of an entry already present.
#
# sigs:: Hash of accepted signatures, keyed by the normalized RE string
#        (space-separated upper-case byte pairs, '?' used as mask character)
# sig::  object answering +name+/+name=+ and +re+/+re=+; both attributes are
#        rewritten in place by this method
# args:: options Hash; only +:fix1+ and +:verbose+ are read here
#
# Raises RuntimeError ("null RE: ...") when +sig.re+ is nil/false.
# The return value is incidental; callers in this file ignore it.
def _add_sig sigs, sig, args = {}
  raise "null RE: #{sig.inspect}" unless sig.re

  # bad sigs: entries that are explicitly blacklisted by RE or by name
  return if sig.re[/\A538BD833C0A30:::::/]
  return if sig.name == "Name of the Packer v1.0"
  return if sig.name == "Alias PIX/Vivid IMG Graphics format"
  return if sig.name == "JAR Archive"
  return if sig.name == "Turbo / Borland Pascal v7.x Unit"
  return if sig.re == "54 68 69 73 20 70 72 6F 67 72 61 6D 20 63 61 6E 6E 6F 74 20 62 65 20 72 75 6E 20 69 6E 20 44 4F 53 20 6D 6F" # dos stub

  # name clean-up; the order matters because every step edits the same string
  sig.name.sub!(/^\*\s+/, '')             # leading "* "
  sig.name.sub!(/\s+\(h\)$/, '')          # trailing " (h)"
  sig.name.sub!(/version (\d)/i,"v\\1")   # "version 1" -> "v1"
  sig.name.sub!(/Microsoft/i, "MS")
  sig.name.sub!(/ or /i, " / ")
  sig.name.sub! 'RLP ','RLPack '
  sig.name.sub! '.beta', ' beta'
  sig.name.sub! '(com)','[com]'
  sig.name.gsub!(/ V(\d)/, " v\\1") # V1.1 -> v1.1
  sig.name = sig.name.split(/\s*-+>\s*/).join(' -> ') # fix spaces around '->'
  sig.name = sig.name.split(' ').delete_if do |x|
    # delete words: vX.X, v?.?, ?.?, x.x
    x =~ /\Av?[?x]\.[?x]\Z/i
  end.join(' ')

  # RE clean-up: upper-case, ':' masks become '?', and an unspaced hex string
  # is split into space-separated byte pairs
  sig.re = sig.re.strip.upcase.tr(':','?')
  # NOTE(review): a whitespace-only RE makes +split.first+ nil here, which
  # would raise NoMethodError - confirm whether the inputs can contain one.
  sig.re = sig.re.scan(/../).join(' ') if sig.re.split.first.size > 2

  # sig contains entirely zeroes or masks or only both
  a_bad = [%w'00', %w'??', %w'00 ??', %w'90', %w'90 ??']
  # ?a, 0? => ??, ??   (any element containing a '?' counts as a full mask)
  a_cur = sig.re.split.map{ |x| x['?'] ? '??' : x }.uniq.sort
  return if a_bad.include?(a_cur)

  # first byte is unique and all others are zeroes or masks
  a_cur = sig.re.split[1..-1].map{ |x| x['?'] ? '??' : x }.uniq.sort
  return if a_bad.include?(a_cur)

  # too short signatures (fewer than 3 unmasked bytes) are only reported,
  # not rejected: execution continues below
  if sig.re.split.delete_if{ |x| x['?'] }.size < 3
    # presumably awesome_print supplies String#red - TODO confirm
    require 'awesome_print'
    puts sig.inspect.red
  end

  # fs.txt contains a lot of signatures that copied from other sources
  # BUT have all 01 replaced with '??'
  # // replaced the file with filtered one (see 'fs-good' below) // zzz
  if args[:fix1]
    sigs.keys.each do |re|
      if re.gsub("01","??") == sig.re
        puts "[.] fix1: rejecting #{sig.name} - already present with 01 in place" if args[:verbose]
        return
      end
    end
    # File.open("fs-good.txt","a") do |f|
    #   f << "[#{sig.name}=#{sig.re.tr(' ','')}]\n"
    # end
  end

  if sigs[sig.re]
    # same RE already known: compare the names (upper-cased, part before
    # '->', with 'V' and spaces removed) and keep the existing entry when one
    # name contains the other
    a = [sig, sigs[sig.re]].map{ |x| x.name.upcase.split('->').first.tr('V ','') }
    return if a[0][a[1]] || a[1][a[0]]

    # otherwise try to combine both names into one
    new_name = _merge_names(sigs[sig.re].name, sig.name)
    if new_name && new_name != sig.name && new_name != sigs[sig.re].name
      puts "[.] sig name join: #{new_name}" if args[:verbose]
      sigs[sig.re].name = new_name
    end
  else
    # new sig
    sigs[sig.re] = sig
  end
end
_diff(res)
click to toggle source
range of common difference between N given sigs
# File lib/pedump/sig_parser.rb, line 462
#
# Finds the single contiguous run of positions at which the given
# equally-sized sequences disagree.
#
# res:: Array of sequences (indexable by Integer, answering +size+)
#
# Returns a Range of indexes covering the differing run, or nil when the
# sequences are identical, differ over their whole length, or differ in more
# than one separate run.
# Raises RuntimeError ("diff sizes") unless all sequences share one size.
def _diff(res)
  lengths = res.map(&:size).uniq
  raise "diff sizes" unless lengths.size == 1

  len = lengths.first
  first_diff = nil
  last_diff = len - 1
  was_same = true

  (0...len).each do |pos|
    same = res.map { |re| re[pos] }.uniq.size == 1
    next if same == was_same

    if same
      # the differing run ended on the previous position
      last_diff = pos - 1
    elsif first_diff
      # a second differing run cannot be expressed as one range
      return nil
    else
      first_diff = pos
    end
    was_same = same
  end

  span = (first_diff || 0)..last_diff
  span == (0..(len - 1)) ? nil : span
end
_join(a, sep='')
click to toggle source
# File lib/pedump/sig_parser.rb, line 294
#
# Flattens a (possibly nested) RE array into one String.
#
# a::   Array whose elements are Strings, plain Arrays or OrBlocks
# sep:: separator placed between the rendered elements of +a+
#
# Strings are used as-is, plain Arrays are concatenated recursively, and an
# OrBlock becomes a parenthesized group with its members separated by '|'.
# Elements of any other kind contribute nothing.
def _join(a, sep = '')
  rendered = a.map do |item|
    if item.is_a?(OrBlock)
      # must be tested before Array
      '(' + _join(item, '|') + ')'
    elsif item.is_a?(Array)
      _join(item)
    elsif item.is_a?(String)
      item
    end
  end
  rendered.join(sep)
end
_merge(sigs)
click to toggle source
merge array of signatures into one signature
# File lib/pedump/sig_parser.rb, line 490
#
# Merges several signatures into one RE array.
#
# sigs:: Array of objects answering +re+ (each an Array of RE elements)
#
# Returns a new Array made of the shared head, one OrBlock holding every
# signature's slice of the differing range, and the shared tail.
# Returns nil (after printing a "[?] wrong sizes" line) when the REs are not
# all the same length, and nil when _diff reports no single differing range.
def _merge(sigs)
  res = sigs.map(&:re)
  sizes = res.map(&:size)

  unless sizes.uniq.size == 1
    puts "[?] wrong sizes: #{sizes.inspect}"
    return nil
  end

  diff = _diff(res)
  return nil unless diff

  ref = res.first
  head = ref[0...diff.first]
  alternatives = OrBlock.new(res.map { |re| re[diff] })
  tail = ref[(diff.last + 1)..-1]

  head + [alternatives] + tail
end
_merge_names(name1, name2)
click to toggle source
# File lib/pedump/sig_parser.rb, line 209
#
# Builds a combined display name for two signatures that share one RE.
#
# name1, name2:: signature names (Strings); neither is modified
#
# Returns nil when no merge is needed, i.e. when either name can be kept:
# * the names (part before the first '->') differ only in ignorable words
#   such as 'EXE', 'DLL' or 'LZMA', or
# * every differing word of one name is contained in some differing word of
#   the other (compared lower-cased, ignoring ',', '-' and a trailing 'tm').
# Otherwise returns a String: shared leading words, then
# "<rest of name1> / <rest of name2>", then shared trailing words.
#
# An empty name, or one consisting only of '->', is treated as having no
# words instead of raising NoMethodError.
def _merge_names(name1, name2)
  # only the part before the first '->' takes part in the comparison;
  # String#split yields [] for such degenerate names, hence the fallback
  words = [name1, name2].map { |name| (name.split('->').first || '').split }

  ignorable = [
    'EXE', '[EXE]',
    'DLL', '(DLL)', '[DLL]',
    '[LZMA]', '(LZMA)', 'LZMA',
    '-', '~', '(pack)', '(1)', '(2)',
    '19??',
    'with:', 'with?'
  ]
  # words present in one name but not in the other
  diffs = [words[0] - words[1], words[1] - words[0]].map { |list| list - ignorable }
  return if diffs.all?(&:empty?) # no different words => can keep ANY name

  # examples of differences that are considered equivalent:
  # [["v1.14/v1.20"], ["v1.14,", "v1.20"]]
  # [["EXEShield", "v0.3b/v0.3", "v0.6"], ["Shield", "v0.3b,", "v0.3"]]
  simplify = ->(word) { word.downcase.delete(',-').sub(/tm$/, '') }
  2.times do |i|
    covered = diffs[i].all? do |x|
      needle = simplify.call(x)
      diffs[1 - i].any? { |y| simplify.call(y)[needle] }
    end
    return if covered
  end

  a = name1.split
  b = name2.split

  # merge common head (case-insensitive; name1's spelling is kept)
  head = []
  while a.any? && b.any? && a.first.upcase == b.first.upcase
    head << a.shift
    b.shift
  end

  # merge common tail
  tail = []
  while a.any? && b.any? && a.last.upcase == b.last.upcase
    tail.unshift(a.pop)
    b.pop
  end

  # rm common words from middle: drop from b every word that a already has,
  # bare or wrapped in parentheses; separators are never dropped here
  separators = ['/', '->']
  loop do
    repeated = b.find do |bw|
      next false if separators.include?(bw)

      a.include?(bw) || a.include?(bw + ')') || a.include?('(' + bw) || a.include?("(#{bw})")
    end
    break unless repeated

    b -= [repeated]
  end
  # a separator left dangling at the end of b would read badly
  b.pop while separators.include?(b.last)

  middle = [a.join(' '), b.join(' ')].reject(&:empty?).join(' / ')
  (head << middle).concat(tail).join(' ')
end
_name2wordonly(name)
click to toggle source
# File lib/pedump/sig_parser.rb, line 360
#
# Reduces a signature name to a comparison key: the lower-cased runs of
# letters, digits, '_' and '.' joined by single spaces. Every other
# character acts as a separator.
def _name2wordonly(name)
  name.downcase.scan(/[a-z0-9_.]+/).join(' ')
end
_optimize(sigs)
click to toggle source
# File lib/pedump/sig_parser.rb, line 318 def _optimize sigs nfound = 0 min_sz = 6 max_diff = 6 sigs.each_with_index do |sig1,idx| #break if idx == 100 next if sig1.re.size < min_sz next if sig1.name['PseudoSigner'] sigs[(idx+1)..-1].each do |sig2| next if sig2.re.size < min_sz next if sig2.name['PseudoSigner'] if rd = _re_diff(sig1.re, sig2.re, max_diff) if rd.all?{ |x| x[0].nil? || x[0] == '.' } && sig2.re.size >= sig1.re.size if new_name = _merge_names(sig2.name, sig1.name) # require 'pp' # pp ["FIRST", sig1.name, sig2.name, new_name, sig1.re.join, sig2.re.join] if new_name =~ /pecompact/i sig1.name = new_name end sig2.ep_only ||= sig1.ep_only sig2.re = [] elsif rd.all?{ |x| x[1].nil? || x[1] == '.' } && sig1.re.size >= sig2.re.size if new_name = _merge_names(sig2.name, sig1.name) # require 'pp' # pp ["SECOND", sig1.name, sig2.name, new_name, sig1.re.join, sig2.re.join] if new_name =~ /pecompact/i sig2.name = new_name end sig1.re = [] sig1.ep_only ||= sig2.ep_only break else next end nfound += 1 end end end sigs.delete_if{ |sig| sig.re.empty? } end
_re_diff(a,b, max_cnt = 1000)
click to toggle source
# File lib/pedump/sig_parser.rb, line 307
#
# Lists the positions at which two sequences differ.
#
# a, b::    sequences indexable by Integer and answering +size+
# max_cnt:: give up once more than this many differences were collected
#
# Returns an Array of [a_element, b_element] pairs, one per differing
# position (nil stands in for positions past the end of the shorter
# sequence), or nil as soon as the number of pairs exceeds +max_cnt+.
def _re_diff(a, b, max_cnt = 1000)
  mismatches = []
  longest = [a.size, b.size].max

  0.upto(longest - 1) do |pos|
    left = a[pos]
    right = b[pos]
    next if left == right

    mismatches << [left, right]
    return nil if mismatches.size > max_cnt
  end

  mismatches
end
optimize(sigs)
click to toggle source
# File lib/pedump/sig_parser.rb, line 392
#
# Shrinks the signature list in place by unifying names and folding groups of
# related signatures into single entries that carry an OrBlock.
#
# sigs:: Array of objects answering +re+ (Array), +name+, +ep_only+ and +size+
#
# Prints one "[.] sigs merge: <before> -> <after>" line per pass and
# returns +sigs+.
def optimize(sigs)
  optimize_names sigs

  # XXX no optimize from now, prefer more precise sigs
  #print "[.] sigs merge: #{sigs.size}"; _optimize(sigs); puts " -> #{sigs.size}"

  # pass 1: signatures with the same name, size & ep_only are handed to
  # _merge; on success the first one keeps the merged RE, the rest are
  # marked for removal with a nil RE
  same_shape = sigs.group_by { |sig| [sig.re.size, sig.name, sig.ep_only] }
  same_shape.each_value do |group|
    next if group.size == 1

    merged_re = _merge(group)
    next unless merged_re

    keeper, *others = group
    keeper.re = merged_re
    others.each { |sig| sig.re = nil }
  end
  print "[.] sigs merge: #{sigs.size}"
  sigs.delete_if { |x| x.re.nil? }
  puts " -> #{sigs.size}"

  # pass 2: large families (10 or more entries) sharing name, ep_only and
  # the first 10 RE elements are folded into prefix + OrBlock of remainders
  # e.g. 361 entries of ["VMProtect v1.25 (PolyTech)", true, "h....\xE8...."])
  same_prefix = sigs.group_by { |sig| [sig.name, sig.ep_only, sig.re[0, 10].join] }
  same_prefix.each_value do |entries|
    next if entries.size < 10

    #printf "%5d %s\n", entries.size, k
    keeper, *others = entries
    prefix = keeper.re[0, 10]
    infix = entries.map { |sig| sig.re[10..-1] }

    keeper.re = prefix + [OrBlock.new(infix)]
    keeper.size = entries.map(&:size).max
    others.each { |sig| sig.re = nil }
  end
  print "[.] sigs merge: #{sigs.size}"
  sigs.delete_if { |x| x.re.nil? }
  puts " -> #{sigs.size}"

  # disabled experiment:
  # # merge signatures with same prefix & suffix
  # # most ineffecient part :)
  # sigs.group_by{ |sig|
  #   [sig.name, sig.ep_only, sig.re.index{ |x| x.is_a?(Array)}]
  # }.values.each do |a|
  #   next if a.size == 1
  #   next unless idx = a.first.re.index{ |x| x.is_a?(Array) }
  #   a.group_by{ |sig| [sig.re[0...idx], sig.re[(idx+1)..-1]] }.each do |k,entries|
  #     # prefix | infix | suffix
  #     # s o m [[b r e r o] [e w h a t]] h e r e
  #     prefix, suffix = k
  #     infix = entries.map{ |sig| sig.re[idx] }
  #     #infix = [['f','o','o']]
  #     merged_re = prefix + infix + suffix
  #     max_size = entries.map(&:size).max
  #     entries.each{ |sig| sig.re = merged_re; sig.size = max_size }
  #   end
  # end
  # print "[.] sigs merge: #{sigs.size}"; sigs.uniq!; puts " -> #{sigs.size}"

  # disabled stats:
  # aa = []
  # 6.upto(20) do |len|
  #   sigs.group_by{ |sig| [sig.re[0,len].join, sig.name, sig.ep_only] }.each do |a,b|
  #     aa << [b.size, a[0], [b.map(&:size).min, b.map(&:size).max].join(' .. ') ] if b.size > 2
  #   end
  # end
  # aa.sort_by(&:first).each do |sz,prefix,name|
  #   printf "%5d %-50s %s\n", sz, prefix.inspect, name
  # end

  sigs
end
optimize_names(sigs)
click to toggle source
# File lib/pedump/sig_parser.rb, line 364
#
# Makes all signatures whose names reduce to the same _name2wordonly key
# share one name String, namely the shortest spelling seen for that key.
# (Original author's note: saves ~30k out of ~200k mem.)
#
# sigs:: enumerable of objects answering +name+ / +name=+; names are
#        reassigned in place
#
# Prints the number of distinct keys; the return value is that of +puts+.
def optimize_names(sigs)
  shortest = {}

  # first sweep: remember the shortest spelling per key
  sigs.each do |sig|
    key = _name2wordonly(sig.name)
    known = shortest[key]

    unless known
      shortest[key] = sig.name
      next
    end
    next if known == sig.name # fully identical names

    shortest[key] = [known, sig.name].sort_by(&:size).first
  end

  # second sweep: point every signature at the chosen spelling
  sigs.each do |sig|
    sig.name = shortest[_name2wordonly(sig.name)]
  end

  puts "[.] sigs merge: #{shortest.size} unique names"
end
parse(args = {})
click to toggle source
parse text signatures
# File lib/pedump/sig_parser.rb, line 22
#
# Parses text signature files into a list of signature objects.
#
# args:: options Hash. Keys read here:
#        +:fnames+   - files to read; defaults to TEXT_SIGS_FILES (the default
#                      is written back into the Hash that was passed in)
#        +:verbose+  - print progress and warnings
#        +:raw+      - keep each RE as an Array of one-character Strings
#        +:raword+   - keep each RE as an Array of byte values
#        +:optimize+ - run #optimize before building the Regexps
#        The whole Hash, merged with SPECIAL_PARSE_OPTIONS[fname], is also
#        handed to _add_sig.
#
# Recognized input lines (after stripping):
#   lines starting with '<', ';' or '#', and blank lines, are skipped
#   [name=hexbytes]        a complete one-line signature
#   [name]                 starts a signature, optionally followed by " // ..."
#   signature = hexbytes   RE of the signature started above; the signature is added
#   ep_only = true|false   flag of the signature started above
# Any other line raises RuntimeError with the line as its message.
#
# Returns an Array of signature objects. With +:raw+ / +:raword+ their +re+
# is an Array; otherwise it is a Regexp built with Regexp::MULTILINE.
def parse args = {}
  args[:fnames] ||= TEXT_SIGS_FILES
  sigs = {}; sig = nil

  args[:fnames].each do |fname|
    n0 = sigs.size; add_sig_args = args.dup
    add_sig_args.merge!(SPECIAL_PARSE_OPTIONS[fname] || {})
    File.open(fname,'r:utf-8') do |f|
      while line = f.gets
        case line.strip
        when /^[<;#]/, /^$/ # comments & blank lines
          next
        when /^\[(.+)=(.+)\]$/
          # presumably the third Packer argument is ep_only - TODO confirm
          _add_sig(sigs, Packer.new($1, $2, true), add_sig_args )
        when /^\[([^=]+)\](\s+\/\/.+)?$/
          sig = Packer.new($1)
        when /^signature = (.+)$/
          # NOTE(review): +sig+ is still nil if this line precedes any
          # "[name]" header - confirm the inputs never do that
          sig.re = $1
          _add_sig(sigs, sig, add_sig_args)
        when /^ep_only = (.+)$/
          sig.ep_only = ($1.strip.downcase == 'true')
        else
          raise line
        end
      end
    end
    puts "[=] #{sigs.size-n0} sigs from #{File.basename(fname)}\n\n" if args[:verbose]
  end

  # per-signature binary string; in this block form the first parameter (k)
  # is the Hash itself and the second (v) is the missing key.
  # It is filled below but only read by the commented-out analysis further down.
  bins = Hash.new{ |k,v| k[v] = ''.force_encoding('binary') }

  # convert strings to Regexps
  sigs = sigs.values
  sigs.each_with_index do |sig,idx|
    sig.re =
      sig.re.split(' ').tap do |a|
        # size is the element count of the textual RE
        sig.size = a.size
      end.map do |x|
        case x
        when /\A\?\?\Z/
          # fully masked byte => match anything
          bins[sig] << '.'
          '.'
        when /\A.\?/,/\?.\Z/
          # half-masked byte is widened to a full mask
          puts "[?] #{x.inspect} -> \"??\" in #{sig.name}" if args[:verbose]
          bins[sig] << '.'
          '.'
        when /\A[a-f0-9]{2}\Z/i
          # literal byte
          x = x.to_i(16).chr
          bins[sig] << x
          if args[:raw]
            x
          elsif args[:raword]
            x.ord
          else
            Regexp::escape(x)
          end
        else
          puts "[?] unknown re element: #{x.inspect} in #{sig.inspect}" if args[:verbose]
          # the string below has no effect: +break+ ends +map+ with nil,
          # so sig.re becomes nil and the signature is removed further down
          "BAD_RE"
          break
        end
      end
    if sig.name[/-+>/]
      # "packer -> author" becomes "packer (author)"
      a = sig.name.split(/-+>/,2).map(&:strip)
      sig.name = "#{a[0]} (#{a[1]})"
    end
    # NOTE(review): masks were already turned into '.' above, so a trailing
    # '??' element looks impossible here and this loop appears to be a no-op;
    # confirm whether trailing wildcards were meant to be trimmed.
    sig.re.pop while sig.re && sig.re.last == '??'
  end
  sigs.delete_if{ |sig| !sig.re || sig.re.index('BAD_RE') }
  return sigs if args[:raw] || args[:raword]

  # disabled analysis of near-duplicate signatures:
  # require 'awesome_print'
  # bins.each do |bin_sig, bin|
  #   next if bin.size < 5
  #   #next unless bin_sig.name['UPX']
  #
  #   bin_re = Regexp.new(bin_sig.re.join, Regexp::MULTILINE)
  #   was = false
  #   sigs.each do |sig|
  #     next if sig.size < 5 || sig == bin_sig
  #     #next unless sig.name['UPX']
  #
  #     re = Regexp.new(sig.re.join, Regexp::MULTILINE)
  #     if bin.index(re) == 0
  #       rd = _re_diff(bin_re.source, re.source)
  #       if rd.any? && rd.size <= 4
  #         #if sig.name.split.first.upcase != bin_sig.name.split.first.upcase
  #           puts "\n[.] #{bin_sig.name.yellow}\n#{bin_re.source.inspect.red}" unless was
  #           puts "[=] #{sig.name}"
  #           puts re.source.inspect.green
  #           p rd
  #           was = true
  #         #end
  #       end
  #     end
  #   end
  # end

  optimize sigs if args[:optimize]

  # convert re-arrays to Regexps
  sigs.each do |sig|
    sig.re = Regexp.new( _join(sig.re), Regexp::MULTILINE )
  end

  sigs
end