module Google::SafeBrowsingParser

Constants

CHUNKNUM_SIZE
FULL_HASH_SIZE

Public Instance Methods

parse_add_data(byte_arr, hash_len) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 89
# Decodes the body of an ADD chunk into a hash of host key => hash prefixes.
#
# The data is consumed as a sequence of records, each made of HOST_KEY_SIZE
# bytes of host key, one count byte, and then `count` prefixes of `hash_len`
# bytes each. Records that repeat a host key append to the same list.
#
# @param byte_arr [Array<String>] chunk body, one single-byte string per element
# @param hash_len [Integer] length in bytes of every hash prefix
# @return [Hash{String => Array<String>}] hex host key => hex prefixes; a host
#   key whose count byte is zero maps to an empty array
def parse_add_data byte_arr, hash_len
  ret = {}
  pointer = 0 # read cursor into byte_arr

  while pointer < byte_arr.size
    host_key = parse_host_key byte_arr[pointer...pointer + HOST_KEY_SIZE]
    pointer += HOST_KEY_SIZE
    prefixes = (ret[host_key] ||= [])

    count = parse_count_number byte_arr[pointer]
    pointer += 1

    # A zero count simply leaves the host key with whatever it already had.
    count.times do
      prefixes << parse_hash_prefix(byte_arr[pointer...pointer + hash_len], hash_len)
      pointer += hash_len
    end
  end

  ret
end
parse_count_number(char) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 154
# Reads the first byte of +char+ as an unsigned 8-bit integer.
#
# @param char [String] string whose first byte holds the count
# @return [Integer, nil] value in 0..255, or nil when +char+ is empty
def parse_count_number char
  count, = char.unpack('C')
  count
end
parse_full_hash_data(byte_arr, full_hash_arr) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 207
# Splits +byte_arr+ into groups of FULL_HASH_SIZE bytes and appends the
# lowercase hex encoding of each group to +full_hash_arr+ (mutated in place).
# A trailing group shorter than FULL_HASH_SIZE is hex-encoded as-is.
#
# @param byte_arr [Array<String>] raw hash bytes, one single-byte string per element
# @param full_hash_arr [Array<String>] collector that receives the hex strings
def parse_full_hash_data byte_arr, full_hash_arr
  hex_format = "H#{FULL_HASH_SIZE*2}"
  byte_arr.each_slice(FULL_HASH_SIZE) do |hash_bytes|
    packed = hash_bytes.join('')
    full_hash_arr << packed.unpack(hex_format).first
  end
end
parse_full_hash_entries(str) click to toggle source

Returns a hash keyed by list name (e.g. :'goog-malware-shavar'):

{ list_name => {
    add_chunk_num1 => [full_hash0, full_hash1, ...],
    add_chunk_num2 => [full_hash0]
  }
}

BODY        = ([MAC LF] HASHENTRY+) | (REKEY LF) EOF
HASHENTRY   = LISTNAME ":" ADDCHUNK ":" HASHDATALEN LF HASHDATA
ADDCHUNK    = DIGIT+  # Add chunk number
HASHDATALEN = DIGIT+  # Length of HASHDATA
HASHDATA    = <HASHDATALEN number of unsigned bytes>  # Full length hashes in binary
MAC         = (LOALPHA | DIGIT)+

Rekey responses are ignored for now: parsing stops and the entries collected so far are returned.

# File lib/google/safe_browsing_parser.rb, line 180
# Parses a full-hash response body into a nested hash of
# list name => add chunk number => full hashes.
#
# The body is scanned for successive FULL_HASH_HEAD matches. For each match
# the header's chunk_len bytes that follow are read and handed to
# parse_full_hash_data, which appends the decoded hashes to that list/chunk
# entry. Parsing stops at the first header whose :rekey group matched, and
# whatever was collected up to that point is returned.
#
# @param str [String] raw response body
# @return [Hash{Symbol => Hash{Integer => Array<String>}}]
def parse_full_hash_entries str
  full_list = {}
  scanner = StringScanner.new(str)

  while (head = scanner.scan_until(FULL_HASH_HEAD))
    m = FULL_HASH_HEAD.match head
    return full_list if m[:rekey]

    list_name = m[:list].to_s.to_sym
    chunk_num = m[:chunk_num].to_i
    chunk_len = m[:chunk_len].to_i

    hashes = ((full_list[list_name] ||= {})[chunk_num] ||= [])

    # get_byte returns nil once the input is exhausted, so a body shorter than
    # its declared length yields trailing nils rather than raising here.
    chunk_data = chunk_len.times.map { scanner.get_byte }

    parse_full_hash_data chunk_data, hashes
  end

  full_list
end
parse_hash_prefix(char_arr, hash_len) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 158
# Hex-encodes a hash prefix.
#
# @param char_arr [Array<String>] the prefix bytes, one string per element
# @param hash_len [Integer] prefix length in bytes; at most twice this many
#   hex digits are produced
# @return [String] lowercase hex string (shorter when fewer bytes are supplied)
def parse_hash_prefix char_arr, hash_len
  hex_digits = hash_len * 2
  prefix, = char_arr.join('').unpack("H#{hex_digits}")
  prefix
end
parse_host_key(char_arr) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 150
# Hex-encodes a host key, using at most the first four bytes (eight hex digits).
#
# @param char_arr [Array<String>] the host key bytes, one string per element
# @return [String] lowercase hex string of up to eight characters
def parse_host_key char_arr
  raw_key = char_arr.join('')
  raw_key.unpack('H8')[0]
end
parse_shavar_list(str, test_mode = false) click to toggle source

@param str A blob of characters returned from the redirect download URL
@return Two arrays of decoded shavar list data: one for ADD, the other for SUB

The shavar list data has the following structure. For ADD: { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,

:chunk_data => {
                 :host_key_one => [prefix0, prefix1, ...],
                 :host_key_two => []
               }

}

For SUB: { :chunk_num => 343243, :hash_len => 4, :chunk_len => 4343,

:chunk_data => {
                 :host_key_one => { 
                                    :add_chunknum_one => [prefix0, prefix1, ...],
                                    :add_chunknum_two => []
                                  },
                 :host_key_two => {
                                    :add_chunknum_three => [prefix0, prefix1, ...],
                                    :add_chunknum_four => []
                                  }
               }

}

# File lib/google/safe_browsing_parser.rb, line 42
# Splits a shavar download into its ADD and SUB chunks and decodes each one.
#
# The input is scanned for successive ADD_SUB_HEAD matches; after each header
# the next chunk_len bytes are read and decoded with parse_add_data or
# parse_sub_data. A summary line is written to Rails.logger when done.
#
# @param str [String] raw data returned from the redirect download URL
# @param test_mode [Boolean] when true, only the first chunk is processed
# @return [Array(Array<Hash>, Array<Hash>)] [adds, subs]; every entry has the
#   keys :chunk_num, :hash_len, :chunk_len and :chunk_data
def parse_shavar_list str, test_mode = false
  adds = []
  subs = []
  count = 0
  scanner = StringScanner.new(str)

  until (head = scanner.scan_until(ADD_SUB_HEAD)).nil?
    break if test_mode && count > 0

    count += 1
    m = ADD_SUB_HEAD.match(head)
    kind = m[:add_sub]
    chunk_num = m[:chunk_num].to_i
    hash_len = m[:hash_len].to_i
    chunk_len = m[:chunk_len].to_i

    # get_byte yields nil once the input runs out, so a truncated chunk is
    # padded with nils instead of stopping early.
    chunk_data = chunk_len.times.map { scanner.get_byte }

    # NOTE(review): decoding is selected with the literals 'a'/'s' while the
    # bucketing below uses the ADD/SUB constants — confirm they are the same.
    case kind
    when 'a' then data_arr = parse_add_data(chunk_data, hash_len)
    when 's' then data_arr = parse_sub_data(chunk_data, hash_len)
    end

    obj = { chunk_num: chunk_num, hash_len: hash_len, chunk_len: chunk_len, chunk_data: data_arr }

    if kind == ADD
      adds << obj
    elsif kind == SUB
      subs << obj
    end
  end

  Rails.logger.info "Total # of ADD/SUB section is #{count}, #{adds.size} adds, #{subs.size} subs"

  [adds, subs]
end
parse_sub_data(byte_arr, hash_len) click to toggle source
# File lib/google/safe_browsing_parser.rb, line 116
# Decodes the body of a SUB chunk into a nested hash of
# host key => add chunk number => hash prefixes.
#
# The data is consumed as a sequence of records. Each record starts with
# HOST_KEY_SIZE bytes of host key and one count byte. A non-zero count is
# followed by `count` pairs of (add chunk number, prefix of `hash_len` bytes);
# a zero count is followed by a single add chunk number with no prefix.
#
# @param byte_arr [Array<String>] chunk body, one single-byte string per element
# @param hash_len [Integer] length in bytes of every hash prefix
# @return [Hash{String => Hash{Integer => Array<String>}}]
def parse_sub_data byte_arr, hash_len
  ret = {}
  pointer = 0 # read cursor into byte_arr

  # Decodes the big-endian 32-bit add chunk number at the cursor and moves the
  # cursor past it.
  read_add_chunknum = lambda do
    chunknum = byte_arr[pointer...pointer + CHUNKNUM_SIZE].join('').unpack('L>').first
    pointer += CHUNKNUM_SIZE
    chunknum
  end

  while pointer < byte_arr.size
    host_key = parse_host_key byte_arr[pointer...pointer + HOST_KEY_SIZE]
    pointer += HOST_KEY_SIZE
    count = parse_count_number byte_arr[pointer]
    pointer += 1

    by_chunk = (ret[host_key] ||= {})
    if count > 0
      count.times do
        add_chunknum = read_add_chunknum.call
        prefix = parse_hash_prefix(byte_arr[pointer...pointer + hash_len], hash_len)
        (by_chunk[add_chunknum] ||= []) << prefix
        pointer += hash_len
      end
    else
      # Plain assignment: a zero-count record replaces any prefixes previously
      # collected for the same host key and add chunk number.
      by_chunk[read_add_chunknum.call] = []
    end
  end

  ret
end