class RXRawLineParser
Public Class Methods
new(format_mask)
click to toggle source
# File lib/rxraw-lineparser.rb, line 27
#
# Builds a parser around +format_mask+.
#
# format_mask - the mask later consulted by #parse. The mask is stored
#               as given; no validation or copying is done here.
def initialize(format_mask)
  @format_mask = format_mask
end
Public Instance Methods
parse(line)
click to toggle source
# File lib/rxraw-lineparser.rb, line 31
#
# Splits +line+ into field names and field values using the format mask
# supplied to the constructor.
#
# line - the String to parse.
#
# Returns a two-element Array: [field_names, field_values].
# * When the mask contains named capture groups ("(?<name>...)") the names
#   are Strings taken from the match data.
# * Otherwise the names are Symbols taken from the "[!name]" markers in the
#   mask, and every captured value is stripped of surrounding whitespace.
# The values are padded with empty strings up to the number of names.
#
# Note: no branch checks for a failed match, so a line that does not match
# the mask ends in a NoMethodError on nil.
def parse(line)
  mask = @format_mask

  # A mask carrying its own named groups is used verbatim as the regex.
  if mask.to_s.include?('(?<')
    matched = line.match(/#{mask}/)
    names = matched.names
    values = matched.captures
    return [names, values + [''] * (names.length - values.length)]
  end

  names = mask.to_s.scan(/\[!(\w+)\]/).flatten.map(&:to_sym)

  # only perform possible patterns to match when a custom
  # format_mask is detected
  pattern =
    if mask[0] == '\\'
      mask.gsub(/\[!\w+\]/, '(.*)')
    else
      candidates = possible_patterns(mask)

      # A mask that is nothing but space-separated markers also gets the
      # "two or more spaces" candidates; the helper edits the list in place,
      # so its return value is not needed.
      spaced_mask = names.map { |name| "[!%s]" % name }.join(' ')
      if spaced_mask == mask && names.length > 1
        insert2space_patterns(names.length, candidates)
      end

      # The first candidate that matches the line wins.
      chosen = candidates.detect { |parts| line.match(/#{parts.join}/) }.join

      # With several candidates, any text trailing the last marker in the
      # mask is appended to the chosen pattern.
      candidates.length > 1 ? chosen + mask[/[^\]]+$/].to_s : chosen
    end

  values = line.match(/#{pattern}/).captures.map(&:strip)

  # Drop the first and last character (the quotes) of the values reported
  # as quoted by find_qpattern.
  quoted = find_qpattern(pattern)
  quoted.each { |i| values[i] = values[i][1..-2] } if quoted

  values += [''] * (names.length - values.length)
  [names, values]
end
Private Instance Methods
diminishing_permutation(max_fields)
click to toggle source
# File lib/rxraw-lineparser.rb, line 166
#
# Lists every combination of 1s and 0s of length +max_fields+, followed by
# every combination of length max_fields - 1, and so on down to length 1.
#
# max_fields - Integer length of the longest combinations.
#
# Returns an Array of Arrays of 1/0; empty when max_fields is below 1.
def diminishing_permutation(max_fields)
  max_fields.downto(1).flat_map do |length|
    [1, 0].repeated_permutation(length).to_a
  end
end
find_qpattern(s)
click to toggle source
# File lib/rxraw-lineparser.rb, line 172
#
# Finds which capture groups of a regular-expression source are the
# quoted-value group (["'][^"']+["']).
#
# s - String holding regular-expression source.
#
# Returns an Array of zero-based capture-group indexes, in order of
# appearance; empty when no group holds the quoted-value pattern.
#
# The indexes are used by #parse to address MatchData#captures, so they
# must count real capture groups. Counting pieces produced by splitting the
# source text does not work: a leading "^" adds a piece, groups such as
# "([^,]+),+" add none, and "(?=...)" lookaheads add a piece that is not a
# capture at all.
def find_qpattern(s)
  quoted_body = %q{["'][^"']+["']}

  # Tokens that matter while walking the source:
  #   \\.                          an escaped character (e.g. "\(") - skipped
  #   \[ ... \]                    a character class - skipped as a whole so
  #                                a "(" inside it is not taken for a group
  #   \((?!\?)                     the start of a plain capture group
  #   \(\?<(?![=!])                the start of a named capture group
  # Non-capturing constructs such as "(?:", "(?=" and "(?<=" match none of
  # these and are therefore not counted.
  token = /\\.|\[\^?\]?(?:\\.|[^\]\\])*\]|\((?!\?)|\(\?<(?![=!])/

  indexes = []
  group = -1
  pos = 0

  while (m = token.match(s, pos))
    pos = m.end(0)
    next unless m[0].start_with?('(')

    group += 1
    indexes << group if s[pos, quoted_body.length] == quoted_body
  end

  indexes
end
fmask_delimiters(f)
click to toggle source
# File lib/rxraw-lineparser.rb, line 177
#
# Collects the text that separates consecutive "[!name]" markers in a
# format mask.
#
# f - the format mask String.
#
# Returns an Array of Strings, one per marker except the last: the text
# from the marker's closing "]" up to the next marker. Text in front of the
# first marker contains no "]" and is therefore left out.
def fmask_delimiters(f)
  # Cut the mask just before every marker; the piece belonging to the final
  # marker is not followed by another field, so it is discarded.
  *leading, _final = f.split(/(?=\[!\w+\])/)

  leading.map { |piece| piece[/\](.*)/, 1] }.compact
end
insert2space_patterns(field_count, patterns)
click to toggle source
# File lib/rxraw-lineparser.rb, line 84
#
# Adds candidate patterns for lines whose fields are separated by runs of
# two or more whitespace characters.
#
# field_count - Integer number of fields in the mask.
# patterns    - Array of candidate patterns (each an Array of regex
#               fragments). It is modified in place.
#
# One candidate is built for 1, 2, ... field_count - 1 leading fields
# (always at least one), each followed by a catch-all "(.*)" group. Every
# candidate is inserted at a position counted back from the current end of
# +patterns+.
#
# Returns the Array of [offset, fragments] pairs that were inserted.
def insert2space_patterns(field_count, patterns)
  field_unit = '([^\s]+)\s{2,}'

  # Distance-from-the-end offsets: 2 for the first candidate, then
  # 2**3 - 1, 2**4 - 1, ... for each further one.
  offsets = [2] + (0...(field_count - 2)).map { |n| 2**(n + 3) - 1 }

  spaced = offsets.each_with_index.map do |offset, i|
    [offset, ['^', *Array.new(i + 1, field_unit), '(.*)']]
  end

  # patterns grows with every insert, so its length is re-read each time.
  spaced.each_with_index do |(offset, fragments), i|
    patterns.insert(patterns.length - offset - i - 2, fragments)
  end
end
possible_patterns(format_mask)
click to toggle source
# File lib/rxraw-lineparser.rb, line 104 def possible_patterns(format_mask) part1 = format_mask[/^[^\[]+/].to_s pure_regex = format_mask.gsub(/\[!(\w+)\]/,'(?<\1>.*)') tot_fields = format_mask.scan(/\[!\w+\]/).length return [[pure_regex]] if tot_fields <= 1 or @format_mask[0] != '[' main_fields = tot_fields - 1 qpattern = %q{(["'][^"']+["'])} a = fmask_delimiters(format_mask) r = diminishing_permutation(main_fields) if r.length > 2 then itemx = [r.slice!(-2)] r2 = r[0..-3] + itemx + r[-2..-1] else r2 = r end rr = r2.map do |x| x2 = x.each_with_index.map do |item, i| d = a[i] case item when 1 qpattern #+ d when 0 if d.length == 1 then if i < x.length - 1 then s = "([^%s]+)%s+" % ([d]*2) else s = "([^%s]+)" % d end else s = i < x.length - 1 ? "(.*)(?=#{d})#{d}" : "(.*)" end s end end r = x2.unshift '^' + part1 + x2.shift #+ end_part end count = 2**main_fields rr2 = rr.take(count+1).map {|x| x + [a[-1] +'(.*)']} if rr.length > 2 then wild_r = rr2.slice!(-1) rrr = rr2 + rr[0..count-1] + [wild_r] + rr[count..-1] else rrr = rr2 + rr[0..count-1] + rr[count..-1] end rrr + [[part1 + '(.*)']] end