class Ripcols::Ripper
Public Class Methods
new(patterns, str, column_gap=3, out_f=$stdout, err_f=$stderr)
click to toggle source
# File lib/ripcols/ripper.rb, line 6
#
# Sets up a ripper over the text buffer +str+.
#
# patterns   : collection of named patterns; every entry of
#              @@REQUIRED_PATTERNS must be present in it. It is duplicated,
#              so the caller's object is not modified by the defaults below.
# str        : the raw text to be split into header and body later on.
# column_gap : minimum run of whitespace treated as a column separator
#              when no explicit separator pattern is supplied.
# out_f      : stored in @out_f.
# err_f      : accepted for interface compatibility; this method does not
#              store it.
#
# Raises ArgumentError when a required pattern key is missing.
def initialize(patterns, str, column_gap=3, out_f=$stdout, err_f=$stderr)
  if @@REQUIRED_PATTERNS.any? { |required_key| !patterns.include?(required_key) }
    raise ArgumentError, "all required keys not present.\n Required keys: #{@@REQUIRED_PATTERNS}"
  end

  @COL_GAP = column_gap
  # @in_f = in_f
  @fbuf    = str
  @out_f   = out_f

  # Fallback separator: at least @COL_GAP consecutive whitespace characters.
  gap_separator = /\s{#{@COL_GAP},}/

  @patterns = patterns.dup
  {
    HEADER_COL_SEP: gap_separator,
    LINE_COL_SEP: gap_separator,
    LINE_SEP: /\n/,
  }.each do |key, fallback|
    @patterns[key] ||= fallback
  end
  # @patterns[:LINE_HEADER_SEP] ||= /\n+/

  # Header/body buffers are filled lazily on first access.
  @hbuf = nil
  @bbuf = nil

  @line_column_begin = 0
end
Public Instance Methods
debug!()
click to toggle source
# File lib/ripcols/ripper.rb, line 71
#
# Splits the buffer into header and body, then dumps both to standard
# output (banner via puts, content via p) for manual inspection.
def debug!
  seperate_body_head

  header_dump = header_lines
  body_dump   = body_lines

  puts "HEADER\n______"
  p header_dump
  puts "BODY\n____"
  p body_dump
end
parse()
click to toggle source
# File lib/ripcols/ripper.rb, line 30
#
# Parses the header, splits the body on the LINE_SEP pattern and turns
# every resulting line into a hash via columize_line.
#
# Returns an Array with one columize_line result per body line.
def parse
  columns = parse_head
  # col_sep = @patterns[:LINE_COL_SEP]
  body_lines
    .split(@patterns[:LINE_SEP])
    .map { |row| columize_line(row, columns) }
end
parse_head()
click to toggle source
# File lib/ripcols/ripper.rb, line 38
#
# Derives the column layout from the header text.
#
# Every header line is split on the HEADER_COL_SEP pattern; each non-empty
# word is located in its line and handed to insert_to, which groups words
# whose column ranges overlap (this is how multi-line titles are joined).
#
# Returns an Array of [title, beginning_column, ending_column], ordered by
# beginning column, where title is the group's words joined by a space.
def parse_head
  groups = header_lines.lines.each_with_object([]) do |row, collected|
    cursor = 0
    row.strip.split(@patterns[:HEADER_COL_SEP]).each do |word|
      next if word.empty?

      # Search from the end of the previous word so repeated words
      # resolve to their own position.
      first_col = row.index(word, cursor)
      last_col  = first_col + word.length
      cursor    = last_col
      insert_to(collected, word, first_col, last_col)
    end
  end

  columns = groups
    .sort { |left, right| left[1] <=> right[1] }
    .map { |(titles, bc, ec)| [titles.join(' '), bc, ec] }

  # todo: (possible BUG!)
  # this code will break, when the initial columns dont begin from 0,
  # its better to have some kind of hinting to know where the column
  # begins.
  #
  # going with simplicity, beginning_column_position of 1st column be 0,
  columns.first[1] = @line_column_begin if columns.first

  columns
end
Private Instance Methods
body_lines()
click to toggle source
# File lib/ripcols/ripper.rb, line 169
#
# Returns the body buffer, running seperate_body_head first when the
# buffer has not been populated yet.
def body_lines
  seperate_body_head unless @bbuf
  @bbuf
end
columize_line(line, headers)
click to toggle source
line : a single line of text
headers : [ (title, bc, ec) …+ ]
OUTPUT
columized_line : Hash
> {"col1": "matching stripped text", …* }
Note
blank columns will not be part of the result.
# File lib/ripcols/ripper.rb, line 94
#
# Splits one body line into cells and assigns each cell to a header.
#
# line    : single line of text.
# headers : [ [title, bc, ec] ...+ ] where bc/ec are the header's beginning
#           and (exclusive) ending column.
#
# Returns a Hash of { title => cell text }. Headers for which no cell is
# found are absent from the result.
#
# Cells are read left to right, delimited by the LINE_COL_SEP pattern. A cell
# is given to the current header when it starts no further than @COL_GAP
# columns past that header's end. Otherwise it is kept pending: if it also
# ends more than @COL_GAP columns before the next header begins it is given
# to the header it was pending on, else it is re-tested against the next
# header.
#
# Fixes over the previous version:
# * a cell that runs to the end of the line records its real end column
#   (line.length) instead of the -1 sentinel, so the pending-cell comparison
#   and the scan position are no longer computed from -1;
# * a cell still pending after the last header is assigned to the header it
#   was pending on instead of being dropped.
def columize_line(line, headers)
  return {} if headers.empty?

  cells   = {}
  cursor  = 0
  delim   = @patterns[:LINE_COL_SEP]
  pending = nil

  headers.each do |(title, bc, ec)|
    # A pending cell that ends well before this header belongs to the
    # header it was found under; commit it and move past it.
    if pending && (pending[:ec] + @COL_GAP) < bc
      cells[pending[:title]] = pending[:text]
      cursor  = pending[:ec]
      pending = nil
    end

    text_bc = line.index(/\S/, cursor)
    break unless text_bc

    sep_idx = line.index(delim, text_bc)
    text_ec = sep_idx || line.length   # no separator: cell runs to end of line
    text    = line[text_bc...text_ec]

    if (text_bc - @COL_GAP) <= ec
      pending = nil
      cells[title] = text
      break unless sep_idx             # nothing left on the line
      cursor = text_ec
    else
      # Starts too far right for this header; decide on the next iteration.
      pending = { title: title, text: text, ec: text_ec }
    end
  end

  cells[pending[:title]] = pending[:text] if pending
  cells
end
header_lines()
click to toggle source
# File lib/ripcols/ripper.rb, line 163
#
# Returns the header buffer, running seperate_body_head first when the
# buffer has not been populated yet.
def header_lines
  seperate_body_head unless @hbuf
  @hbuf
end
insert_to( grouping , title, bc, ec )
click to toggle source
# File lib/ripcols/ripper.rb, line 187
#
# Adds a header word to +grouping+ (mutated in place).
#
# grouping : Array of [ [titles...], beginning_column, ending_column ].
# title    : the word to add.
# bc, ec   : the word's beginning and ending column.
#
# If the word overlaps an existing group (first match wins) the word is
# appended to that group's titles and the group's bounds are widened to
# cover it; otherwise a new single-word group is appended.
def insert_to( grouping , title, bc, ec )
  candidate = [title, bc, ec]
  target = grouping.find { |existing| overlap?(existing, candidate) }

  return grouping.push( [[title], bc, ec] ) unless target

  target[0].push( title )
  known_bc, known_ec = target[1..2]
  target[1] = [bc, known_bc].min
  target[2] = [ec, known_ec].max
end
overlap?( group_a, group_b )
click to toggle source
Checks whether the column ranges of the two given groups overlap. Each group is [ title, beginning_column, ending_column ]. Note: the ending column is exclusive.
# File lib/ripcols/ripper.rb, line 178
#
# Tells whether the column ranges of two groups share at least one column.
#
# group_a, group_b : [ title, beginning_column, ending_column ]; the ending
#                    column is exclusive.
#
# Two ranges overlap exactly when one of them begins inside the other.
# The previous version additionally tested whether b's *exclusive* end fell
# inside a, which reported merely adjacent groups (b ending where a begins)
# as overlapping, and only for that argument order. That test is removed so
# the result honours the exclusive end and is the same for either order.
def overlap?( group_a, group_b )
  _, a_bc, a_ec = group_a
  _, b_bc, b_ec = group_b

  b_bc.between?( a_bc, a_ec.pred ) || a_bc.between?( b_bc, b_ec.pred )
end
seperate_body_head()
click to toggle source
# File lib/ripcols/ripper.rb, line 135
#
# Splits @fbuf into the header text (@hbuf) and the body text (@bbuf).
#
# * The header starts at the first match of the HEADER_BEGIN pattern;
#   leading newlines at that position are dropped.
# * @hbuf runs from there up to and including the character at which the
#   HEADER_END pattern starts matching.
# * The body is whatever follows the HEADER_END match (leading newlines
#   dropped) up to, but not including, the first LINE_END match.
#
# Raises ArgumentError when any of the three patterns does not match. If
# only LINE_END fails, @hbuf has already been assigned.
#
# Fix: the HEADER_END / LINE_END failures used to call
# `raise ArgumentError, pattern, "text"`, which makes the pattern the
# exception message and the text its backtrace. The explanation is now the
# message (with the pattern appended) and the real backtrace is preserved.
def seperate_body_head
  hbeg_idx = @patterns[:HEADER_BEGIN] =~ @fbuf
  raise ArgumentError, "Failed to located beginning of Header" unless hbeg_idx

  head_begin_buf = @fbuf[ hbeg_idx .. -1 ].sub(/\A\n+/, '')

  header_end = @patterns[:HEADER_END]
  hend_idx = header_end =~ head_begin_buf
  unless hend_idx
    raise ArgumentError,
          "Failed to locate ending of Header (pattern: #{header_end.inspect})"
  end
  # Capture the match data before any further regexp call replaces it.
  after_header = Regexp.last_match.post_match

  @hbuf = head_begin_buf[ 0..hend_idx ]

  bbuf = after_header.sub(/\A\n+/, '')

  line_end = @patterns[:LINE_END]
  lend_idx = line_end =~ bbuf
  unless lend_idx
    raise ArgumentError,
          "Failed to locate ending of lines (pattern: #{line_end.inspect})"
  end

  @bbuf = Regexp.last_match.pre_match
end