class Ripcols::Ripper

Public Class Methods

new(patterns, str, column_gap=3, out_f=$stdout, err_f=$stderr) click to toggle source
# File lib/ripcols/ripper.rb, line 6
# Builds a ripper over the text +str+.
#
# patterns   - Hash of patterns; every key listed in @@REQUIRED_PATTERNS
#              must be present. The Hash is copied, not modified.
# str        - the complete text to be split into header and body.
# column_gap - minimum run of whitespace treated as a column separator
#              by the default separator patterns.
# out_f      - stored in @out_f.
# err_f      - accepted but not stored or used by this method.
#
# Raises ArgumentError when a required pattern key is missing.
def initialize(patterns, str, column_gap=3, out_f=$stdout, err_f=$stderr)
  if @@REQUIRED_PATTERNS.any? { |key| !patterns.include?(key) }
    raise ArgumentError, "all required keys not present.\n Required keys:  #{@@REQUIRED_PATTERNS}"
  end

  @COL_GAP = column_gap
  @fbuf    = str
  @out_f   = out_f

  # Separators the caller did not supply fall back to these defaults.
  gap_pattern = /\s{#{@COL_GAP},}/
  @patterns = patterns.dup
  {
    HEADER_COL_SEP: gap_pattern,
    LINE_COL_SEP:   gap_pattern,
    LINE_SEP:       /\n/,
  }.each { |key, fallback| @patterns[key] ||= fallback }

  # Header/body buffers start empty; seperate_body_head fills them in.
  @hbuf = @bbuf = nil
  @line_column_begin = 0
end

Public Instance Methods

debug!() click to toggle source
# File lib/ripcols/ripper.rb, line 71
# Re-splits the input and dumps the header and body buffers to standard
# output for inspection.
def debug!
  seperate_body_head
  head = header_lines
  body = body_lines

  puts "HEADER\n______"
  p head
  puts "BODY\n____"
  p body
end
parse() click to toggle source
# File lib/ripcols/ripper.rb, line 30
# Parses the whole input.
#
# Returns an Array with one Hash per body line, as produced by
# columize_line using the headers from parse_head.
def parse
  headers = parse_head

  body_lines
    .split(@patterns[:LINE_SEP])
    .map { |row| columize_line(row, headers) }
end
parse_head() click to toggle source
# File lib/ripcols/ripper.rb, line 38
# Works out the column layout from the header text.
#
# Every header line is split on the header column separator; each word is
# placed (via insert_to) into the group whose column range it overlaps, so
# titles spread over several header lines end up in one group.
#
# Returns an Array of [title, bc, ec] sorted by bc, where title is the
# group's words joined by a single space.
def parse_head
  separator = @patterns[:HEADER_COL_SEP]
  groups = []

  header_lines.each_line do |row|
    cursor = 0 # search offset, so repeated words resolve to successive positions
    row.strip.split(separator).each do |word|
      next if word.empty?

      start = row.index(word, cursor)
      cursor = start + word.length
      insert_to(groups, word, start, cursor)
    end
  end

  columns = groups
    .sort { |(_, left_bc), (_, right_bc)| left_bc <=> right_bc }
    .map { |(titles, bc, ec)| [titles.join(' '), bc, ec] }

  # todo: (possible BUG!)
  #  This breaks when the first column does not start at offset 0; some
  #  kind of hint about where the columns begin would be better.
  #
  # For simplicity the first column is forced to begin at @line_column_begin.
  columns.first[1] = @line_column_begin if columns.first

  columns
end

Private Instance Methods

body_lines() click to toggle source
# File lib/ripcols/ripper.rb, line 169
# Returns the body buffer, splitting the input first if that has not
# happened yet.
def body_lines
  seperate_body_head unless @bbuf
  @bbuf
end
columize_line(line, headers) click to toggle source

line : a single line of body text (String). headers : [ (title, bc, ec) …+ ], where bc and ec are the beginning and ending columns of each header.

OUTPUT

columized_line : Hash

> {“col1”: “matching stripped text”, …* }

Note

blank columns will not be part of the result.

# File lib/ripcols/ripper.rb, line 94
# Splits one body line into a Hash keyed by column title.
#
# line    - a single line of body text.
# headers - Array of [title, bc, ec] triples; bc/ec are the begin/end
#           column offsets of the header text.
#
# Returns a Hash of title => text. Headers for which no text is assigned
# do not appear in the result.
def columize_line line, headers
  return Hash[] if headers.empty?

  ks = {}
  # Offset in `line` from which the next value is searched.
  idx = 0
  delim = @patterns[:LINE_COL_SEP]
  # A value that started too far right of the header it was found under;
  # kept until a later header decides where it belongs.
  unresolved = nil

  headers.each do |(title, bc, ec)|

    if unresolved
      # The pending value ends more than @COL_GAP columns before this header
      # begins, so it is credited to the header it was found under and the
      # search resumes from the end of that value.
      # NOTE(review): when the pending value ran to the end of the line its
      # :ec is -1, so idx becomes -1 here — confirm this is intended.
      if (unresolved[:text][:ec] + @COL_GAP) < bc
        head = unresolved[:header]
        ks[ head[:title] ] = unresolved[:text][:text]
        idx = unresolved[:text][:ec]
        unresolved = nil
      end
    end

    # Next non-blank character at or after idx; nothing left means we are done.
    break unless bc_idx = line.index( /\S/, idx )
    # The value ends at the next column separator; -1 means "no separator,
    # value runs to the end of the line".
    ec_idx = line.index( delim, bc_idx ) || -1
    # Inclusive range up to -1 takes the rest of the line; otherwise the
    # separator itself is excluded.
    val = line[ ec_idx == -1 ? (bc_idx .. ec_idx) : (bc_idx ... ec_idx) ]
    # The value starts no further than @COL_GAP columns past the end of this
    # header, so it is assigned to this header.
    if (bc_idx - @COL_GAP) <= ec
      unresolved = nil
      idx = ec_idx
      ks[title] = val # line[bc_idx ... ec_idx]

      break if ec_idx == -1
    else
      # The value starts well to the right of this header. idx is left
      # untouched so the next header looks at the same value again.
      unresolved = {
        "text":   Hash[:text, val, :bc, bc_idx, :ec, ec_idx],
        "header": Hash[:title, title, :bc, bc, :ec, ec],
      }
    end

  end

  # NOTE(review): a value still held in `unresolved` after the last header
  # is not written to ks — confirm that dropping it is intended.
  ks
end
header_lines() click to toggle source
# File lib/ripcols/ripper.rb, line 163
# Returns the header buffer, splitting the input first if that has not
# happened yet.
def header_lines
  seperate_body_head unless @hbuf
  @hbuf
end
insert_to( grouping , title, bc, ec ) click to toggle source
# File lib/ripcols/ripper.rb, line 187
# Adds a header word to +grouping+.
#
# grouping - Array of [titles, bc, ec] groups; modified in place.
# title    - the header word.
# bc, ec   - begin and end column of the word.
#
# The word joins the first group whose column range it overlaps, and that
# group's range is widened to cover it. With no overlapping group, a new
# single-word group is appended.
def insert_to( grouping , title, bc, ec )
  candidate = [title, bc, ec]
  target = grouping.find { |existing| overlap?(existing, candidate) }
  return grouping.push( [[title], bc, ec] ) if target.nil?

  target[0].push( title )
  target[1] = [bc, target[1]].min
  target[2] = [ec, target[2]].max
end
overlap?( group_a, group_b ) click to toggle source

Checks whether the two given groups fall within each other's column boundaries. Each group has the form [ title, beginning_column, ending_column ]. Note: the ending column is exclusive.

# File lib/ripcols/ripper.rb, line 178
# Reports whether two groups share at least one column.
#
# group_a, group_b - [title, beginning_column, ending_column]; the ending
#                    column is exclusive and is expected to be greater
#                    than the beginning column.
#
# Both ranges are treated as half-open, so groups that merely touch
# (one ends exactly where the other begins) do not overlap.
#
# Returns true or false.
def overlap?( group_a, group_b )
  (_, a_bc, a_ec) = group_a
  (_, b_bc, b_ec) = group_b
  a_bc < b_ec && b_bc < a_ec
end
seperate_body_head() click to toggle source
# File lib/ripcols/ripper.rb, line 135
# Splits the input buffer (@fbuf) into header text (@hbuf) and body text
# (@bbuf) using three patterns from @patterns:
#
#   :HEADER_BEGIN - its match position is where the header starts
#   :HEADER_END   - matched against the text from the header start onwards
#   :LINE_END     - matched against the text following the HEADER_END match
#
# Newlines at the very start of the header text and of the body text are
# dropped.
#
# Raises ArgumentError when any of the three patterns fails to match.
def seperate_body_head
  header_start = @patterns[:HEADER_BEGIN] =~ @fbuf
  raise ArgumentError, "Failed to locate beginning of Header" unless header_start

  from_header = @fbuf[header_start..-1].sub(/\A\n+/, '')

  header_end = @patterns[:HEADER_END] =~ from_header
  unless header_end
    raise ArgumentError,
          "Failed to locate ending of Header (pattern: #{@patterns[:HEADER_END].inspect})"
  end
  # Capture the remainder right away, before any other match replaces the
  # last-match data.
  after_header = Regexp.last_match.post_match
  # Inclusive range: the first character of the HEADER_END match is kept.
  @hbuf = from_header[0..header_end]

  body = after_header.sub(/\A\n+/, '')
  unless @patterns[:LINE_END] =~ body
    raise ArgumentError,
          "Failed to locate ending of lines (pattern: #{@patterns[:LINE_END].inspect})"
  end
  # The body is everything before the LINE_END match.
  @bbuf = Regexp.last_match.pre_match
end