class Iguvium::Table

Represents a single table from an [Iguvium::Page].

Additional functionality, such as an option to detect an open table grid at the end or at the beginning of the page, will be added later.

To render the table into a 2D text array, call {#to_a}.

Attributes

box[R]
lines[R]
page[R]

Public Class Methods

new(box, page) click to toggle source

@api private

# File lib/iguvium/table.rb, line 15
# Stores the table's bounding box and source page, then builds the grid and
# heals it right away.
#
# @api private
# @param box [Array<Range>] horizontal extent first, vertical extent last
# @param page [Iguvium::Page] page the table was found on; must respond to
#   +lines+ (and +characters+, used later when cells are filled)
def initialize(box, page)
  @page = page
  @box = box
  @lines = @page.lines
  grid
  heal
end

Public Instance Methods

grid() click to toggle source

# Reports whether #heal_rows appended an open row after the last detected row.
#
# @return [true, nil] true once such a row was added, nil otherwise
def floorless?
  @floorless
end

# File lib/iguvium/table.rb, line 63
# Memoized grid of the table, derived from the page lines.
#
# @return [Hash{Symbol => Array<Range>}] +:rows+ built from the horizontal
#   lines, +:columns+ built from the vertical lines
def grid
  return @grid if @grid

  rows = lines_to_ranges(lines[:horizontal])
  columns = lines_to_ranges(lines[:vertical])
  @grid = { rows: rows, columns: columns }
end
to_a(newlines: false, phrases: true) click to toggle source

Renders the table into a 2D array of strings.

Newlines in a PDF usually have no semantic value, so they are replaced with spaces by default. Sometimes you may need to keep them; in that case use the `newlines: true` option.

@param [Boolean] newlines keep newlines inside table cells, false by default
@param [Boolean] phrases keep phrases unsplit, true by default.

Poor man's merged cells workaround. It could break some tables and could fix others.

@return [Array] 2D array of strings (content of table's cells)

# File lib/iguvium/table.rb, line 34
# Renders every cell of the grid to text.
#
# Rows are walked in reverse grid order; columns are walked in grid order.
#
# @param newlines [Boolean] passed on to #render; false by default
# @param phrases [Boolean] fill cells from #words_inside when true (default),
#   from #chars_inside when false
# @return [Array<Array<String>>] one inner array per row, one string per cell
def to_a(newlines: false, phrases: true)
  grid[:rows].reverse.map do |row|
    grid[:columns].map do |column|
      content = phrases ? words_inside(column, row) : chars_inside(column, row)
      render(content, newlines: newlines)
    end
  end
end

Private Instance Methods

characters() click to toggle source
# File lib/iguvium/table.rb, line 111
# Memoized list of the page characters that fall inside the table box.
#
# @return [Array] characters whose +x+ is covered by +box.first+ and whose
#   +y+ is covered by +box.last+
def characters
  @characters ||= begin
    horizontal = box.first
    vertical = box.last
    page.characters.select do |char|
      horizontal.cover?(char.x) && vertical.cover?(char.y)
    end
  end
end
chars_inside(xrange, yrange) click to toggle source
# File lib/iguvium/table.rb, line 151
# Picks the table characters located in the given rectangle.
#
# @param xrange [Range] horizontal extent to test each character's +x+ against
# @param yrange [Range] vertical extent to test each character's +y+ against
# @return [Array] characters covered by both ranges
def chars_inside(xrange, yrange)
  characters.select do |char|
    next false unless xrange.cover?(char.x)

    yrange.cover?(char.y)
  end
end
heal() click to toggle source

Checks whether there are characters inside the box but outside of the already detected cells, and adds rows and/or columns if necessary.
@return [Iguvium::Table] with added open-cell rows and columns

# File lib/iguvium/table.rb, line 78
# Runs row healing and then column healing, each only when the grid already
# has at least one entry on that axis.
#
# @return [Iguvium::Table] self
def heal
  rows_detected = !grid[:rows].empty?
  heal_rows if rows_detected

  columns_detected = !grid[:columns].empty?
  heal_cols if columns_detected

  self
end
heal_cols() click to toggle source
# File lib/iguvium/table.rb, line 91
# Adds open columns for text that lies inside the box but outside the columns
# detected from the vertical lines.
#
# Expects +grid[:columns]+ to be non-empty (#heal only calls it in that case).
#
# @return [Array<Range>, nil] the column list if a right column was appended,
#   nil otherwise
def heal_cols
  columns = grid[:columns]
  x_extent = box.first
  y_extent = box.last

  # Exclusive end: detected columns are built as a...b, so the first column
  # already owns its left boundary. An inclusive range here would treat a
  # character sitting exactly on that line as "outside" text and create a
  # spurious extra column holding a duplicate of it.
  leftcol = x_extent.begin...columns.first.begin
  # Inclusive start is right here: the last column excludes its own end.
  rightcol = columns.last.end..x_extent.end

  columns.unshift(leftcol) if chars_inside(leftcol, y_extent).any?
  columns.append(rightcol) if chars_inside(rightcol, y_extent).any?
end
heal_rows() click to toggle source
# File lib/iguvium/table.rb, line 98
# Adds open rows for text that lies inside the box but outside the rows
# detected from the horizontal lines, and records which side was opened.
#
# Expects +grid[:rows]+ to be non-empty (#heal only calls it in that case).
# Sets +@roofless+ when a row is prepended and +@floorless+ when one is appended.
#
# @return [true, nil] true if a trailing row was appended, nil otherwise
def heal_rows
  rows = grid[:rows]
  x_extent = box.first
  y_extent = box.last

  # Exclusive end: detected rows are built as a...b, so the first row already
  # owns its lower boundary. An inclusive range here would make a character
  # sitting exactly on that line trigger a spurious extra row duplicating it.
  roofrow = y_extent.begin...rows.first.begin
  # Inclusive start is right here: the last row excludes its own end.
  floorrow = rows.last.end..y_extent.end

  if chars_inside(x_extent, roofrow).any?
    rows.unshift(roofrow)
    @roofless = true
  end
  if chars_inside(x_extent, floorrow).any?
    rows.append(floorrow)
    @floorless = true
  end
end
line_in_box?(line, box) click to toggle source
# File lib/iguvium/table.rb, line 143
# Tells whether a line lies entirely inside a box.
#
# A line is a two-element array; each element is either a single coordinate
# or a Range of coordinates. For a Range, only its smallest and largest
# values are tested.
#
# The endpoints come from Range#min / Range#max rather than from expanding
# the range into an array. That avoids allocating every integer in the range
# and also lets Float-bounded inclusive ranges work.
#
# @param line [Array] horizontal part first, vertical part last
# @param box [Array<Range>] horizontal extent first, vertical extent last
#   (this parameter shadows the +box+ attribute on purpose)
# @return [Boolean] true when every tested coordinate is covered by the box
def line_in_box?(line, box)
  endpoints = lambda do |coord|
    coord.is_a?(Range) ? [coord.min, coord.max] : [coord]
  end

  endpoints.call(line.first).all? { |x| box.first.cover?(x) } &&
    endpoints.call(line.last).all? { |y| box.last.cover?(y) }
end
lines_to_ranges(lines) click to toggle source
# File lib/iguvium/table.rb, line 134
# Converts lines of one direction into the ranges between neighbouring lines.
#
# Only lines accepted by #line_in_box? against #wide_box are used. Each line
# contributes its Numeric element (the first one if Numeric, otherwise the
# last one); the sorted, de-duplicated positions are paired consecutively.
#
# @param lines [Array<Array>] lines, each a two-element array
# @return [Array<Range>] end-exclusive ranges (+a...b+), one per pair of
#   neighbouring positions; empty when fewer than two positions remain
def lines_to_ranges(lines)
  inside = lines.select { |line| line_in_box?(line, wide_box) }
  positions = inside.map do |line|
    head = line.first
    head.is_a?(Numeric) ? head : line.last
  end
  positions.sort.uniq.each_cons(2).map { |from, to| from...to }
end
render(characters, newlines: false) click to toggle source
# File lib/iguvium/table.rb, line 157
# Turns a list of characters (or words) into the text of one cell.
#
# Items are sorted, consecutive mergable items are summed into chunks, each
# chunk is converted to a string with its whitespace normalised, and the
# chunks are joined.
#
# @param characters [Array] items responding to +<=>+, +mergable?+, +++ and +to_s+
# @param newlines [Boolean] join chunks with "\n" when true, with a space
#   when false (default)
# @return [String] cell text; empty string for an empty list
def render(characters, newlines: false)
  separator = newlines ? "\n" : ' '
  characters
    .sort
    .chunk_while { |a, b| a.mergable?(b) }
    .map do |chunk|
      # No `|` inside the character class: there it is a literal pipe and
      # would erase pipe characters from the cell text. Whitespace is
      # normalised before stripping so Unicode separators at the edges
      # (e.g. a no-break space) do not survive as a stray space.
      chunk.inject(:+).to_s.gsub(/[\s\p{Z}]+/, ' ').strip
    end
    .join(separator)
    .gsub(/ +/, ' ')
end
wide_box() click to toggle source
# File lib/iguvium/table.rb, line 84
# Memoized copy of the box grown by 2 units on every side.
#
# @return [Array<Range>] widened horizontal extent first, widened vertical
#   extent last
def wide_box
  @wide_box ||= [box.first, box.last].map do |extent|
    (extent.begin - 2)..(extent.end + 2)
  end
end
words() click to toggle source
# File lib/iguvium/table.rb, line 120
# Memoized list of words: the table characters are sorted, split into runs of
# consecutive mergable characters, and each run is summed into one item.
#
# @return [Array] one summed item per run
def words
  @words ||= begin
    runs = characters.sort.chunk_while { |left, right| left.mergable?(right) }
    runs.map { |run| run.reduce(:+) }
  end
end
words_inside(xrange, yrange) click to toggle source
# File lib/iguvium/table.rb, line 128
# Picks the words located in the given rectangle.
#
# @param xrange [Range] horizontal extent to test each word's +x+ against
# @param yrange [Range] vertical extent to test each word's +y+ against
# @return [Array] words covered by both ranges
def words_inside(xrange, yrange)
  words
    .select { |word| xrange.cover?(word.x) }
    .select { |word| yrange.cover?(word.y) }
end