class ChupaText::Decomposers::Spreadsheet

Constants

TARGET_EXTENSIONS
TARGET_MIME_TYPES

Public Instance Methods

decompose(data) { |text_data| ... } click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 33
# Yields one TextData per sheet of the workbook opened from +data+.
#
# For every name in the workbook's sheet list, the sheet is flattened to
# text with build_body and wrapped in a TextData whose source is +data+.
# Each entry carries these attributes:
#
# * "name"         - the sheet name
# * "digest"       - SHA1 hex digest of the flattened text
# * "size"         - byte size of the flattened text
# * "first-row" / "last-row"       - bounds as reported by the sheet
# * "first-column" / "last-column" - column letters, or the falsy bound
#   itself when the sheet reports no column
#
# Nothing is yielded if open_book returns without yielding a workbook.
def decompose(data)
  open_book(data) do |workbook|
    workbook.sheets.each do |name|
      worksheet = workbook.sheet(name)
      text = build_body(worksheet)

      entry = TextData.new(text, source_data: data)
      entry["name"] = name
      entry["digest"] = Digest::SHA1.hexdigest(text)
      entry["size"] = text.bytesize
      entry["first-row"] = worksheet.first_row
      entry["last-row"] = worksheet.last_row
      # Only ask for the letter form when the sheet reports a column bound.
      entry["first-column"] =
        worksheet.first_column && worksheet.first_column_as_letter
      entry["last-column"] =
        worksheet.last_column && worksheet.last_column_as_letter

      yield entry
    end
  end
end
target?(data) click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 20
# Tells whether +data+ should be handled by this decomposer.
#
# True when the extension of +data+ is listed in TARGET_EXTENSIONS or,
# failing that, when its MIME type is listed in TARGET_MIME_TYPES. The MIME
# type is only consulted when the extension does not match.
def target?(data)
  TARGET_EXTENSIONS.include?(data.extension) ||
    TARGET_MIME_TYPES.include?(data.mime_type)
end
target_score(data) click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 25
# Scores how well this decomposer suits +data+.
#
# Returns 10 when target? accepts +data+, and nil otherwise.
def target_score(data)
  10 if target?(data)
end

Private Instance Methods

build_body(sheet) click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 73
# Flattens +sheet+ into tab-separated text, one line per row.
#
# Returns an empty string when the sheet reports no first row. Otherwise
# every row from 1 through sheet.last_row is emitted, and within each row
# every column from 1 through sheet.last_column. Iteration starts at 1
# rather than at the first used row/column, so leading blank rows and
# columns appear as empty lines and empty fields. Cell text comes from
# build_cell, cells are joined with "\t" and each row ends with "\n".
def build_body(sheet)
  text = ""
  return text if sheet.first_row.nil?

  1.upto(sheet.last_row) do |row_index|
    1.upto(sheet.last_column) do |column_index|
      # Separator goes before every cell except the first one of the row.
      text << "\t" unless column_index == 1
      text << build_cell(sheet, row_index, column_index)
    end
    text << "\n"
  end

  text
end
build_cell(sheet, row, column) click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 89
# Converts the cell at (+row+, +column+) of +sheet+ to text.
#
# An empty cell (per sheet.empty?) becomes "". Otherwise the result depends
# on the cell type reported by the sheet:
#
# * :string - the cell value itself, unchanged
# * :time   - the value passed through sheet.integer_to_timestring
# * :link   - the url of the cell value
# * other   - the value's to_s
def build_cell(sheet, row, column)
  return "" if sheet.empty?(row, column)

  value = sheet.cell(row, column)
  case sheet.celltype(row, column)
  when :string then value
  when :time then sheet.integer_to_timestring(value)
  when :link then value.url
  else value.to_s
  end
end
log_tag() click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 105
# Prefix placed in front of log messages emitted by this decomposer.
def log_tag
  '[decomposer][spreadsheet]'
end
open_book(data) { |book| ... } click to toggle source
# File lib/chupa-text/decomposers/spreadsheet.rb, line 52
# Opens the spreadsheet at data.path and yields the workbook to the block.
#
# The workbook is closed once the block finishes, including when the block
# raises. The block's value is returned.
#
# If opening fails with Ole::Storage::FormatError, the error is logged
# (tag, exception class, message and backtrace) and nil is returned without
# yielding. Only failures raised while opening are handled this way; any
# other exception, and any exception raised by the block, propagates.
def open_book(data)
  begin
    workbook = Roo::Spreadsheet.open(data.path.to_s)
  rescue Ole::Storage::FormatError => invalid_format
    error do
      "#{log_tag} Invalid format: " \
        "#{invalid_format.class}: #{invalid_format.message}\n" \
        "#{invalid_format.backtrace.join("\n")}"
    end
    return
  end

  begin
    yield(workbook)
  ensure
    workbook.close
  end
end