class NdrImport::File::Excel

This class is an excel file handler that returns tables (worksheets). It provides a file reader method and methods to cast raw values appropriately. These methods can be overridden or aliased as required.

Public Instance Methods

tables() { |sheet_name, excel_rows(workbook, sheet_name)| ... } click to toggle source

Iterate through the file table by table, yielding each one in turn.

# File lib/ndr_import/file/excel.rb, line 20
def tables
  return enum_for(:tables) unless block_given?

  workbook = load_workbook(@filename)
  workbook.sheets.each do |sheet_name|
    yield sheet_name, excel_rows(workbook, sheet_name)
  end
end

Protected Instance Methods

cast_excel_datetime_as_date(raw_value) click to toggle source
# File lib/ndr_import/file/excel.rb, line 48
def cast_excel_datetime_as_date(raw_value)
  raw_value.to_s(:db)
end
cast_excel_value(raw_value) click to toggle source
# File lib/ndr_import/file/excel.rb, line 31
def cast_excel_value(raw_value)
  return raw_value if raw_value.nil?

  if raw_value.is_a?(Date) || raw_value.is_a?(DateTime) || raw_value.is_a?(Time)
    cast_excel_datetime_as_date(raw_value)
  elsif raw_value.is_a?(Float)
    if raw_value.to_f == raw_value.to_i
      # Whole number
      return raw_value.to_i.to_s
    else
      return raw_value.to_f.to_s
    end
  else
    return raw_value.to_s.strip
  end
end

Private Instance Methods

copy_file(source, dest) click to toggle source

Note that this method can produce insecure calls. All callers must protect their arguments. Arguments:

* source - SafeFile
* dest - SafeFile
# File lib/ndr_import/file/excel.rb, line 125
def copy_file(source, dest)
  # SECURE: TVB Mon Aug 13 13:53:02 BST 2012 : Secure SafePath will do the security checks
  # before it is converted to string.
  # SafeFile will make sure that the arguments are actually SafePath
  FileUtils.mkdir_p(SafeFile.safepath_to_string(SafeFile.dirname(dest)))
  FileUtils.cp(SafeFile.safepath_to_string(source), SafeFile.safepath_to_string(dest))
end
excel_rows(workbook, sheet_name, &block) click to toggle source

Iterate through the sheet line by line, yielding each one in turn.

# File lib/ndr_import/file/excel.rb, line 55
def excel_rows(workbook, sheet_name, &block)
  return enum_for(:excel_rows, workbook, sheet_name) unless block

  if workbook.is_a?(Roo::Excelx)
    # FIXME: xlsx_rows(sheet, &block) should produce the same output as xls_rows
    xls_rows(workbook, sheet_name, &block)
  else
    xls_rows(workbook, sheet_name, &block)
  end
end
load_workbook(path) click to toggle source
# File lib/ndr_import/file/excel.rb, line 89
def load_workbook(path)
  case SafeFile.extname(path).downcase
  when '.xls'
    Roo::Excel.new(SafeFile.safepath_to_string(path))
  when '.xlsm', '.xlsx'
    if @options['file_password']
      Roo::Excelx.new(StringIO.new(decrypted_file_string(path, @options['file_password'])))
    else
      Roo::Excelx.new(SafeFile.safepath_to_string(path))
    end
  else
    raise "Received file path with unexpected extension #{SafeFile.extname(path)}"
  end
rescue Ole::Storage::FormatError => e
  # TODO: Do we need to remove the new_file after using it?

  # try to load the .xls file as an .xlsx file, useful for sources like USOM
  # roo check file extensions in file_type_check (GenericSpreadsheet),
  # so we create a duplicate file in xlsx extension
  raise e.message unless /(.*)\.xls$/.match(path)

  new_file_name = SafeFile.basename(path).gsub(/(.*)\.xls$/, '\1_amend.xlsx')
  new_file_path = SafeFile.dirname(path).join(new_file_name)
  copy_file(path, new_file_path)

  load_workbook(new_file_path)
rescue RuntimeError, ::Zip::Error => e
  raise ["Unable to read the file '#{path}'", e.message].join('; ')
end
xls_rows(workbook, sheet_name) { |map { |col| cast_excel_value(cell) }| ... } click to toggle source

Iterate through an xls sheet line by line, yielding each one in turn.

# File lib/ndr_import/file/excel.rb, line 67
def xls_rows(workbook, sheet_name)
  return enum_for(:xls_rows, workbook, sheet_name) unless block_given?

  return unless workbook.first_row(sheet_name)
  rows    = workbook.first_row(sheet_name)..workbook.last_row(sheet_name)
  columns = workbook.first_column(sheet_name)..workbook.last_column(sheet_name)

  rows.each do |row|
    yield columns.map { |col| cast_excel_value(workbook.cell(row, col, sheet_name)) }
  end
end
xlsx_rows(workbook, sheet_name) { |map { |cell| cast_excel_value(value) }| ... } click to toggle source

Iterate through an xlsx sheet line by line, yielding each one in turn. This method uses streaming github.com/roo-rb/roo#excel-xlsx-and-xlsm-support

# File lib/ndr_import/file/excel.rb, line 81
def xlsx_rows(workbook, sheet_name)
  return enum_for(:xlsx_rows, workbook, sheet_name) unless block_given?

  workbook.each_row_streaming(sheet: sheet_name, pad_cells: true) do |row|
    yield row.map { |cell| cast_excel_value(cell.value) }
  end
end