class Roo::Excel2003XML

Constants

A_ROO_TYPE

Public Class Methods

new(filename, options = {}) click to toggle source

initialization and opening of a spreadsheet file values for packed: :zip

Calls superclass method
# File lib/roo/xls/excel_2003_xml.rb, line 9
def initialize(filename, options = {})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error

  Dir.mktmpdir do |tmpdir|
    filename = download_uri(filename, tmpdir) if uri?(filename)
    filename = unzip(filename, tmpdir) if packed == :zip

    file_type_check(filename, '.xml', 'an Excel 2003 XML', file_warning)
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = ::Roo::Utils.load_xml(@filename)
  end
  namespace = @doc.namespaces.select { |_, urn| urn == 'urn:schemas-microsoft-com:office:spreadsheet' }.keys.last
  @namespace = (namespace.nil? || namespace.empty?) ? 'ss' : namespace.split(':').last
  super(filename, options)
  @formula = {}
  @style = {}
  @style_defaults = Hash.new { |h, k| h[k] = [] }
  @style_definitions = {}
  read_styles
end

Private Class Methods

oo_type_2_roo_type(ootype) click to toggle source
# File lib/roo/xls/excel_2003_xml.rb, line 290
def self.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end

Public Instance Methods

cell(row, col, sheet = nil) click to toggle source

Returns the content of a spreadsheet-cell. (1,1) is the upper left corner. (1,1), (1,'A'), ('A',1), ('a',1) all refers to the cell at the first line and first row.

# File lib/roo/xls/excel_2003_xml.rb, line 38
def cell(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  if celltype(row, col, sheet) == :date
    yyyy, mm, dd = @cell[sheet][[row, col]].split('-')
    return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
  end
  @cell[sheet][[row, col]]
end
celltype(row, col, sheet = nil) click to toggle source

returns the type of a cell:

  • :float

  • :string

  • :date

  • :percentage

  • :formula

  • :time

  • :datetime

# File lib/roo/xls/excel_2003_xml.rb, line 93
def celltype(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  if @formula[sheet][[row, col]]
    return :formula
  else
    @cell_type[sheet][[row, col]]
  end
end
font(row, col, sheet = nil) click to toggle source

Given a cell, return the cell's style

# File lib/roo/xls/excel_2003_xml.rb, line 77
def font(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
  @style_definitions[style_name]
end
formula(row, col, sheet = nil) click to toggle source

Returns the formula at (row,col). Returns nil if there is no formula. The method formula? checks if there is a formula.

# File lib/roo/xls/excel_2003_xml.rb, line 52
def formula(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  @formula[sheet][[row, col]] && @formula[sheet][[row, col]]['oooc:'.length..-1]
end
Also aliased as: formula?
formula?(row, col, sheet = nil)
Alias for: formula
formulas(sheet = nil) click to toggle source

returns each formula in the selected sheet as an array of elements

row, col, formula
# File lib/roo/xls/excel_2003_xml.rb, line 132
def formulas(sheet = nil)
  theformulas = []
  sheet ||= @default_sheet
  read_cells(sheet)
  first_row(sheet).upto(last_row(sheet)) do|row|
    first_column(sheet).upto(last_column(sheet)) do|col|
      if formula?(row, col, sheet)
        f = [row, col, formula(row, col, sheet)]
        theformulas << f
      end
    end
  end
  theformulas
end
officeversion() click to toggle source

version of the openoffice document at 2007 this is always “1.0”

# File lib/roo/xls/excel_2003_xml.rb, line 112
def officeversion
  oo_version
  @officeversion
end
sheets() click to toggle source
# File lib/roo/xls/excel_2003_xml.rb, line 104
def sheets
  @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Worksheet").map do |sheet|
    sheet["#{@namespace}:Name"]
  end
end
to_s(sheet = nil) click to toggle source

shows the internal representation of all cells mainly for debugging purposes

# File lib/roo/xls/excel_2003_xml.rb, line 119
def to_s(sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet)
  @cell[sheet].inspect
end

Private Instance Methods

children_to_string(children) click to toggle source

helper method to convert compressed spaces and other elements within an text into a string

# File lib/roo/xls/excel_2003_xml.rb, line 296
def children_to_string(children)
  result = ''
  children.each do|child|
    if child.text?
      result = result + child.content
    else
      if child.name == 's'
        compressed_spaces = child['c'].to_i
        # no explicit number means a count of 1:
        if compressed_spaces == 0
          compressed_spaces = 1
        end
        result = result + ' ' * compressed_spaces
      else
        result = result + child.content
      end
    end
  end
  result
end
oo_version() click to toggle source

read the version of the OO-Version

# File lib/roo/xls/excel_2003_xml.rb, line 150
def oo_version
  @doc.find("//*[local-name()='document-content']").each do |office|
    @officeversion = office['version']
  end
end
read_cells(sheet = nil) click to toggle source

read all cells in the selected sheet

# File lib/roo/xls/excel_2003_xml.rb, line 189
def read_cells(sheet = nil)
  sheet ||= @default_sheet
  validate_sheet!(sheet)
  return if @cells_read[sheet]
  sheet_found = false

  @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Worksheet[@#{@namespace}:Name='#{sheet}']").each do |ws|
    sheet_found = true
    column_styles = {}

    # Column Styles
    col = 1
    ws.xpath("./#{@namespace}:Table/#{@namespace}:Column").each do |c|
      skip_to_col = c["#{@namespace}:Index"].to_i
      col = skip_to_col if skip_to_col > 0
      col_style_name = c["#{@namespace}:StyleID"]
      column_styles[col] = col_style_name unless col_style_name.nil?
      col += 1
    end

    # Rows
    row = 1
    ws.xpath("./#{@namespace}:Table/#{@namespace}:Row").each do |r|
      skip_to_row = r["#{@namespace}:Index"].to_i
      row = skip_to_row if skip_to_row > 0

      # Excel uses a 'Span' attribute on a 'Row' to indicate the presence of
      # empty rows to skip.
      skip_next_rows = r["#{@namespace}:Span"].to_i

      row_style_name = r["#{@namespace}:StyleID"]

      # Cells
      col = 1
      r.xpath("./#{@namespace}:Cell").each do |c|
        skip_to_col = c["#{@namespace}:Index"].to_i
        col = skip_to_col if skip_to_col > 0

        skip_next_cols = c["#{@namespace}:MergeAcross"].to_i

        cell_style_name = c["#{@namespace}:StyleID"]
        style_name = cell_style_name || row_style_name || column_styles[col]

        # Cell Data
        c.xpath("./#{@namespace}:Data").each do |cell|
          formula = cell['Formula']
          value_type = cell["#{@namespace}:Type"].downcase.to_sym
          v = cell.content
          str_v = v
          case value_type
          when :number
            v = v.to_f
            value_type = :float
          when :datetime
            if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
              v = Regexp.last_match[1]
              value_type = :time
            elsif v =~ /([^T]+)T00:00:00.000/
              v = Regexp.last_match[1]
              value_type = :date
            end
          when :boolean
            v = cell['boolean-value']
          end
          set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
        end
        col += (skip_next_cols + 1)
      end
      row += (skip_next_rows + 1)
    end
  end
  unless sheet_found
    raise RangeError, "Unable to find sheet #{sheet} for reading"
  end
  @cells_read[sheet] = true
end
read_styles() click to toggle source
# File lib/roo/xls/excel_2003_xml.rb, line 266
def read_styles
  @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Styles/#{@namespace}:Style").each do |style|
    style_id = style["#{@namespace}:ID"]
    font = style.at_xpath("./#{@namespace}:Font")
    unless font.nil?
      @style_definitions[style_id] = Roo::Excel2003XML::Font.new
      @style_definitions[style_id].bold      = font["#{@namespace}:Bold"]
      @style_definitions[style_id].italic    = font["#{@namespace}:Italic"]
      @style_definitions[style_id].underline = font["#{@namespace}:Underline"]
      @style_definitions[style_id].color     = font["#{@namespace}:Color"]
      @style_definitions[style_id].name      = font["#{@namespace}:FontName"]
      @style_definitions[style_id].size      = font["#{@namespace}:Size"]
    end
  end
end
set_cell_values(sheet, x, y, i, v, value_type, formula, _table_cell, str_v, style_name) click to toggle source

helper function to set the internal representation of cells

# File lib/roo/xls/excel_2003_xml.rb, line 157
def set_cell_values(sheet, x, y, i, v, value_type, formula, _table_cell, str_v, style_name)
  key = [y, x + i]
  @cell_type[sheet] = {} unless @cell_type[sheet]
  @cell_type[sheet][key] = value_type
  @formula[sheet] = {} unless @formula[sheet]
  @formula[sheet][key] = formula  if formula
  @cell[sheet]    = {} unless @cell[sheet]
  @style[sheet] = {} unless @style[sheet]
  @style[sheet][key] = style_name
  @cell[sheet][key] =
    case @cell_type[sheet][key]
    when :float
      v.to_f
    when :string
      str_v
    when :datetime
      DateTime.parse(v)
    when :percentage
      v.to_f
    # when :time
    #   hms = v.split(':')
    #   hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
    else
      v
    end
end