class Roo::Excel2003XML
Constants
- A_ROO_TYPE
Public Class Methods
initialization and opening of a spreadsheet file values for packed: :zip
# File lib/roo/xls/excel_2003_xml.rb, line 9 def initialize(filename, options = {}) packed = options[:packed] file_warning = options[:file_warning] || :error Dir.mktmpdir do |tmpdir| filename = download_uri(filename, tmpdir) if uri?(filename) filename = unzip(filename, tmpdir) if packed == :zip file_type_check(filename, '.xml', 'an Excel 2003 XML', file_warning) @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @doc = ::Roo::Utils.load_xml(@filename) end namespace = @doc.namespaces.select { |_, urn| urn == 'urn:schemas-microsoft-com:office:spreadsheet' }.keys.last @namespace = (namespace.nil? || namespace.empty?) ? 'ss' : namespace.split(':').last super(filename, options) @formula = {} @style = {} @style_defaults = Hash.new { |h, k| h[k] = [] } @style_definitions = {} read_styles end
Private Class Methods
# File lib/roo/xls/excel_2003_xml.rb, line 290 def self.oo_type_2_roo_type(ootype) A_ROO_TYPE[ootype] end
Public Instance Methods
Returns the content of a spreadsheet-cell. (1,1) is the upper left corner. (1,1), (1,'A'), ('A',1), ('a',1) all refers to the cell at the first line and first row.
# File lib/roo/xls/excel_2003_xml.rb, line 38 def cell(row, col, sheet = nil) sheet ||= @default_sheet read_cells(sheet) row, col = normalize(row, col) if celltype(row, col, sheet) == :date yyyy, mm, dd = @cell[sheet][[row, col]].split('-') return Date.new(yyyy.to_i, mm.to_i, dd.to_i) end @cell[sheet][[row, col]] end
returns the type of a cell:
-
:float
-
:string
-
:date
-
:percentage
-
:formula
-
:time
-
:datetime
# File lib/roo/xls/excel_2003_xml.rb, line 93 def celltype(row, col, sheet = nil) sheet ||= @default_sheet read_cells(sheet) row, col = normalize(row, col) if @formula[sheet][[row, col]] return :formula else @cell_type[sheet][[row, col]] end end
Given a cell, return the cell's style
# File lib/roo/xls/excel_2003_xml.rb, line 77 def font(row, col, sheet = nil) sheet ||= @default_sheet read_cells(sheet) row, col = normalize(row, col) style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default' @style_definitions[style_name] end
Returns the formula at (row,col). Returns nil if there is no formula. The method formula?
checks if there is a formula.
# File lib/roo/xls/excel_2003_xml.rb, line 52 def formula(row, col, sheet = nil) sheet ||= @default_sheet read_cells(sheet) row, col = normalize(row, col) @formula[sheet][[row, col]] && @formula[sheet][[row, col]]['oooc:'.length..-1] end
returns each formula in the selected sheet as an array of elements
- row, col, formula
# File lib/roo/xls/excel_2003_xml.rb, line 132 def formulas(sheet = nil) theformulas = [] sheet ||= @default_sheet read_cells(sheet) first_row(sheet).upto(last_row(sheet)) do|row| first_column(sheet).upto(last_column(sheet)) do|col| if formula?(row, col, sheet) f = [row, col, formula(row, col, sheet)] theformulas << f end end end theformulas end
version of the openoffice document at 2007 this is always “1.0”
# File lib/roo/xls/excel_2003_xml.rb, line 112 def officeversion oo_version @officeversion end
# File lib/roo/xls/excel_2003_xml.rb, line 104 def sheets @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Worksheet").map do |sheet| sheet["#{@namespace}:Name"] end end
shows the internal representation of all cells mainly for debugging purposes
# File lib/roo/xls/excel_2003_xml.rb, line 119 def to_s(sheet = nil) sheet ||= @default_sheet read_cells(sheet) @cell[sheet].inspect end
Private Instance Methods
helper method to convert compressed spaces and other elements within an text into a string
# File lib/roo/xls/excel_2003_xml.rb, line 296 def children_to_string(children) result = '' children.each do|child| if child.text? result = result + child.content else if child.name == 's' compressed_spaces = child['c'].to_i # no explicit number means a count of 1: if compressed_spaces == 0 compressed_spaces = 1 end result = result + ' ' * compressed_spaces else result = result + child.content end end end result end
read the version of the OO-Version
# File lib/roo/xls/excel_2003_xml.rb, line 150 def oo_version @doc.find("//*[local-name()='document-content']").each do |office| @officeversion = office['version'] end end
read all cells in the selected sheet
# File lib/roo/xls/excel_2003_xml.rb, line 189 def read_cells(sheet = nil) sheet ||= @default_sheet validate_sheet!(sheet) return if @cells_read[sheet] sheet_found = false @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Worksheet[@#{@namespace}:Name='#{sheet}']").each do |ws| sheet_found = true column_styles = {} # Column Styles col = 1 ws.xpath("./#{@namespace}:Table/#{@namespace}:Column").each do |c| skip_to_col = c["#{@namespace}:Index"].to_i col = skip_to_col if skip_to_col > 0 col_style_name = c["#{@namespace}:StyleID"] column_styles[col] = col_style_name unless col_style_name.nil? col += 1 end # Rows row = 1 ws.xpath("./#{@namespace}:Table/#{@namespace}:Row").each do |r| skip_to_row = r["#{@namespace}:Index"].to_i row = skip_to_row if skip_to_row > 0 # Excel uses a 'Span' attribute on a 'Row' to indicate the presence of # empty rows to skip. skip_next_rows = r["#{@namespace}:Span"].to_i row_style_name = r["#{@namespace}:StyleID"] # Cells col = 1 r.xpath("./#{@namespace}:Cell").each do |c| skip_to_col = c["#{@namespace}:Index"].to_i col = skip_to_col if skip_to_col > 0 skip_next_cols = c["#{@namespace}:MergeAcross"].to_i cell_style_name = c["#{@namespace}:StyleID"] style_name = cell_style_name || row_style_name || column_styles[col] # Cell Data c.xpath("./#{@namespace}:Data").each do |cell| formula = cell['Formula'] value_type = cell["#{@namespace}:Type"].downcase.to_sym v = cell.content str_v = v case value_type when :number v = v.to_f value_type = :float when :datetime if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/ v = Regexp.last_match[1] value_type = :time elsif v =~ /([^T]+)T00:00:00.000/ v = Regexp.last_match[1] value_type = :date end when :boolean v = cell['boolean-value'] end set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name) end col += (skip_next_cols + 1) end row += (skip_next_rows + 1) end end unless sheet_found raise RangeError, "Unable to find sheet #{sheet} for reading" end @cells_read[sheet] = true end
# File lib/roo/xls/excel_2003_xml.rb, line 266 def read_styles @doc.xpath("/#{@namespace}:Workbook/#{@namespace}:Styles/#{@namespace}:Style").each do |style| style_id = style["#{@namespace}:ID"] font = style.at_xpath("./#{@namespace}:Font") unless font.nil? @style_definitions[style_id] = Roo::Excel2003XML::Font.new @style_definitions[style_id].bold = font["#{@namespace}:Bold"] @style_definitions[style_id].italic = font["#{@namespace}:Italic"] @style_definitions[style_id].underline = font["#{@namespace}:Underline"] @style_definitions[style_id].color = font["#{@namespace}:Color"] @style_definitions[style_id].name = font["#{@namespace}:FontName"] @style_definitions[style_id].size = font["#{@namespace}:Size"] end end end
helper function to set the internal representation of cells
# File lib/roo/xls/excel_2003_xml.rb, line 157 def set_cell_values(sheet, x, y, i, v, value_type, formula, _table_cell, str_v, style_name) key = [y, x + i] @cell_type[sheet] = {} unless @cell_type[sheet] @cell_type[sheet][key] = value_type @formula[sheet] = {} unless @formula[sheet] @formula[sheet][key] = formula if formula @cell[sheet] = {} unless @cell[sheet] @style[sheet] = {} unless @style[sheet] @style[sheet][key] = style_name @cell[sheet][key] = case @cell_type[sheet][key] when :float v.to_f when :string str_v when :datetime DateTime.parse(v) when :percentage v.to_f # when :time # hms = v.split(':') # hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i else v end end