class ChupaText::Decomposers::OfficeOpenXMLWorkbook
Public Class Methods
new(options={})
click to toggle source
Calls superclass method
# File lib/chupa-text/decomposers/office-open-xml-workbook.rb, line 24
#
# Builds the decomposer.
#
# The bare +super+ forwards +options+ unchanged to the superclass
# initializer. Afterwards three instance variables are set: the file
# extensions, the MIME types and the SpreadsheetML namespace URI.
def initialize(options={})
  super

  @extensions = ["xlsx", "xlsm", "xltx", "xltm"]

  @mime_types = [
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-excel.sheet.macroEnabled.12",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    "application/vnd.ms-excel.template.macroEnabled.12",
  ]

  @namespace_uri =
    "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
end
Private Instance Methods
finish_decompose(context) { |metadata| ... }
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml-workbook.rb, line 65
#
# Yields everything collected in +context+ to the caller's block.
#
# 1. A TextData with an empty body is yielded first; every pair in
#    <tt>context[:attributes]</tt> is copied onto it.
# 2. Then one TextData per entry of <tt>context[:sheets]</tt> is yielded,
#    in ascending order of each entry's first element. Its body is the
#    sheet rendered as text: cells joined with tabs, one newline after
#    each row. Integer cells are replaced by the element at that index in
#    <tt>context[:shared_strings]</tt>; any other cell is used as is.
#    "index" is the position in the sorted list, and "name" is set only
#    when <tt>context[:sheet_names]</tt> has a truthy value at that
#    position.
#
# Every TextData is created with <tt>source_data: context[:data]</tt>.
def finish_decompose(context, &block)
  metadata = TextData.new("", source_data: context[:data])
  context[:attributes].each { |key, value| metadata[key] = value }
  yield(metadata)

  shared_strings = context[:shared_strings]
  ordered_sheets = context[:sheets].sort_by(&:first).map(&:last)
  sheet_names = context[:sheet_names]

  ordered_sheets.each_with_index do |rows, index|
    body = rows.each_with_object("") do |row, text|
      cells = row.map do |cell|
        case cell
        when Integer then shared_strings[cell]
        else cell
        end
      end
      text << cells.join("\t") << "\n"
    end

    text_data = TextData.new(body, source_data: context[:data])
    text_data["index"] = index
    sheet_name = sheet_names[index]
    text_data["name"] = sheet_name if sheet_name
    yield(text_data)
  end
end
log_tag()
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml-workbook.rb, line 96
#
# Returns the superclass's log tag with "[workbook]" appended.
def log_tag
  parent_tag = super
  "#{parent_tag}[workbook]"
end
process_entry(entry, context)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml-workbook.rb, line 49
#
# Handles one archive entry, chosen by <tt>entry.zip_path</tt>:
#
# * "xl/sharedStrings.xml": passed to +extract_text+ together with
#   <tt>context[:shared_strings]</tt>.
# * "xl/workbook.xml": parsed with a WorkbookListener built around
#   <tt>context[:sheet_names]</tt>.
# * "xl/worksheets/sheetN.xml": parsed with a SheetListener built around
#   a fresh array; the pair <tt>[N, array]</tt> is then appended to
#   <tt>context[:sheets]</tt>, with N read as a base-10 Integer.
#
# Any other path is ignored and the method returns +nil+.
def process_entry(entry, context)
  case entry.zip_path
  when "xl/sharedStrings.xml"
    extract_text(entry, context[:shared_strings])
  when "xl/workbook.xml"
    workbook_listener = WorkbookListener.new(context[:sheet_names])
    parse(entry.file_data, workbook_listener)
  when /\Axl\/worksheets\/sheet(\d+)\.xml\z/
    # The capture group is the sheet number taken from the file name.
    sheet_number = Integer(Regexp.last_match(1), 10)
    rows = []
    sheet_listener = SheetListener.new(rows)
    parse(entry.file_data, sheet_listener)
    context[:sheets] << [sheet_number, rows]
  end
end
start_decompose(context)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml-workbook.rb, line 43
#
# Resets the three collections kept in +context+: each of
# <tt>:shared_strings</tt>, <tt>:sheet_names</tt> and <tt>:sheets</tt> is
# assigned its own new empty array, in that order.
#
# Returns the array stored under <tt>:sheets</tt>.
def start_decompose(context)
  %i[shared_strings sheet_names].each { |key| context[key] = [] }
  # Kept as the final expression so the return value stays the new
  # :sheets array.
  context[:sheets] = []
end