class ChupaText::Decomposers::OfficeOpenXML
Public Instance Methods
decompose(data, &block)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 38
#
# Unzips +data+ and walks every file entry of the archive.
#
# The two document-property parts ("docProps/app.xml" and
# "docProps/core.xml") are parsed with an AttributesListener that writes
# into the shared +context[:attributes]+ hash; every other file entry is
# handed to +process_entry+. +start_decompose+ runs before the walk and
# +finish_decompose+ (which receives the caller's block) runs after it.
#
# @param data the input passed through to +unzip+ and stored in the
#   context under +:data+
# @param block forwarded unchanged to +finish_decompose+
def decompose(data, &block)
  unzip(data) do |zip|
    context = {
      data: data,
      attributes: {},
    }
    start_decompose(context)
    zip.each do |entry|
      # Non-file entries are skipped.
      next unless entry.file?

      case entry.zip_path
      when "docProps/app.xml", "docProps/core.xml"
        # Both property parts are collected into the same attributes hash.
        listener = AttributesListener.new(context[:attributes])
        parse(entry.file_data, listener)
      else
        process_entry(entry, context)
      end
    end
    finish_decompose(context, &block)
  end
end
target?(data)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 25
#
# Tells whether +data+ matches this decomposer, either by file extension
# or by MIME type.
#
# NOTE(review): @extensions and @mime_types are not assigned in the code
# shown here; presumably they are set elsewhere in the class hierarchy --
# confirm.
#
# @param data an object responding to +extension+ and +mime_type+
# @return [Boolean] true when either collection includes the
#   corresponding value of +data+
def target?(data)
  @extensions.include?(data.extension) ||
    @mime_types.include?(data.mime_type)
end
target_score(data)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 30
#
# Scores +data+ for this decomposer.
#
# NOTE(review): the negative score presumably lets more specific
# decomposers outrank this one -- confirm against the score consumer.
#
# @param data the input passed to +target?+
# @return [Integer, nil] -1 when +target?+ accepts +data+, nil otherwise
def target_score(data)
  -1 if target?(data)
end
Private Instance Methods
extract_text(entry, texts)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 68
#
# Parses the content of +entry+ with a TextListener built from +texts+
# and @namespace_uri.
#
# NOTE(review): presumably the listener appends extracted text to
# +texts+; TextListener is not visible here -- confirm.
#
# @param entry an archive entry responding to +file_data+
# @param texts the collector handed to the TextListener
# @return whatever +parse+ returns
def extract_text(entry, texts)
  listener = TextListener.new(texts, @namespace_uri)
  parse(entry.file_data, listener)
end
log_tag()
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 73
#
# Tag string identifying this decomposer.
#
# NOTE(review): presumably used as a prefix for log messages -- the
# callers are not visible here.
#
# @return [String] the fixed tag "[decomposer][office-open-xml]"
def log_tag
  "[decomposer][office-open-xml]"
end
parse(input, listener)
click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 63
#
# Runs a SAXParser over +input+, passing +listener+ to the parser.
#
# @param input the content handed to SAXParser
# @param listener the listener handed to SAXParser
# @return whatever SAXParser#parse returns
def parse(input, listener)
  SAXParser.new(input, listener).parse
end