class ChupaText::Decomposers::OfficeOpenXML

Public Instance Methods

decompose(data, &block) click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 38
# Unzips +data+ and walks every entry of the archive.
#
# A fresh context hash (+:data+ => the given +data+, +:attributes+ => an
# empty hash) is handed to +start_decompose+ before the walk and to
# +finish_decompose+ (together with the caller's block) after it.
#
# Entries for which +file?+ is false are skipped. The two document
# property parts, "docProps/app.xml" and "docProps/core.xml", are parsed
# with an AttributesListener built around the shared
# +context[:attributes]+ hash. Every other file entry goes to
# +process_entry+.
#
# Returns whatever +unzip+ returns.
def decompose(data, &block)
  unzip(data) do |archive|
    context = {data: data, attributes: {}}
    start_decompose(context)

    archive.each do |entry|
      next unless entry.file?

      case entry.zip_path
      when "docProps/app.xml", "docProps/core.xml"
        # Both property parts feed the same attributes hash.
        attributes_listener = AttributesListener.new(context[:attributes])
        parse(entry.file_data, attributes_listener)
      else
        process_entry(entry, context)
      end
    end

    finish_decompose(context, &block)
  end
end
target?(data) click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 25
# Tells whether +data+ is handled by this decomposer.
#
# True when +data.extension+ is listed in +@extensions+ or, failing
# that, when +data.mime_type+ is listed in +@mime_types+. The MIME type
# is only looked at when the extension does not match.
def target?(data)
  extension_matched = @extensions.include?(data.extension)
  extension_matched || @mime_types.include?(data.mime_type)
end
target_score(data) click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 30
# Scores +data+ for this decomposer: -1 when +target?+ accepts it,
# +nil+ otherwise.
#
# NOTE(review): the negative score presumably ranks this decomposer below
# more specific ones - confirm against the code that consumes the score.
def target_score(data)
  target?(data) ? -1 : nil
end

Private Instance Methods

extract_text(entry, texts) click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 68
# Parses the data of +entry+ with a TextListener constructed from
# +texts+ and +@namespace_uri+.
#
# Returns the result of +parse+.
def extract_text(entry, texts)
  text_listener = TextListener.new(texts, @namespace_uri)
  entry_data = entry.file_data
  parse(entry_data, text_listener)
end
log_tag() click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 73
# Returns the fixed tag string that identifies this decomposer.
def log_tag
  '[decomposer][office-open-xml]'
end
parse(input, listener) click to toggle source
# File lib/chupa-text/decomposers/office-open-xml.rb, line 63
# Builds a SAXParser from +input+ and +listener+ and runs it.
#
# Returns the value of SAXParser#parse.
def parse(input, listener)
  SAXParser.new(input, listener).parse
end