class OoxmlParser::Parser

Basic class for OoxmlParser

Public Class Methods

parse(path_to_file) click to toggle source

Base method to parse a document of any type. @param path_to_file [String] path to the file @return [CommonDocumentStructure] structure of the document

# File lib/ooxml_parser/common_parser/parser.rb, line 24
# Parse a document, picking the concrete parser from the layout of the
# unpacked folder as reported by Parser.recognize_folder_format.
# @param path_to_file [String] path to the file to parse
# @return whatever Parser.parse_format returns for the given block; the
#   block itself evaluates to nil when the format is not recognized
def self.parse(path_to_file)
  Parser.parse_format(path_to_file) do
    case Parser.recognize_folder_format
    when :docx then DocumentStructure.parse
    when :xlsx then XLSXWorkbook.new.parse
    when :pptx then Presentation.new.parse
    else
      # Kernel#warn returns nil, so an unrecognized archive produces no model.
      warn "#{path_to_file} is a simple zip file without OOXML content"
    end
  end
end
parse_format(path_to_file) { || ... } click to toggle source

Base method that unpacks a document of any type and yields to a block that parses it. @param path_to_file [String] path to the file @return [CommonDocumentStructure] structure of the document

# File lib/ooxml_parser/common_parser/parser.rb, line 9
# Unpack a document into a working folder, yield so the caller can parse
# the unpacked content, then remove the working folder.
#
# The folder is removed even when unpacking or the block raises, so a
# failed parse does not leave the extracted copy behind.
#
# @param path_to_file [String] path to the file to parse
# @yield called with no arguments once the file has been unpacked into
#   OOXMLDocumentObject.path_to_folder
# @yieldreturn [#file_path=, nil] parsed model, or nil if nothing was parsed
# @return the object returned by the block (with +file_path+ set to
#   +path_to_file+), or nil if the file is encrypted or the block returned nil
def self.parse_format(path_to_file)
  # Encrypted files are skipped before anything is copied or unpacked.
  return nil if OOXMLDocumentObject.encrypted_file?(path_to_file)

  path_to_zip_file = OOXMLDocumentObject.copy_file_and_rename_to_zip(path_to_file)
  # Strip the file name to get the folder (with trailing separator) holding the copy.
  OOXMLDocumentObject.path_to_folder = path_to_zip_file.sub(File.basename(path_to_zip_file), '')
  begin
    OOXMLDocumentObject.unzip_file(path_to_zip_file, OOXMLDocumentObject.path_to_folder)
    model = yield
    model.file_path = path_to_file if model
    model
  ensure
    # Cleanup must not depend on the parse succeeding.
    FileUtils.rm_rf(OOXMLDocumentObject.path_to_folder)
  end
end
recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder) click to toggle source

Recognize folder format @param directory [String] path to directory @return [Symbol] type of document

# File lib/ooxml_parser/common_parser/parser.rb, line 43
# Detect the document type from the marker sub-folder present in an
# unpacked directory. Markers are checked in order: word, xl, ppt.
# @param directory [String] path to the unpacked directory
# @return [Symbol, nil] :docx, :xlsx or :pptx; nil when no marker folder exists
def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
  { 'word' => :docx, 'xl' => :xlsx, 'ppt' => :pptx }.each do |marker, type|
    return type if Dir.exist?("#{directory}/#{marker}")
  end
  nil
end