class OoxmlParser::DocumentStructure

Basic class for DocumentStructure

Attributes

default_paragraph_style[RW]
default_run_style[RW]
default_table_paragraph_style[RW]
default_table_run_style[RW]
background[RW]

@return [DocumentBackground] background of document

comments[RW]

@return [Comments] comments of document

comments_document[RW]

@return [CommentsDocument] comments of whole document

comments_extended[RW]

@return [CommentsExtended] extended comments

document_properties[RW]

@return [DocumentProperties] properties of document

elements[RW]

@return [Array<OOXMLDocumentObject>] list of elements

notes[RW]

@return [Array<Note>] notes of document

numbering[RW]

@return [Numbering] store numbering data

page_properties[RW]

@return [PageProperties] properties of document

relationships[RW]

@return [Relationships] relationships

settings[RW]

@return [DocumentSettings] settings

styles[RW]

@return [Styles] styles of document

theme[RW]

@return [PresentationTheme] theme of docx

theme_colors[RW]

@return [PresentationTheme] theme of docx

Public Class Methods

new() click to toggle source
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 48
# Build an empty document structure.
#
# The element, note and comment collections start as empty arrays and
# the document properties start as a fresh DocumentProperties object;
# the parent class initializer runs afterwards.
def initialize
  @elements, @notes = [], []
  @document_properties = DocumentProperties.new
  @comments = []
  super
end
parse() click to toggle source

Parse docx file @return [DocumentStructure] parsed structure

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 160
# Parse the unpacked docx into a DocumentStructure.
#
# Resets the shared OOXMLDocumentObject state (root subfolder and xml
# stack) and the class-level default paragraph/run styles, then reads
# theme, relationships, styles, the body of word/document.xml and
# finally document properties, comments and settings.
#
# @return [DocumentStructure] parsed structure
def self.parse
  doc_structure = DocumentStructure.new
  doc_structure.content_types = ContentTypes.new(parent: doc_structure).parse
  OOXMLDocumentObject.root_subfolder = 'word/'
  OOXMLDocumentObject.xmls_stack = []
  # NOTE(review): inside a class method this assigns an instance variable
  # of the class object, not of doc_structure; kept only to avoid changing
  # class-level state that code outside this view might read.
  @comments = []
  DocumentStructure.default_paragraph_style = DocxParagraph.new
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: doc_structure)
  doc_structure.theme = PresentationTheme.parse('word/theme/theme1.xml')
  # The parent must be the structure being built (as for every other child
  # created here); `self` in a class method is the class itself.
  doc_structure.relationships = Relationships.new(parent: doc_structure).parse_file("#{OOXMLDocumentObject.path_to_folder}word/_rels/document.xml.rels")
  doc_structure.parse_styles
  # Running index handed to each parsed paragraph and table.
  number = 0
  OOXMLDocumentObject.add_to_xmls_stack('word/document.xml')
  doc = doc_structure.parse_xml(OOXMLDocumentObject.current_xml)
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      doc_structure.background = DocumentBackground.new(parent: doc_structure).parse(background)
    end
    document.xpath('w:body').each do |body|
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # Skip a childless paragraph that directly follows a table.
          unless child.nil? && doc_structure.elements.last.instance_of?(Table)
            paragraph_style = DocumentStructure.default_paragraph_style.dup.parse(element, number, DocumentStructure.default_run_style, parent: doc_structure)
            number += 1
            doc_structure.elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: doc_structure).parse(element,
                                                         number,
                                                         TableProperties.new)
          number += 1
          doc_structure.elements << table
        when 'sdt'
          # Structured document tags do not advance the running index.
          doc_structure.elements << StructuredDocumentTag.new(parent: doc_structure).parse(element)
        end
      end
      body.xpath('w:sectPr').each do |sect_pr|
        doc_structure.page_properties = PageProperties.new(parent: doc_structure).parse(sect_pr,
                                                                                        DocumentStructure.default_paragraph_style,
                                                                                        DocumentStructure.default_run_style)
        doc_structure.notes = doc_structure.page_properties.notes # keep copy of notes to compatibility with previous docx models
      end
    end
  end
  OOXMLDocumentObject.xmls_stack.pop
  doc_structure.document_properties = DocumentProperties.new(parent: doc_structure).parse
  doc_structure.comments = Comments.new(parent: doc_structure).parse
  doc_structure.comments_extended = CommentsExtended.new(parent: doc_structure).parse
  # The comments-document part is located through the relationships file.
  doc_structure.comments_document = Comments.new(parent: doc_structure,
                                                 file: "#{OOXMLDocumentObject.path_to_folder}word/#{doc_structure.relationships.target_by_type('commentsDocument').first}")
                                            .parse
  doc_structure.settings = DocumentSettings.new(parent: doc_structure).parse
  doc_structure
end

Public Instance Methods

==(other) click to toggle source

Compare this object to other @param other [Object] any other object @return [True, False] result of comparison

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 61
# Compare this document structure with another object.
#
# Two structures are equal when their elements, page properties, notes,
# background, document properties and comments are all equal. Anything
# that is not a DocumentStructure (including nil) is never equal; without
# this guard the comparison raised NoMethodError on such objects.
#
# @param other [Object] any other object
# @return [True, False] result of comparison
def ==(other)
  return false unless other.is_a?(DocumentStructure)

  @elements == other.elements &&
    @page_properties == other.page_properties &&
    @notes == other.notes &&
    @background == other.background &&
    @document_properties == other.document_properties &&
    @comments == other.comments
end
document_styles() click to toggle source

@return [Array<DocumentStyle>] style of documents

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 154
# List the styles held by the parsed styles object.
#
# `styles` is only assigned by parse_styles when word/styles.xml exists,
# so it can still be nil; in that case an empty list is returned instead
# of raising NoMethodError.
#
# @return [Array<DocumentStyle>] style of documents
def document_styles
  style_sheet = styles
  return [] if style_sheet.nil?

  style_sheet.styles
end
element_by_description(location: :canvas, type: :docx_paragraph) click to toggle source

Get element by its type @param location [Symbol] location of object @param type [Symbol] type of object @return [OOXMLDocumentObject]

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 74
# Get the elements found at a described place in the document.
#
# For :comment the paragraphs of the first comment are returned and
# `type` is ignored. For :canvas, :footer and :header the container is
# the document body, the :footer1 note or the :header1 note, and `type`
# selects what is taken from it:
# * :table - elements of the first cell of the first row of a table
# * :docx_paragraph, :simple, :paragraph - the container's elements
# * :shape - the text box reached through the first non-empty run of the
#   first element
#
# @param location [Symbol] one of :canvas, :footer, :header, :comment
# @param type [Symbol] one of :table, :docx_paragraph, :simple,
#   :paragraph, :shape
# @return [OOXMLDocumentObject]
# @raise [RuntimeError] if location or type is not supported
def element_by_description(location: :canvas, type: :docx_paragraph)
  return comments[0].paragraphs if location == :comment

  unless %i[canvas footer header].include?(location)
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end
  # Validate the type before touching the document so that an unsupported
  # type is reported as such, not as a missing note.
  unless %i[table docx_paragraph simple paragraph shape].include?(type)
    raise 'Wrong type(Need One of ":table", ":docx_paragraph", ":simple", ":paragraph", ":shape")'
  end

  container = if location == :canvas
                elements
              else
                note_by_description(location == :footer ? :footer1 : :header1).elements
              end

  case type
  when :table
    # The body table is looked up at index 1, note tables at index 0.
    # NOTE(review): presumably matches the layout of the documents this
    # helper is used with - confirm before unifying.
    table_index = location == :canvas ? 1 : 0
    container[table_index].rows[0].cells[0].elements
  when :shape
    container[0].nonempty_runs.first.alternate_content.office2007_content.data.text_box
  else
    container
  end
end
note_by_description(type) click to toggle source

Get note by its description @param type [Symbol] note type @return [Note]

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 119
# Find the first note whose type, converted to a symbol, equals `type`.
#
# @param type [Symbol] note type
# @return [Note] the first matching note
# @raise [RuntimeError] if no note has the requested type
def note_by_description(type)
  matching_note = notes.find { |candidate| candidate.type.to_sym == type }
  return matching_note if matching_note

  raise 'There isn\'t this type of the note'
end
outline(location: :canvas, type: :simple, levels_count: 1) click to toggle source

Return outline type @param location [Symbol] location of object @param type [Symbol] type of object @param levels_count [Integer] count of levels to detect @return [Array<String,String>] type of outline

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 143
# Return the outline type as a [numbering format, level text] pair.
#
# For every index below `levels_count` the element at that index is read
# together with the level of the same index from its numbering. Each pass
# writes into the same two slots, so the returned pair holds the values
# of the last level visited; with `levels_count` 0 the result is empty.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param levels_count [Integer] count of levels to detect
# @return [Array<String,String>] type of outline
def outline(location: :canvas, type: :simple, levels_count: 1)
  items = element_by_description(location: location, type: type)
  format_and_text = []
  levels_count.times do |index|
    level = items[index].numbering.abstruct_numbering.level_list[index]
    format_and_text[0] = level.numbering_format.value
    format_and_text[1] = level.text.value
  end
  format_and_text
end
parse_default_style() click to toggle source

Parse default style @return [void]

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 219
# Read the styles marked as default in word/styles.xml into the
# class-level default styles of DocumentStructure.
#
# A style counts as default when its `default` attribute is '1', 'on' or
# 'true'. Default paragraph styles replace the default paragraph style
# (from w:pPr) and extend the default run style (from w:rPr); default
# character styles extend the default run style. The table defaults are
# then copied from the paragraph/run defaults, given a fixed spacing and
# extended with the run properties of default table styles.
#
# @return [void]
def parse_default_style
  styles_xml = parse_xml("#{OOXMLDocumentObject.path_to_folder}word/styles.xml")
  enabled_flags = %w[1 on true]
  # Type of a style flagged as default, nil otherwise. The `type`
  # attribute is only read once the flag is known to be enabled.
  default_type_of = lambda do |node|
    flag = node.attribute('default')
    node.attribute('type').value if !flag.nil? && enabled_flags.include?(flag.value)
  end

  styles_xml.search('//w:style').each do |node|
    case default_type_of.call(node)
    when 'paragraph'
      node.xpath('w:pPr').each do |p_pr|
        DocumentStructure.default_paragraph_style = DocxParagraph.new.parse_paragraph_style(p_pr, DocumentStructure.default_run_style)
      end
      node.xpath('w:rPr').each { |r_pr| DocumentStructure.default_run_style.parse_properties(r_pr) }
    when 'character'
      node.xpath('w:rPr').each { |r_pr| DocumentStructure.default_run_style.parse_properties(r_pr) }
    end
  end

  # Table defaults start as copies of the defaults computed above.
  DocumentStructure.default_table_paragraph_style = DocumentStructure.default_paragraph_style.dup
  DocumentStructure.default_table_paragraph_style.spacing = Spacing.new(0, 0, 1, :auto)
  DocumentStructure.default_table_run_style = DocumentStructure.default_run_style.dup

  styles_xml.search('//w:style').each do |node|
    next unless default_type_of.call(node) == 'table'

    node.xpath('w:rPr').each { |r_pr| DocumentStructure.default_table_run_style.parse_properties(r_pr) }
  end
end
parse_styles() click to toggle source

Perform parsing styles.xml

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 260
# Parse word/styles.xml: document defaults, default styles, numbering
# and the styles list.
#
# The class-level default paragraph/run styles (plain and table) are
# always reset to fresh objects. When styles.xml does not exist the
# method stops there, leaving numbering and styles unassigned.
#
# @return [Styles, nil] parsed styles, or nil when styles.xml is missing
def parse_styles
  # Joined without an extra '/', like every other path built from
  # path_to_folder in this file (it is assumed to end with a separator).
  file = "#{OOXMLDocumentObject.path_to_folder}word/styles.xml"
  DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self)
  DocumentStructure.default_table_paragraph_style = DocxParagraph.new(parent: self)
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
  DocumentStructure.default_table_run_style = DocxParagraphRun.new(parent: self)

  return unless File.exist?(file)

  doc = parse_xml(file)
  # TODO: Remove this old way parsing in favor of doc_structure.styles.document_defaults
  doc.search('//w:docDefaults').each do |doc_defaults|
    doc_defaults.xpath('w:pPrDefault').each do |p_pr_defaults|
      DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self).parse(p_pr_defaults, 0)
    end
    doc_defaults.xpath('w:rPrDefault').each do |r_pr_defaults|
      r_pr_defaults.xpath('w:rPr').each do |r_pr|
        DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self).parse_properties(r_pr)
      end
    end
  end
  # Styles flagged as default refine the document defaults read above.
  parse_default_style
  @numbering = Numbering.new(parent: self).parse
  @styles = Styles.new(parent: self).parse
end
recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0) click to toggle source

Detect numbering type @param location [Symbol] location of object @param type [Symbol] type of object @param paragraph_number [Integer] number of object @return [Array<String,String>] type of numbering

# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 131
# Detect the numbering type of one paragraph.
#
# Looks up the elements for the described place, takes the one at
# `paragraph_number` and reads the first level of its numbering.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param paragraph_number [Integer] number of object
# @return [Array<String,String>] numbering format and level text
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  items = element_by_description(location: location, type: type)
  first_level = items[paragraph_number].numbering.abstruct_numbering.level_list[0]
  level_text = first_level.text.value
  number_format = first_level.numbering_format.value
  [number_format, level_text]
end