class OoxmlParser::DocumentStructure
Basic class for DocumentStructure
Attributes
@return [DocumentBackground] background of document
@return [Comments] comments of document
@return [CommentsDocument] comments of whole document
@return [CommentsExtended] extended comments
@return [DocumentProperties] properties of document
@return [Array<OOXMLDocumentObject>] list of elements
@return [Note] notes of document
@return [Numbering] store numbering data
@return [PageProperties] page properties of document
@return [Relationships] relationships
@return [DocumentSettings] settings
@return [Styles] styles of document
@return [PresentationTheme] theme of docx
@return [PresentationTheme] theme of docx
Public Class Methods
OoxmlParser::CommonDocumentStructure::new
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 48
# Set up an empty document structure: no body elements, notes or comments
# yet, plus a freshly constructed DocumentProperties. Whatever else needs
# initialising is left to the superclass initializer (bare `super`).
def initialize
  @comments = []
  @notes = []
  @elements = []
  @document_properties = DocumentProperties.new
  super
end
Parse docx file @return [DocumentStructure] parsed structure
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 160
# Parse the docx package and build its DocumentStructure.
#
# The method resets the shared parser state on OOXMLDocumentObject
# (`root_subfolder`, `xmls_stack`) and the class-level default styles, then
# reads content types, theme, relationships and styles, walks the body of
# word/document.xml, and finally loads document properties, comments and
# settings.
#
# @return [DocumentStructure] parsed structure
def self.parse
  doc_structure = DocumentStructure.new
  doc_structure.content_types = ContentTypes.new(parent: doc_structure).parse
  OOXMLDocumentObject.root_subfolder = 'word/'
  OOXMLDocumentObject.xmls_stack = []
  # NOTE(review): inside a class method this assigns an instance variable on
  # the class object itself, not on doc_structure — looks unused; confirm.
  @comments = []
  DocumentStructure.default_paragraph_style = DocxParagraph.new
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: doc_structure)
  doc_structure.theme = PresentationTheme.parse('word/theme/theme1.xml')
  # NOTE(review): `self` here is the DocumentStructure class, while every
  # other object in this method gets `parent: doc_structure` — confirm intent.
  doc_structure.relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}word/_rels/document.xml.rels")
  doc_structure.parse_styles
  # Running index handed to each parsed paragraph and table.
  number = 0
  OOXMLDocumentObject.add_to_xmls_stack('word/document.xml')
  doc = doc_structure.parse_xml(OOXMLDocumentObject.current_xml)
  doc.search('//w:document').each do |document|
    document.xpath('w:background').each do |background|
      doc_structure.background = DocumentBackground.new(parent: doc_structure).parse(background)
    end
    document.xpath('w:body').each do |body|
      # Direct children of the body, in document order.
      body.xpath('*').each do |element|
        case element.name
        when 'p'
          child = element.child
          # Skip a paragraph that has no child nodes when the previously
          # stored element is a Table.
          # NOTE(review): presumably the empty paragraph that follows a
          # table in docx markup — confirm.
          unless child.nil? && doc_structure.elements.last.instance_of?(Table)
            paragraph_style = DocumentStructure.default_paragraph_style.dup.parse(element, number, DocumentStructure.default_run_style, parent: doc_structure)
            number += 1
            doc_structure.elements << paragraph_style.dup
          end
        when 'tbl'
          table = Table.new(parent: doc_structure).parse(element, number, TableProperties.new)
          number += 1
          doc_structure.elements << table
        when 'sdt'
          # Structured document tags are stored without consuming an index.
          doc_structure.elements << StructuredDocumentTag.new(parent: doc_structure).parse(element)
        end
      end
      body.xpath('w:sectPr').each do |sect_pr|
        doc_structure.page_properties = PageProperties.new(parent: doc_structure).parse(sect_pr, DocumentStructure.default_paragraph_style, DocumentStructure.default_run_style)
        doc_structure.notes = doc_structure.page_properties.notes # keep a copy of the notes for compatibility with previous docx models
      end
    end
  end
  # Drop the 'word/document.xml' entry pushed before parsing the body.
  OOXMLDocumentObject.xmls_stack.pop
  doc_structure.document_properties = DocumentProperties.new(parent: doc_structure).parse
  doc_structure.comments = Comments.new(parent: doc_structure).parse
  doc_structure.comments_extended = CommentsExtended.new(parent: doc_structure).parse
  # The file name comes from the first 'commentsDocument' relationship target.
  doc_structure.comments_document = Comments.new(parent: doc_structure, file: "#{OOXMLDocumentObject.path_to_folder}word/#{doc_structure.relationships.target_by_type('commentsDocument').first}")
                                            .parse
  doc_structure.settings = DocumentSettings.new(parent: doc_structure).parse
  doc_structure
end
Public Instance Methods
Compare this object to other @param other [Object] any other object @return [True, False] result of comparison
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 61
# Compare this object to other.
#
# Two structures are equal when their elements, page properties, notes,
# background, document properties and comments are all equal.
#
# @param other [Object] any other object
# @return [True, False] result of comparison; false when other does not
#   expose every reader that takes part in the comparison
def ==(other)
  compared_readers = %i[elements page_properties notes background document_properties comments]
  # The contract accepts any object, so nil or an unrelated object must
  # compare as "not equal" rather than raise NoMethodError.
  return false unless compared_readers.all? { |reader| other.respond_to?(reader) }

  @elements == other.elements &&
    @page_properties == other.page_properties &&
    @notes == other.notes &&
    @background == other.background &&
    @document_properties == other.document_properties &&
    @comments == other.comments
end
@return [Array<DocumentStyle>] style of documents
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 154
# Shortcut to the style list held by the document's `styles` object.
#
# @return [Array<DocumentStyle>] style of documents
def document_styles
  styles_container = styles
  styles_container.styles
end
Get element by its type @param location [Symbol] location of object @param type [Symbol] type of object @return [OOXMLDocumentObject]
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 74
# Look up a group of elements by where they live in the document and by
# what kind of object is wanted.
#
# @param location [Symbol] :canvas, :footer, :header or :comment
# @param type [Symbol] :table, :shape, or one of :docx_paragraph, :simple,
#   :paragraph; not consulted for :comment
# @return [OOXMLDocumentObject]
# @raise [RuntimeError] when location or type is not one of the above
def element_by_description(location: :canvas, type: :docx_paragraph)
  paragraph_types = %i[docx_paragraph simple paragraph]
  # Text box of the first non-empty run of the given paragraph.
  text_box_of = lambda do |paragraph|
    paragraph.nonempty_runs.first.alternate_content.office2007_content.data.text_box
  end

  case location
  when :comment
    comments[0].paragraphs
  when :footer, :header
    # Only the first footer/header note (:footer1 / :header1) is addressed.
    # The note is looked up inside each branch so that an unknown type
    # raises before any note lookup happens.
    note_name = :"#{location}1"
    case type
    when :table then note_by_description(note_name).elements[0].rows[0].cells[0].elements
    when *paragraph_types then note_by_description(note_name).elements
    when :shape then text_box_of.call(note_by_description(note_name).elements[0])
    else raise 'Wrong location(Need One of ":table", ":simple", ":shape")'
    end
  when :canvas
    case type
    # On the canvas the table is taken from the second body element.
    when :table then elements[1].rows[0].cells[0].elements
    when *paragraph_types then elements
    when :shape then text_box_of.call(elements[0])
    else raise 'Wrong location(Need One of ":table", ":paragraph", ":shape")'
    end
  else
    raise 'Wrong global location(Need One of ":canvas", ":footer", ":header", ":comment")'
  end
end
Get note by its description @param type [Symbol] note type @return [Note]
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 119
# Find the first note whose type, converted to a symbol, equals the
# requested one.
#
# @param type [Symbol] note type
# @return [Note]
# @raise [RuntimeError] when no note has the requested type
def note_by_description(type)
  matching_note = notes.find { |candidate| candidate.type.to_sym == type }
  matching_note || raise('There isn\'t this type of the note')
end
Return outline type @param location [Symbol] location of object @param type [Symbol] type of object @param levels_count [Integer] count of levels to detect @return [Array<String,String>] type of outline
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 143
# Return outline type.
#
# For every index below levels_count the element at that index is
# inspected at the numbering level with the same index.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param levels_count [Integer] count of levels to detect
# @return [Array<String,String>] numbering format and level text; empty
#   when levels_count is zero
def outline(location: :canvas, type: :simple, levels_count: 1)
  described = element_by_description(location: location, type: type)
  # NOTE(review): both slots are overwritten on every pass, so only the
  # values of the last inspected level are returned — confirm this is intended.
  levels_count.times.each_with_object([]) do |index, pair|
    level = described[index].numbering.abstruct_numbering.level_list[index]
    pair[0] = level.numbering_format.value
    pair[1] = level.text.value
  end
end
Parse default style @return [void]
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 219
# Parse default style.
#
# Reads word/styles.xml and applies every style flagged as default
# (`default` attribute equal to '1', 'on' or 'true') to the class-level
# default paragraph, run, table paragraph and table run styles.
#
# @return [void]
def parse_default_style
  styles_xml = parse_xml("#{OOXMLDocumentObject.path_to_folder}word/styles.xml")
  enabled_values = %w[1 on true]

  # First pass: default paragraph and character styles.
  styles_xml.search('//w:style').each do |style_node|
    default_flag = style_node.attribute('default')
    next if default_flag.nil?
    next unless enabled_values.include?(default_flag.value)

    case style_node.attribute('type').value
    when 'paragraph'
      style_node.xpath('w:pPr').each do |p_pr|
        DocumentStructure.default_paragraph_style = DocxParagraph.new.parse_paragraph_style(p_pr, DocumentStructure.default_run_style)
      end
      style_node.xpath('w:rPr').each do |r_pr|
        DocumentStructure.default_run_style.parse_properties(r_pr)
      end
    when 'character'
      style_node.xpath('w:rPr').each do |r_pr|
        DocumentStructure.default_run_style.parse_properties(r_pr)
      end
    end
  end

  # Table defaults start as copies of the paragraph/run defaults gathered
  # above, which is why the table styles need a separate, later pass.
  DocumentStructure.default_table_paragraph_style = DocumentStructure.default_paragraph_style.dup
  DocumentStructure.default_table_paragraph_style.spacing = Spacing.new(0, 0, 1, :auto)
  DocumentStructure.default_table_run_style = DocumentStructure.default_run_style.dup

  # Second pass: default table style.
  styles_xml.search('//w:style').each do |style_node|
    default_flag = style_node.attribute('default')
    next if default_flag.nil?
    next unless enabled_values.include?(default_flag.value) && style_node.attribute('type').value == 'table'

    style_node.xpath('w:rPr').each do |table_r_pr|
      DocumentStructure.default_table_run_style.parse_properties(table_r_pr)
    end
  end
end
Perform parsing of styles.xml
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 260
# Perform parsing of styles.xml.
#
# Resets the four class-level default styles to blank objects owned by
# this structure. If the styles file exists, the defaults are then filled
# from `w:docDefaults` and from the styles flagged as default, and the
# numbering and styles of the document are loaded.
def parse_styles
  styles_path = "#{OOXMLDocumentObject.path_to_folder}/word/styles.xml"

  DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self)
  DocumentStructure.default_table_paragraph_style = DocxParagraph.new(parent: self)
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
  DocumentStructure.default_table_run_style = DocxParagraphRun.new(parent: self)

  # Without a styles file the blank defaults above are all there is.
  return unless File.exist?(styles_path)

  styles_xml = parse_xml(styles_path)
  # TODO: Remove this old way parsing in favor of doc_structure.styles.document_defaults
  styles_xml.search('//w:docDefaults').each do |defaults_node|
    defaults_node.xpath('w:pPrDefault').each do |paragraph_defaults|
      DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self).parse(paragraph_defaults, 0)
    end
    defaults_node.xpath('w:rPrDefault').each do |run_defaults|
      run_defaults.xpath('w:rPr').each do |run_properties|
        DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self).parse_properties(run_properties)
      end
    end
  end

  parse_default_style
  @numbering = Numbering.new(parent: self).parse
  @styles = Styles.new(parent: self).parse
end
Detect numbering type @param location [Symbol] location of object @param type [Symbol] type of object @param paragraph_number [Integer] number of object @return [Array<String,String>] type of numbering
# File lib/ooxml_parser/docx_parser/docx_data/document_structure.rb, line 131
# Detect numbering type.
#
# Reads the first numbering level of the element found at paragraph_number.
#
# @param location [Symbol] location of object
# @param type [Symbol] type of object
# @param paragraph_number [Integer] number of object
# @return [Array<String,String>] numbering format followed by level text
def recognize_numbering(location: :canvas, type: :simple, paragraph_number: 0)
  target = element_by_description(location: location, type: type)[paragraph_number]
  first_level = target.numbering.abstruct_numbering.level_list[0]
  # The level text is read before the format, although the format comes
  # first in the returned pair.
  level_text = first_level.text.value
  [first_level.numbering_format.value, level_text]
end