class Docx::Document
The Document
class wraps around a docx file and provides methods to interface with it.
# get a Docx::Document for a docx file in the local directory
doc = Docx::Document.open("test.docx")

# get the text from the document
puts doc.text

# do the same thing in a block
Docx::Document.open("test.docx") do |d|
  puts d.text
end
Attributes
Public Class Methods
# File lib/docx/document.rb, line 23
# Opens the docx archive at +path+ and parses its main document and styles.
#
# Reads 'word/document.xml' and 'word/styles.xml' from the archive and hands
# both to Nokogiri::XML. When a block is given, the new document is yielded to
# it and the archive is closed afterwards. The archive is also closed when the
# block raises or when reading/parsing fails, so a failed open does not leave
# the zip handle dangling. Without a block the archive stays open.
#
# @param path [String] location of the docx file, passed to Zip::File.open
# @yield [self] the opened document (optional)
def initialize(path, &block)
  @replace = {}
  @zip = Zip::File.open(path)
  keep_open = false

  begin
    @document_xml = @zip.read('word/document.xml')
    @doc = Nokogiri::XML(@document_xml)
    @styles_xml = @zip.read('word/styles.xml')
    @styles = Nokogiri::XML(@styles_xml)

    if block_given?
      yield self
    else
      # no block: later calls still need to read from the archive
      keep_open = true
    end
  ensure
    @zip.close unless keep_open
  end
end
With no associated block, Docx::Document.open
is a synonym for Docx::Document.new
. If the optional code block is given, it will be passed the opened docx
file as an argument and the Docx::Document
object will automatically be closed when the block terminates. Because open delegates to new, the call returns the Docx::Document instance itself rather than the value of the block
.
# File lib/docx/document.rb, line 49
# Convenience constructor: forwards +path+ and any given block to .new
# and returns whatever .new returns.
def self.open(path, &block)
  new(path, &block)
end
Public Instance Methods
# File lib/docx/document.rb, line 57
# Builds a Hash of the document's bookmarks keyed by bookmark name.
#
# Every w:bookmarkStart node is turned into a bookmark object through
# parse_bookmark_from. When two bookmarks share a name, the later one wins.
def bookmarks
  @doc.xpath('//w:bookmarkStart').each_with_object({}) do |start_node, by_name|
    bookmark = parse_bookmark_from(start_node)
    # "_GoBack" is auto-generated by office 2010, so it is left out
    next if bookmark.name == "_GoBack"

    by_name[bookmark.name] = bookmark
  end
end
This stores the current global document properties, for now
# File lib/docx/document.rb, line 38
# Returns the document-wide properties as a Hash.
# Currently the only key is :font_size, taken from #font_size.
def document_properties
  properties = {}
  properties[:font_size] = font_size
  properties
end
Deprecated
Iterates over paragraphs within document
# File lib/docx/document.rb, line 83
# Yields each paragraph of the document in turn.
#
# Without a block an Enumerator over the paragraphs is returned instead of
# failing with LocalJumpError on the first yield.
#
# @deprecated iterate over #paragraphs directly
# @yield [paragraph] each element of #paragraphs
# @return [Array, Enumerator] the paragraphs when a block is given,
#   otherwise an Enumerator
def each_paragraph
  return enum_for(:each_paragraph) unless block_given?

  paragraphs.each { |paragraph| yield(paragraph) }
end
Some documents have this set, others don't. The value is stored in half-points, so it is divided by 2 to get the size in points.
# File lib/docx/document.rb, line 72
# Returns the default font size declared in the styles part, in points.
#
# Looks up the first w:sz element under w:docDefaults/w:rPrDefault/w:rPr.
# Its 'val' attribute is in half-points, so it is halved with integer
# division (an odd value such as 21 therefore yields 10).
#
# @return [Integer, nil] the size in points, or nil when the element or its
#   'val' attribute is absent
def font_size
  size_tag = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  return nil unless size_tag

  # guard against a w:sz element without a 'val' attribute
  half_points = size_tag.attributes['val']
  half_points ? half_points.value.to_i / 2 : nil
end
# File lib/docx/document.rb, line 53
# Returns one paragraph object per w:p node found under w:document/w:body,
# each built by parse_paragraph_from.
def paragraphs
  paragraph_nodes = @doc.xpath('//w:document//w:body//w:p')
  paragraph_nodes.map do |node|
    parse_paragraph_from(node)
  end
end
# File lib/docx/document.rb, line 119
# Registers replacement contents for an archive entry.
#
# The contents are stored in @replace under the entry's path. The path is
# normalised with to_s because #save looks replacements up by entry.name, so
# a Symbol or other non-String key would otherwise never match.
#
# @param entry_path [#to_s] path of the entry inside the archive,
#   e.g. "word/document.xml"
# @param file_contents [String] data to store for that entry
# @return [String] file_contents
def replace_entry(entry_path, file_contents)
  @replace[entry_path.to_s] = file_contents
end
Save document to provided path
# File lib/docx/document.rb, line 101
# Writes the document to a new archive at +path+.
#
# Calls #update first, then copies every entry of the source archive into the
# output stream, substituting the contents registered in @replace where there
# are any. The source archive is closed once writing has finished.
#
# NOTE(review): the source archive is only closed when writing succeeds;
# confirm whether it should also be closed on failure.
def save(path)
  update

  Zip::OutputStream.open(path) do |out|
    zip.each do |entry|
      entry_name = entry.name
      out.put_next_entry(entry_name)
      # replacement contents take precedence over the archived bytes
      out.write(@replace[entry_name] || zip.read(entry_name))
    end
  end

  zip.close
end
# File lib/docx/document.rb, line 66
# Returns one table object per w:tbl node found under w:document/w:body,
# each built by parse_table_from.
def tables
  table_nodes = @doc.xpath('//w:document//w:body//w:tbl')
  table_nodes.map do |node|
    parse_table_from(node)
  end
end
Output entire document as a String HTML fragment
# File lib/docx/document.rb, line 94
# Renders the whole document as an HTML fragment.
#
# Each paragraph's #to_html output is joined with a newline character. The
# separator is double-quoted: a single-quoted '\n' would insert a literal
# backslash followed by "n" between paragraphs.
#
# @return [String] the concatenated HTML of all paragraphs
def to_html
  paragraphs.map(&:to_html).join("\n")
end
# File lib/docx/document.rb, line 89
# Returns the document as plain text: the #to_s of every paragraph,
# one per line.
def to_s
  lines = paragraphs.map { |paragraph| paragraph.to_s }
  lines.join("\n")
end
Private Instance Methods
generate Elements::Bookmark
from bookmark XML node
# File lib/docx/document.rb, line 140
# Wraps a bookmark XML node in an Elements::Bookmark.
def parse_bookmark_from(b_node)
  bookmark_class = Elements::Bookmark
  bookmark_class.new(b_node)
end
generate Elements::Containers::Paragraph
from paragraph XML node
# File lib/docx/document.rb, line 135
# Wraps a paragraph XML node in an Elements::Containers::Paragraph,
# passing along the current #document_properties.
def parse_paragraph_from(p_node)
  paragraph_class = Elements::Containers::Paragraph
  paragraph_class.new(p_node, document_properties)
end
# File lib/docx/document.rb, line 144
# Wraps a table XML node in an Elements::Containers::Table.
def parse_table_from(t_node)
  table_class = Elements::Containers::Table
  table_class.new(t_node)
end
# File lib/docx/document.rb, line 130
# Serializes the in-memory document (with save_with set to 0) and registers
# the result as the replacement for "word/document.xml".
def update
  serialized = doc.serialize(save_with: 0)
  replace_entry('word/document.xml', serialized)
end