class Docx::Document

The Document class wraps around a docx file and provides methods to interface with it.

# Open a docx file from the local directory; returns a Docx::Document
doc = Docx::Document.open("test.docx")

# Print the document text (paragraphs joined by newlines)
puts doc.text

# Same thing in block form: the document is yielded to the block and its
# underlying zip file is closed once the block finishes
Docx::Document.open("test.docx") do |d|
  puts d.text
end

Attributes

doc[R]
styles[R]
xml[R]
zip[R]

Public Class Methods

new(path) { |self| ... } click to toggle source
# File lib/docx/document.rb, line 23
# Opens the docx archive at +path+ and parses its main XML parts.
#
# Reads 'word/document.xml' and 'word/styles.xml' from the zip and parses
# each with Nokogiri. When a block is given, the new document is yielded to
# it and the zip handle is closed afterwards, even if the block raises.
# Without a block the zip stays open and the caller is responsible for it.
#
# @param path [String] location of the .docx file to open
# @yield [self] the freshly loaded document
def initialize(path, &block)
  @replace = {}
  @zip = Zip::File.open(path)
  @document_xml = @zip.read('word/document.xml')
  @doc = Nokogiri::XML(@document_xml)
  @styles_xml = @zip.read('word/styles.xml')
  @styles = Nokogiri::XML(@styles_xml)
  return unless block_given?

  begin
    yield self
  ensure
    # Release the archive handle no matter how the block exits.
    @zip.close
  end
end
open(filepath) → file click to toggle source
open(filepath) {|file| block } → obj

With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it is passed the opened Docx::Document as an argument, and the document's underlying zip file is closed automatically when the block terminates. In both cases the Docx::Document instance is returned from Docx::Document.open.

# File lib/docx/document.rb, line 49
# Convenience constructor: hands +path+ and the optional block straight to
# .new and returns whatever .new returns.
#
# @param path [String] location of the .docx file
def self.open(path, &block)
  new(path, &block)
end

Public Instance Methods

bookmarks() click to toggle source
# File lib/docx/document.rb, line 57
# Builds a lookup of the document's bookmarks keyed by bookmark name.
#
# Every +w:bookmarkStart+ node is turned into a bookmark object via
# #parse_bookmark_from. When two bookmarks share a name, the later one wins.
#
# @return [Hash] bookmark name => bookmark object
def bookmarks
  parsed = @doc.xpath('//w:bookmarkStart').map { |node| parse_bookmark_from(node) }
  parsed.each_with_object({}) do |bookmark, by_name|
    # auto-generated by office 2010
    next if bookmark.name == "_GoBack"

    by_name[bookmark.name] = bookmark
  end
end
document_properties() click to toggle source

Returns the current global document properties as a hash. For now this contains only the default font size.

# File lib/docx/document.rb, line 38
# Global, document-wide properties collected into one hash.
#
# @return [Hash] currently a single +:font_size+ key holding #font_size
def document_properties
  { font_size: font_size }
end
each_paragraph → Enumerator click to toggle source

Deprecated

Iterates over paragraphs within document

# File lib/docx/document.rb, line 83
# Iterates over the paragraphs of the document, yielding each in turn.
# Marked as deprecated in the published documentation.
#
# Called without a block it returns an Enumerator over the same paragraphs
# instead of failing at the first +yield+.
#
# @yield [paragraph] each element of #paragraphs, in order
# @return [Enumerator] when no block is given
# @return [Array] the paragraphs array when a block is given
def each_paragraph
  return enum_for(:each_paragraph) unless block_given?

  paragraphs.each { |para| yield(para) }
end
font_size() click to toggle source

Some documents set a default font size and others don't; when it is not set, nil is returned. The value is stored in half-points, so it is divided by 2 to obtain points.

# File lib/docx/document.rb, line 72
# Default font size declared in the styles part, in whole points.
#
# Looks up the first +w:sz+ node under the document defaults and reads its
# 'val' attribute, which is expressed in half-points. The division is integer
# division, so an odd half-point value is rounded down.
#
# @return [Integer, nil] size in points, or nil when no +w:sz+ node is found
def font_size
  sz_node = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  return nil unless sz_node

  half_points = sz_node.attributes['val'].value.to_i
  half_points / 2
end
paragraphs() click to toggle source
# File lib/docx/document.rb, line 53
# Collects every paragraph found in the document body.
#
# @return [Array] one element per +w:p+ node matched under
#   +w:document//w:body+, each produced by #parse_paragraph_from
def paragraphs
  paragraph_nodes = @doc.xpath('//w:document//w:body//w:p')
  paragraph_nodes.map { |node| parse_paragraph_from(node) }
end
replace_entry(entry_path, file_contents) click to toggle source
# File lib/docx/document.rb, line 119
# Registers replacement contents for one archive entry. #save writes these
# contents instead of the original entry with the same name.
#
# @param entry_path [String] entry name inside the archive
# @param file_contents [String] data to write for that entry
# @return [String] +file_contents+
def replace_entry(entry_path, file_contents)
  @replace.store(entry_path, file_contents)
end
save(filepath) → void click to toggle source

Save document to provided path

# File lib/docx/document.rb, line 101
# Writes the document out as a new archive at +path+.
#
# #update is called first so the in-memory document XML is queued as a
# replacement. Every entry of the source archive is then copied to the
# output, using the queued replacement for an entry when one exists and the
# original contents otherwise. The source archive is closed at the end, so
# the document's zip handle is no longer open after a successful save.
#
# @param path [String] destination of the new .docx file
def save(path)
  update
  Zip::OutputStream.open(path) do |stream|
    zip.each do |entry|
      entry_name = entry.name
      stream.put_next_entry(entry_name)
      stream.write(@replace[entry_name] || zip.read(entry_name))
    end
  end
  zip.close
end
tables() click to toggle source
# File lib/docx/document.rb, line 66
# Collects every table found in the document body.
#
# @return [Array] one element per +w:tbl+ node matched under
#   +w:document//w:body+, each produced by #parse_table_from
def tables
  table_nodes = @doc.xpath('//w:document//w:body//w:tbl')
  table_nodes.map { |node| parse_table_from(node) }
end
text() — Alias for: to_s
to_html() click to toggle source

Output entire document as a String HTML fragment

# File lib/docx/document.rb, line 94
# Renders the whole document as an HTML fragment.
#
# Each paragraph is converted with its own +to_html+ and the pieces are
# joined with a newline character. The separator must be double-quoted: a
# single-quoted '\n' is a literal backslash followed by "n", not a line break.
#
# @return [String] HTML for all paragraphs, one per line
def to_html
  paragraphs.map(&:to_html).join("\n")
end
to_s → string click to toggle source
# File lib/docx/document.rb, line 89
# Plain-text rendering of the document.
#
# @return [String] the +to_s+ of every paragraph, separated by newlines
def to_s
  paragraph_texts = paragraphs.map { |para| para.to_s }
  paragraph_texts.join("\n")
end
Also aliased as: text

Private Instance Methods

parse_bookmark_from(b_node) click to toggle source

generate Elements::Bookmark from bookmark XML node

# File lib/docx/document.rb, line 140
# Wraps a bookmark XML node in an Elements::Bookmark.
#
# @param b_node the bookmark node to wrap (as returned by the xpath query
#   in #bookmarks)
# @return [Elements::Bookmark]
def parse_bookmark_from(b_node)
  Elements::Bookmark.new(b_node)
end
parse_paragraph_from(p_node) click to toggle source

generate Elements::Containers::Paragraph from paragraph XML node

# File lib/docx/document.rb, line 135
# Wraps a paragraph XML node in an Elements::Containers::Paragraph.
#
# The current #document_properties hash is passed along as the second
# constructor argument.
#
# @param p_node the paragraph node to wrap (as returned by the xpath query
#   in #paragraphs)
# @return [Elements::Containers::Paragraph]
def parse_paragraph_from(p_node)
  Elements::Containers::Paragraph.new(p_node, document_properties)
end
parse_table_from(t_node) click to toggle source
# File lib/docx/document.rb, line 144
# Wraps a table XML node in an Elements::Containers::Table.
#
# @param t_node the table node to wrap (as returned by the xpath query
#   in #tables)
# @return [Elements::Containers::Table]
def parse_table_from(t_node)
  Elements::Containers::Table.new(t_node)
end
update() click to toggle source
# File lib/docx/document.rb, line 130
# Serializes the in-memory document XML and queues it as the replacement
# for the 'word/document.xml' entry, so the next #save writes it out.
def update
  serialized = doc.serialize(save_with: 0)
  replace_entry('word/document.xml', serialized)
end