class Docx::Document

The Document class wraps around a docx file and provides methods to interface with it.

# Open a docx file from the current directory and get a Docx::Document for it
doc = Docx::Document.open("test.docx")

# Print the text of the document
puts doc.text

# Same thing in block form: the document is yielded to the block
Docx::Document.open("test.docx") do |d|
  puts d.text
end

Attributes

doc[R]
styles[R]
xml[R]
zip[R]

Public Class Methods

new(path_or_io, options = {}) { |self| ... } click to toggle source
# File lib/docx/document.rb, line 23
# Opens a docx archive, parses its main document part and loads the styles.
#
# path_or_io - a String path to the file, or the archive data itself. A
#              String that contains no NUL byte is treated as a path;
#              anything else is handed to Zip::File.open_buffer.
# options    - accepted for compatibility; not read by this method.
#
# Yields the new document when a block is given.
# Raises Errno::ENOENT when the archive has no word/document*.xml entry.
def initialize(path_or_io, options = {})
  @replace = {}

  # if path_or_io is a String && does not contain a null byte, it is a path
  if path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io)
    @zip = Zip::File.open(path_or_io)
  else
    @zip = Zip::File.open_buffer(path_or_io)
  end

  document = @zip.glob('word/document*.xml').first
  raise Errno::ENOENT if document.nil?

  @document_xml = document.get_input_stream.read
  @doc = Nokogiri::XML(@document_xml)
  load_styles
  yield(self) if block_given?
ensure
  # @zip is still nil when opening the archive itself failed. Calling close
  # unconditionally would raise NoMethodError here and hide the real error.
  @zip&.close
end
open(filepath) → file click to toggle source
open(filepath) {|file| block } → obj

With no associated block, Docx::Document.open is a synonym for Docx::Document.new. If the optional code block is given, it will be passed the opened docx file as an argument and the Docx::Document object will automatically be closed when the block terminates. Note that open simply forwards to Docx::Document.new, so as implemented the call evaluates to the Docx::Document instance rather than to the value of the block.

# File lib/docx/document.rb, line 56
# Convenience entry point: hands +path+ and any given block straight to
# +new+ and returns whatever +new+ returns.
def self.open(path, &blk)
  new(path, &blk)
end

Public Instance Methods

bookmarks() click to toggle source
# File lib/docx/document.rb, line 64
# Returns a Hash mapping bookmark name => bookmark object for every
# w:bookmarkStart node in the document. When two bookmarks share a name the
# later one wins.
def bookmarks
  @doc.xpath('//w:bookmarkStart').each_with_object({}) do |node, by_name|
    bookmark = parse_bookmark_from(node)
    # '_GoBack' is auto-generated by office 2010, so it is left out
    next if bookmark.name == '_GoBack'

    by_name[bookmark.name] = bookmark
  end
end
document_properties() click to toggle source

Returns the current global document properties as a hash (for now, the font size and the hyperlinks).

# File lib/docx/document.rb, line 45
# Collects the document-wide properties into a Hash with the keys
# :font_size and :hyperlinks, taken from the methods of the same names.
def document_properties
  properties = {}
  properties[:font_size] = font_size
  properties[:hyperlinks] = hyperlinks
  properties
end
each_paragraph → Enumerator click to toggle source

Deprecated

Iterates over paragraphs within document

# File lib/docx/document.rb, line 103
# Deprecated.
#
# Yields each paragraph of the document in turn. When called without a
# block it returns an Enumerator over the paragraphs (previously this raised
# LocalJumpError as soon as there was a paragraph to yield).
def each_paragraph(&block)
  paragraphs.each(&block)
end
font_size() click to toggle source

Some documents have this set, others don't. The value is stored in half-points, so it is divided by 2 to return the size in points.

# File lib/docx/document.rb, line 79
# Default font size from the document defaults in the styles part, or nil
# when no styles were loaded or no w:sz element is present.
#
# The 'val' attribute is halved with integer division, so an odd value
# rounds down.
def font_size
  return nil unless @styles

  sz_node = @styles.xpath('//w:docDefaults//w:rPrDefault//w:rPr//w:sz').first
  return nil unless sz_node

  half_points = sz_node.attributes['val'].value.to_i
  half_points / 2
end
paragraphs() click to toggle source
# File lib/docx/document.rb, line 60
# Returns one parsed paragraph per w:p node that is a direct child of the
# document body, in document order.
def paragraphs
  paragraph_nodes = @doc.xpath('//w:document//w:body/w:p')
  paragraph_nodes.map { |node| parse_paragraph_from(node) }
end
replace_entry(entry_path, file_contents) click to toggle source
# File lib/docx/document.rb, line 162
# Registers +file_contents+ as the replacement for the archive entry at
# +entry_path+; save and stream write it instead of the archived data.
# Returns +file_contents+.
def replace_entry(entry_path, file_contents)
  @replace.store(entry_path, file_contents)
end
save(filepath) → void click to toggle source

Save document to provided path

# File lib/docx/document.rb, line 121
# Writes the document to +path+ as a new archive.
#
# After calling update, every file entry of the source archive is copied
# over; an entry that has a replacement registered in @replace is written
# with that replacement instead. Finishes by closing the source archive
# and returns the result of that close.
def save(path)
  update

  Zip::OutputStream.open(path) do |archive|
    zip.each do |item|
      if item.file?
        archive.put_next_entry(item.name)
        archive.write(@replace[item.name] || zip.read(item.name))
      end
    end
  end

  zip.close
end
stream() click to toggle source

Output entire document as a StringIO object

# File lib/docx/document.rb, line 140
# Builds the whole document as an in-memory archive.
#
# After calling update, every file entry of the source archive is copied
# into the buffer; an entry that has a replacement registered in @replace is
# written with that replacement instead. The buffer produced by
# Zip::OutputStream.write_buffer is rewound before it is returned.
def stream
  update

  buffer = Zip::OutputStream.write_buffer do |archive|
    zip.each do |item|
      if item.file?
        archive.put_next_entry(item.name)
        archive.write(@replace[item.name] || zip.read(item.name))
      end
    end
  end

  buffer.tap(&:rewind)
end
tables() click to toggle source
# File lib/docx/document.rb, line 73
# Returns one parsed table per w:tbl node found anywhere inside the
# document body, in document order.
def tables
  table_nodes = @doc.xpath('//w:document//w:body//w:tbl')
  table_nodes.map { |node| parse_table_from(node) }
end
Alias for: to_s
to_html() click to toggle source

Output entire document as a String HTML fragment

# File lib/docx/document.rb, line 114
# Renders every paragraph with its own to_html and joins the fragments
# with newlines into a single String.
def to_html
  fragments = paragraphs.map { |paragraph| paragraph.to_html }
  fragments.join("\n")
end
to_s → string click to toggle source
# File lib/docx/document.rb, line 109
# Plain-text rendering of the document: the to_s of every paragraph, one
# per line.
def to_s
  lines = paragraphs.map { |paragraph| paragraph.to_s }
  lines.join("\n")
end
Also aliased as: text

Private Instance Methods

load_rels() click to toggle source
# File lib/docx/document.rb, line 177
# Reads and parses the relationships part that belongs to the main document
# (first match of word/_rels/document*.xml.rels) into @rels_xml / @rels.
#
# Raises Errno::ENOENT when the archive has no such entry.
def load_rels
  rels_entry = @zip.glob('word/_rels/document*.xml.rels').first || raise(Errno::ENOENT)

  rels_stream = rels_entry.get_input_stream
  @rels_xml = rels_stream.read
  @rels = Nokogiri::XML(@rels_xml)
end
load_styles() click to toggle source
# File lib/docx/document.rb, line 168
# Reads and parses word/styles.xml into @styles_xml / @styles, then loads
# the document relationships.
#
# An Errno::ENOENT raised along the way (by the read or by load_rels) is
# reported with warn and the method returns nil instead of failing.
def load_styles
  raw_styles = @zip.read('word/styles.xml')
  @styles_xml = raw_styles
  @styles = Nokogiri::XML(raw_styles)
  load_rels
rescue Errno::ENOENT => missing
  warn(missing.message)
  nil
end
parse_bookmark_from(b_node) click to toggle source

generate Elements::Bookmark from bookmark XML node

# File lib/docx/document.rb, line 200
# Wraps a bookmark XML node in a new Elements::Bookmark.
def parse_bookmark_from(b_node)
  bookmark_class = Elements::Bookmark
  bookmark_class.new(b_node)
end
parse_paragraph_from(p_node) click to toggle source

generate Elements::Containers::Paragraph from paragraph XML node

# File lib/docx/document.rb, line 195
# Wraps a paragraph XML node in a new Elements::Containers::Paragraph,
# passing along the current document_properties.
def parse_paragraph_from(p_node)
  paragraph_class = Elements::Containers::Paragraph
  paragraph_class.new(p_node, document_properties)
end
parse_table_from(t_node) click to toggle source
# File lib/docx/document.rb, line 204
# Wraps a table XML node in a new Elements::Containers::Table.
def parse_table_from(t_node)
  table_class = Elements::Containers::Table
  table_class.new(t_node)
end
update() click to toggle source
# File lib/docx/document.rb, line 190
# Registers the serialized, possibly modified document XML as the
# replacement for the main document entry, so save and stream write it out.
# Returns the value of replace_entry.
#
# initialize reads the first entry matching word/document*.xml, which is not
# necessarily named word/document.xml. The replacement is therefore
# registered under that entry's actual name; otherwise save and stream
# would never match it and edits would be dropped silently. The literal
# name is kept as a fallback for when no entry matches.
def update
  document_entry = @zip.glob('word/document*.xml').first
  entry_name = document_entry ? document_entry.name : 'word/document.xml'
  replace_entry entry_name, doc.serialize(save_with: 0)
end