class ReVIEW::Epub2Html

Public Class Methods

execute(*args) click to toggle source
# File lib/review/epub2html.rb, line 17
# Convenience entry point: builds a fresh instance and forwards every
# argument to its #execute, returning whatever that call returns.
def self.execute(*args)
  converter = new
  converter.execute(*args)
end
new() click to toggle source
# File lib/review/epub2html.rb, line 49
# Creates a converter with no EPUB loaded yet.
def initialize
  # Parsed OPF package document; assigned by parse_epub.
  @opfxml = nil
  # HTML sources keyed by file name; filled in by parse_epub.
  @htmls = {}
  # Text surrounding the body of the reference file; set by take_headtail.
  @head = @tail = nil
  # Switched to true by the --inline-footnote option in #execute.
  @inline_footnote = nil
end

Public Instance Methods

execute(*args) click to toggle source
# File lib/review/epub2html.rb, line 21
    # Command-line driver. Parses +args+, loads the EPUB named by the first
    # remaining argument and prints the joined HTML to standard output.
    #
    # The optional second argument names the HTML file (inside the EPUB)
    # whose header and footer wrap the output. When the EPUB argument is
    # missing or does not exist, the usage text is printed and the process
    # exits with status 1; --help prints it and exits with status 0.
    def execute(*args)
      parser = OptionParser.new
      parser.version = ReVIEW::VERSION
      parser.banner = <<EOT
Usage: review-epub2html [options] EPUBfile [file_for_head_and_foot] > HTMLfile
       file_for_head_and_foot: HTML file to extract header and footer area.
                               This file must be contained in the EPUB.
                               If omitted, the first found file is used.

EOT
      parser.on('--help', 'Prints this message and quit.') do
        puts parser.help
        exit 0
      end
      parser.on('--inline-footnote', 'Embed footnote blocks in paragraph.') do
        @inline_footnote = true
      end

      # parse! strips the recognised options, leaving positional arguments.
      parser.parse!(args)
      epubfile, reffile = args

      unless epubfile && File.exist?(epubfile)
        puts parser.help
        exit 1
      end

      parse_epub(epubfile)
      puts join_html(reffile)
    end
join_html(reffile) click to toggle source
# File lib/review/epub2html.rb, line 140
# Concatenates every file returned by make_list into one HTML string.
#
# The header and footer are captured (via take_headtail) from the first
# file that qualifies: any file when +reffile+ is nil, otherwise the file
# whose name equals +reffile+. Each file body is converted by modify_html
# and the pieces are joined with newlines between head and tail.
def join_html(reffile)
  sections = make_list.map do |fname|
    source = @htmls[fname]
    wanted = reffile.nil? || reffile == fname
    take_headtail(source) if @head.nil? && wanted
    modify_html(fname, source)
  end

  "#{@head}\n#{sections.join("\n")}\n#{@tail}"
end
make_list() click to toggle source
# File lib/review/epub2html.rb, line 152
# Returns the hrefs of the XHTML content documents in spine (reading) order.
#
# Manifest items with media type application/xhtml+xml are indexed by id,
# then each spine itemref is resolved through that index. An itemref whose
# idref does not resolve to an XHTML item (for example an SVG item, or a
# dangling reference) is skipped, so the result never contains nil.
def make_list
  items = {}
  @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |e|
    items[e.attributes['id']] = e.attributes['href']
  end

  files = []
  @opfxml.each_element('/package/spine/itemref') do |e|
    href = items[e.attributes['idref']]
    # Callers use each entry as a file name; a nil here would crash them.
    files.push(href) if href
  end

  files
end
modify_html(fname, html) click to toggle source
# File lib/review/epub2html.rb, line 83
# Converts one XHTML document into a <section> fragment that can live next
# to the other files in a single page.
#
# fname:: name of the file inside the EPUB; used to prefix ids and anchors.
# html::  the file's XHTML source.
#
# Steps:
# 1. Every id is rewritten to "<sanitized fname>_<sanitized id>" so that ids
#    from different files cannot clash ('E' is appended on a collision).
# 2. Every non-external link is rewritten to a fragment that follows the
#    same naming scheme. Links with an empty href are left untouched.
# 3. With @inline_footnote set, footnote blocks replace the noteref links
#    pointing at them. A noteref without a matching footnote is kept as is.
# 4. Everything up to <body ...> and from </body> onwards is replaced by
#    the opening and closing <section> tags.
#
# Returns the resulting markup as a String.
def modify_html(fname, html)
  doc = REXML::Document.new(html)
  doc.context[:attribute_quote] = :quote

  prefix = sanitize(fname)
  ids = {}

  doc.each_element('//*[@id]') do |e|
    sid = "#{prefix}_#{sanitize(e.attributes['id'])}"
    sid += 'E' while ids[sid]
    ids[sid] = true
    e.attributes['id'] = sid
  end

  doc.each_element('//a[@href]') do |e|
    href = e.attributes['href']
    # ''.split('#', 2) is [], which leaves no file name to sanitize.
    next if href.empty?
    next if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')

    file, anc = href.split('#', 2)
    target = if anc.nil?
               sanitize(file)
             elsif file.empty?
               # Fragment-only link: the target is inside this same file.
               "#{prefix}_#{sanitize(anc)}"
             else
               "#{sanitize(file)}_#{sanitize(anc)}"
             end

    e.attributes['href'] = "##{target}"
  end

  if @inline_footnote
    # move footnotes to inline as same as LaTeX.
    footnotes = {}

    doc.each_element("//div[@class='footnote']") do |e|
      e.name = 'span'
      e.attributes.delete('epub:type')
      footnotes[e.attributes['id']] = e
      e.remove
    end

    doc.each_element("//a[@class='noteref']") do |e|
      href = e.attributes['href']
      note = footnotes[href.sub('#', '')] if href
      # Without a footnote to insert, keep the link rather than crash.
      next unless note

      e.parent.insert_after(e, note)
      e.remove
    end
  end

  doc.to_s.
    sub(/.*(<body.*?>)/m, %Q(<section id="#{prefix}">)).
    sub(%r{(</body>).*}m, '</section>')
end
parse_epub(epubname) click to toggle source
# File lib/review/epub2html.rb, line 57
# Reads the archive at +epubname+ and keeps what the conversion needs:
# an entry ending in .opf is parsed into @opfxml, and each entry ending in
# .html/.xhtml is stored in @htmls as UTF-8 text, keyed by its name with
# the first 'OEBPS/' removed. Always returns nil.
def parse_epub(epubname)
  Zip::File.open(epubname) do |archive|
    archive.each do |entry|
      path = entry.name
      case path
      when /.+\.opf\Z/
        @opfxml = REXML::Document.new(entry.get_input_stream.read)
      when /.+\.x?html\Z/
        key = path.sub('OEBPS/', '')
        @htmls[key] = entry.get_input_stream.read.force_encoding('utf-8')
      end
    end
  end

  nil
end
sanitize(s) click to toggle source
# File lib/review/epub2html.rb, line 76
# Builds an identifier from a file name or fragment: a trailing
# .html/.xhtml and a leading "./" are dropped, the rest is CGI-escaped,
# the characters . , + % become underscores, and "s_" is prefixed.
def sanitize(s)
  stem = s.sub(/\.x?html\Z/, '').sub(%r{\A\./}, '')
  escaped = CGI.escape(stem).tr('.,+%', '_')
  "s_#{escaped}"
end
take_headtail(html) click to toggle source
# File lib/review/epub2html.rb, line 71
# Remembers the text surrounding the body of +html+:
# @head is everything up to and including the first <body ...> tag and
# @tail is everything from the last </body> to the end. When a tag is
# absent, the corresponding variable receives a copy of the whole input.
def take_headtail(html)
  opening = /<body.*?>/m.match(html)
  @head = opening ? html[0...opening.end(0)] : html.dup

  closing_at = html.rindex('</body>')
  @tail = closing_at ? html[closing_at..-1] : html.dup
end