class ReVIEW::Epub2Html
Public Class Methods
execute(*args)
click to toggle source
# File lib/review/epub2html.rb, line 22
# Class-level convenience entry point: builds a fresh converter and hands
# all arguments to its instance-level #execute.
#
# args - command-line style arguments, forwarded unchanged.
#
# Returns whatever the instance-level #execute returns.
def self.execute(*args)
  converter = new
  converter.execute(*args)
end
new()
click to toggle source
# File lib/review/epub2html.rb, line 54
# Sets up an empty converter. All state starts out blank and is filled in
# by the other methods of this class.
def initialize
  # OPF package document (set by parse_epub) and the XHTML sources keyed by
  # file name.
  @opfxml, @htmls = nil, {}
  # Markup before/after the body, captured by take_headtail.
  @head, @tail = nil, nil
  # Switched on by the --inline-footnote command-line option.
  @inline_footnote = nil
end
Public Instance Methods
execute(*args)
click to toggle source
# File lib/review/epub2html.rb, line 26
# Command-line driver: parses the options, loads the EPUB and prints the
# joined HTML on standard output.
#
# args - command-line arguments. Once the options are consumed, args[0] is
#        the EPUB path and args[1] the optional file whose header and
#        footer are used.
#
# Prints the usage text and exits with status 1 when the EPUB path is
# missing or does not exist; --help prints the usage and exits with 0.
def execute(*args)
  parser = OptionParser.new
  # NOTE(review): the line breaks and indentation of this banner were
  # reconstructed from a whitespace-collapsed listing - confirm against the
  # original file.
  parser.banner = <<EOT
Usage: review-epub2html [options] EPUBfile [file_for_head_and_foot] > HTMLfile
  file_for_head_and_foot: HTML file to extract header and footer area.
                          This file must be contained in the EPUB.
                          If omitted, the first found file is used.
EOT
  parser.version = ReVIEW::VERSION
  parser.on('--help', 'Prints this message and quit.') do
    puts parser.help
    exit 0
  end
  parser.on('--inline-footnote', 'Embed footnote blocks in paragraph.') do
    @inline_footnote = true
  end
  parser.parse!(args)

  epubfile, reffile = args
  epub_available = !epubfile.nil? && File.exist?(epubfile)
  unless epub_available
    puts parser.help
    exit 1
  end

  parse_epub(epubfile)
  puts join_html(reffile)
end
join_html(reffile)
click to toggle source
# File lib/review/epub2html.rb, line 145
# Concatenates every file returned by make_list into one HTML string.
#
# reffile - name of the file whose header/footer markup should wrap the
#           result, or nil to take it from the first file in the list.
#
# Returns "<head>\n<converted files joined by newlines>\n<tail>".
def join_html(reffile)
  sections = make_list.map do |fname|
    source = @htmls[fname]
    # Capture head/tail only once, from the requested (or first) file.
    take_headtail(source) if @head.nil? && (reffile.nil? || reffile == fname)
    modify_html(fname, source)
  end
  joined = sections.join("\n")
  "#{@head}\n#{joined}\n#{@tail}"
end
make_list()
click to toggle source
# File lib/review/epub2html.rb, line 157
# Lists the XHTML files of the publication in spine order.
#
# Reads the manifest of @opfxml to map item ids to hrefs (XHTML items
# only), then walks the spine and resolves every itemref through that map.
# Spine entries that do not resolve to an XHTML manifest item (another
# media type, or an idref with no manifest entry) are skipped, so the
# result never contains nil.
#
# Returns an Array of href Strings.
def make_list
  hrefs = {}
  @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |item|
    hrefs[item.attributes['id']] = item.attributes['href']
  end

  files = []
  @opfxml.each_element('/package/spine/itemref') do |ref|
    href = hrefs[ref.attributes['idref']]
    # Unresolvable entries would otherwise surface as nil file names.
    files << href if href
  end
  files
end
modify_html(fname, html)
click to toggle source
# File lib/review/epub2html.rb, line 88
# Rewrites one XHTML file so it can be concatenated with the other files
# into a single HTML document.
#
# * Every id is prefixed with the sanitized file name; if the resulting id
#   was already used in this file, 'E' is appended until it is unused.
# * Every link except http/https/ftp/ftps/mailto ones becomes an in-page
#   "#anchor" built the same way as the rewritten ids. A link with an empty
#   href points at the section of the current file.
# * When @inline_footnote is set, each div.footnote is turned into a span
#   and put in place of the a.noteref that links to it. A noteref without a
#   matching footnote is left untouched.
# * Everything up to and including <body ...> is replaced by
#   <section id="...">, everything from </body> on by </section>.
#
# fname - name of the file inside the EPUB (used for the id prefix)
# html  - XHTML source text of that file
#
# Returns the rewritten markup as a String.
def modify_html(fname, html)
  doc = REXML::Document.new(html)
  doc.context[:attribute_quote] = :quote
  prefix = sanitize(fname)

  ids = {}
  doc.each_element('//*[@id]') do |e|
    sid = "#{prefix}_#{sanitize(e.attributes['id'])}"
    sid += 'E' while ids[sid]
    ids[sid] = true
    e.attributes['id'] = sid
  end

  doc.each_element('//a[@href]') do |e|
    href = e.attributes['href']
    next if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')

    # ''.split('#', 2) returns [], so file is nil for an empty href.
    file, anc = href.split('#', 2)
    target = file.to_s.empty? ? prefix : sanitize(file)
    target = "#{target}_#{sanitize(anc)}" if anc
    e.attributes['href'] = "##{target}"
  end

  if @inline_footnote
    # move footnotes to inline as same as LaTeX.
    footnotes = {}
    doc.each_element("//div[@class='footnote']") do |e|
      e.name = 'span'
      e.attributes.delete('epub:type')
      footnotes[e.attributes['id']] = e
      e.remove
    end
    doc.each_element("//a[@class='noteref']") do |e|
      note = footnotes[e.attributes['href'].to_s.sub('#', '')]
      # Keep the reference when there is no footnote body to inline.
      next unless note

      e.parent.insert_after(e, note)
      e.remove
    end
  end

  doc.to_s.
    sub(/.*(<body.*?>)/m, %Q(<section id="#{prefix}">)).
    sub(%r{(</body>).*}m, '</section>')
end
parse_epub(epubname)
click to toggle source
# File lib/review/epub2html.rb, line 62
# Loads the parts of an EPUB archive that the conversion needs.
#
# An entry whose name ends in .opf is parsed into @opfxml; entries ending
# in .html/.xhtml are stored in @htmls as UTF-8 text, keyed by the entry
# name with the first 'OEBPS/' removed. Other entries are ignored.
#
# epubname - path of the EPUB file.
#
# Returns nil.
def parse_epub(epubname)
  Zip::File.open(epubname) do |archive|
    archive.each do |entry|
      path = entry.name
      case path
      when /.+\.opf\Z/
        @opfxml = REXML::Document.new(entry.get_input_stream.read)
      when /.+\.x?html\Z/
        content = entry.get_input_stream.read.force_encoding('utf-8')
        @htmls[path.sub('OEBPS/', '')] = content
      end
    end
  end
  nil
end
sanitize(s)
click to toggle source
# File lib/review/epub2html.rb, line 81
# Turns a file name or anchor into a token for use in rewritten ids.
#
# A trailing .html/.xhtml and a leading './' are dropped, the rest is
# CGI-escaped, and the characters '.', ',', '+' and '%' are replaced by
# '_'. The result is prefixed with 's_'.
#
# s - the String to convert.
#
# Returns the converted String.
def sanitize(s)
  without_ext = s.sub(/\.x?html\Z/, '')
  relative = without_ext.sub(%r{\A\./}, '')
  escaped = CGI.escape(relative)
  's_' + escaped.tr('.,+%', '_')
end
take_headtail(html)
click to toggle source
# File lib/review/epub2html.rb, line 76
# Remembers the markup surrounding the body of an HTML file.
#
# @head becomes the text up to and including the first <body ...> tag and
# @tail the text from the last </body> on. If a tag is not found, the
# corresponding variable holds the whole input.
#
# html - HTML source text.
#
# Returns the new value of @tail.
def take_headtail(html)
  head = html.sub(/(<body.*?>).*/m) { Regexp.last_match(1) }
  tail = html.sub(%r{.*(</body>)}m) { Regexp.last_match(1) }
  @head = head
  @tail = tail
end