module AnyStyle::PDFUtils
Public Instance Methods
pdf_info(path, pdfinfo: 'pdfinfo', **opts)
click to toggle source
# File lib/anystyle/utils.rb
#
# Runs the +pdfinfo+ command on +path+ and returns its "Key: value" output
# lines as a Hash.
#
# path    - location of the PDF; converted with to_s and shell-escaped.
# pdfinfo - command used to invoke pdfinfo. It is interpolated into the
#           shell command line unescaped, so it must come from a trusted
#           source.
# opts    - accepted but not used by this method.
#
# Returns a Hash of String keys to String values. Each output line is split
# at the first colon followed by whitespace; lines without such a separator
# are skipped.
# Raises RuntimeError if +pdfinfo+ is tainted (only on Rubies that still
# implement taint checking).
def pdf_info(path, pdfinfo: 'pdfinfo', **opts)
  # Required here because the file-level requires are not part of this block.
  require 'shellwords'

  # String#tainted? no longer exists on Ruby 3.2+, so an unconditional call
  # would raise NoMethodError there. Only consult it where it is defined.
  raise "pdfinfo is tainted" if pdfinfo.respond_to?(:tainted?) && pdfinfo.tainted?

  # Escaping the path keeps quotes, `$(...)`, backticks and spaces in a file
  # name from being interpreted by the shell.
  output = %x{#{pdfinfo} #{Shellwords.escape(path.to_s)}}

  pairs = output.split("\n").map { |ln| ln.split(/:\s+/, 2) }

  # Keep only well-formed [key, value] pairs; a line without ": " yields a
  # single-element array that cannot be turned into a Hash entry.
  pairs.select { |pair| pair.length == 2 }.to_h
end
pdf_page_size(path)
click to toggle source
# File lib/anystyle/utils.rb
#
# Returns the page dimensions from the 'Page size' entry of pdf_info(path).
#
# path - location of the PDF, passed straight to pdf_info.
#
# Returns the first two numbers of the 'Page size' value as Integers, in
# whatever unit pdfinfo prints. Fractional values are truncated, so
# "595.276 x 841.89" yields [595, 841].
# Raises RuntimeError if pdf_info reports no 'Page size' entry.
def pdf_page_size(path)
  page_size = pdf_info(path)['Page size']
  raise "pdfinfo reported no page size for #{path}" if page_size.nil?

  # Match an optional fractional part as well: scanning for /\d+/ alone would
  # split "595.276" into "595" and "276" and report 276 as the second value.
  page_size.scan(/\d+(?:\.\d+)?/).first(2).map(&:to_i)
end
pdf_to_text(path, pdftotext: 'pdftotext', **opts)
click to toggle source
# File lib/anystyle/utils.rb
#
# Extracts the text of the PDF at +path+ by running the +pdftotext+ command
# and capturing its standard output.
#
# path      - location of the PDF; converted with to_s and shell-escaped.
# pdftotext - command used to invoke pdftotext. It is interpolated into the
#             shell command line unescaped, so it must come from a trusted
#             source.
# opts      - forwarded to pdf_opts to build the command-line flags;
#             opts[:encoding] (default 'UTF-8') also names the encoding
#             applied to the returned String.
#
# Returns the captured output as a String tagged with the chosen encoding.
# Raises RuntimeError if +pdftotext+ is tainted (only on Rubies that still
# implement taint checking) or if the command exits unsuccessfully.
def pdf_to_text(path, pdftotext: 'pdftotext', **opts)
  # Required here because the file-level requires are not part of this block.
  require 'shellwords'

  # String#tainted? no longer exists on Ruby 3.2+, so an unconditional call
  # would raise NoMethodError there. Only consult it where it is defined.
  raise "pdftotext is tainted" if pdftotext.respond_to?(:tainted?) && pdftotext.tainted?

  flags = pdf_opts(path, **opts).join(' ')

  # Escaping the path keeps quotes, `$(...)`, backticks and spaces in a file
  # name from being interpreted by the shell. The trailing "-" is the output
  # file argument (per the pdftotext manual, "-" selects standard output).
  text = %x{#{pdftotext} #{flags} #{Shellwords.escape(path.to_s)} -}
  raise "pdftotext failed with error code #{$?.exitstatus}" unless $?.success?

  text.force_encoding(opts[:encoding] || 'UTF-8')
end
Private Instance Methods
pdf_crop(path, args)
click to toggle source
# File lib/anystyle/utils.rb
#
# Builds the crop flags ("-x", "-y", "-W", "-H") for the command line.
#
# path - location of the PDF; only used to look up the page size when a
#        width or height is negative.
# args - crop values, in one of three shapes:
#        [m]          - x = y = m, width and height are both -m
#        [mx, my]     - x = mx, y = my, width is -mx, height is -my
#        [x, y, w, h] - used as given
#
# A negative width or height is resolved against pdf_page_size(path):
# width becomes page_width - x + w, height becomes page_height - y + h.
#
# Returns the flags as a single String.
# Raises RuntimeError for any other number of crop values.
def pdf_crop(path, args)
  count = args.length

  left, top, box_w, box_h =
    if count == 1
      [args[0], args[0], -args[0], -args[0]]
    elsif count == 2
      [args[0], args[1], -args[0], -args[1]]
    elsif count == 4
      args
    else
      raise "invalid crop option: #{args}"
    end

  # The page size is only looked up when at least one dimension is negative.
  if box_w < 0 || box_h < 0
    page_w, page_h = pdf_page_size(path)
    box_w = page_w - left + box_w if box_w < 0
    box_h = page_h - top + box_h if box_h < 0
  end

  "-x #{left} -y #{top} -W #{box_w} -H #{box_h}"
end
pdf_opts(path, layout: true, encoding: 'UTF-8', **opts)
click to toggle source
# File lib/anystyle/utils.rb
#
# Assembles the list of command-line flags used for text extraction.
#
# path     - location of the PDF; only handed on to pdf_crop.
# layout   - when truthy, the '-layout' flag is included.
# encoding - value for the '-enc' flag; only its first run of word
#            characters and hyphens is used.
# opts     - opts[:crop], when truthy, is turned into crop flags by
#            pdf_crop; other keys are ignored.
#
# Returns an Array of five Strings in a fixed order. A flag that is
# switched off is represented by an empty String rather than left out.
def pdf_opts(path, layout: true, encoding: 'UTF-8', **opts)
  layout_flag =
    if layout
      '-layout'
    else
      ''
    end

  crop = opts[:crop]
  crop_flags = crop ? pdf_crop(path, crop) : ''

  # Taking only the leading [\w-]+ run drops everything after the first
  # character that is neither a word character nor a hyphen.
  encoding_name = encoding[/[\w-]+/]

  [layout_flag, crop_flags, '-eol unix', "-enc #{encoding_name}", '-q']
end