module AnyStyle::PDFUtils

Public Instance Methods

pdf_info(path, pdfinfo: 'pdfinfo', **opts) click to toggle source
   # File lib/anystyle/utils.rb
# Runs the `pdfinfo` command line tool on a file and parses its
# "Key: value" report into a Hash.
#
# The path is shell-escaped before being interpolated into the command, so
# file names containing quotes, spaces or other shell metacharacters reach
# the tool verbatim. Output lines that do not have the "Key: value" shape
# are skipped instead of aborting the whole parse.
#
# @param path [#to_s] location of the PDF file
# @param pdfinfo [String] command used to invoke pdfinfo
# @param opts [Hash] accepted for signature compatibility; not used here
# @return [Hash{String => String}] one entry per reported field
# @raise [RuntimeError] if +pdfinfo+ is tainted (only on Ruby versions that
#   still track taint)
def pdf_info(path, pdfinfo: 'pdfinfo', **opts)
  # Only needed when shelling out, so it is loaded on demand.
  require 'shellwords'

  # Object#tainted? was removed in Ruby 3.2; only consult it where it exists.
  raise "pdfinfo is tainted" if pdfinfo.respond_to?(:tainted?) && pdfinfo.tainted?

  report = %x{#{pdfinfo} #{Shellwords.escape(path)}}

  report.split("\n")
        .map { |ln| ln.split(/:\s+/, 2) }
        .select { |pair| pair.length == 2 } # ignore lines without a value
        .to_h
end
pdf_page_size(path) click to toggle source
   # File lib/anystyle/utils.rb
# Reads the page dimensions of a PDF from the 'Page size' field reported
# by #pdf_info.
#
# The first two numbers in that field are taken as width and height.
# Fractional values (e.g. "595.276 x 841.89 pts (A4)") are matched as a
# whole and truncated to integers, so the digits after the decimal point
# are never mistaken for the height.
#
# @param path [#to_s] location of the PDF file
# @return [Array<Integer>] the width and height, in that order
# @raise [RuntimeError] if no 'Page size' field was reported
def pdf_page_size(path)
  size = pdf_info(path)['Page size']
  raise "pdfinfo did not report a page size for #{path}" if size.nil?

  size.scan(/\d+(?:\.\d+)?/).first(2).map(&:to_i)
end
pdf_to_text(path, pdftotext: 'pdftotext', **opts) click to toggle source
   # File lib/anystyle/utils.rb
# Extracts the text of a PDF by running the `pdftotext` command line tool
# and capturing what it writes to standard output.
#
# The command line flags come from #pdf_opts. The path is shell-escaped
# before being interpolated into the command, so file names containing
# quotes, spaces or other shell metacharacters reach the tool verbatim.
#
# @param path [#to_s] location of the PDF file
# @param pdftotext [String] command used to invoke pdftotext
# @param opts [Hash] forwarded to #pdf_opts; +:encoding+ also selects the
#   encoding the returned string is tagged with (default 'UTF-8')
# @return [String] the command's output
# @raise [RuntimeError] if +pdftotext+ is tainted (only on Ruby versions
#   that still track taint) or if the command exits unsuccessfully
def pdf_to_text(path, pdftotext: 'pdftotext', **opts)
  # Only needed when shelling out, so it is loaded on demand.
  require 'shellwords'

  # Object#tainted? was removed in Ruby 3.2; only consult it where it exists.
  raise "pdftotext is tainted" if pdftotext.respond_to?(:tainted?) && pdftotext.tainted?

  flags = pdf_opts(path, **opts).join(' ')

  # The trailing '-' is passed as the final argument after the input path.
  text = %x{#{pdftotext} #{flags} #{Shellwords.escape(path)} -}
  raise "pdftotext failed with error code #{$?.exitstatus}" unless $?.success?

  text.force_encoding(opts[:encoding] || 'UTF-8')
end

Private Instance Methods

pdf_crop(path, args) click to toggle source
    # File lib/anystyle/utils.rb
# Builds the "-x .. -y .. -W .. -H .." crop flags that #pdf_opts places on
# the pdftotext command line.
#
# The crop option may be given as:
# * a single number +n+ (bare or in a one-element array): x = y = n, and
#   width/height are shortened by a further n on the far side;
# * two numbers +[a, b]+: x = a, y = b, shortened by a and b respectively;
# * four numbers +[x, y, w, h]+: used as given.
#
# A negative width or height is resolved against the page size reported by
# #pdf_page_size: the result is the page extent minus the offset, plus the
# (negative) value. The page size is only looked up when needed.
#
# @param path [#to_s] location of the PDF file
# @param args [Numeric, Array<Numeric>] the crop option
# @return [String] the crop flags
# @raise [RuntimeError] if the option does not have 1, 2 or 4 values
def pdf_crop(path, args)
  # Array() lets callers pass a bare number as well as an array.
  margins = Array(args)

  (x, y, w, h) = case margins.length
    when 1
      [margins[0], margins[0], -margins[0], -margins[0]]
    when 2
      [margins[0], margins[1], -margins[0], -margins[1]]
    when 4
      margins
    else
      raise "invalid crop option: #{args}"
    end

  if w < 0 || h < 0
    (width, height) = pdf_page_size(path)
    w = width - x + w if w < 0
    h = height - y + h if h < 0
  end

  "-x #{x} -y #{y} -W #{w} -H #{h}"
end
pdf_opts(path, layout: true, encoding: 'UTF-8', **opts) click to toggle source
   # File lib/anystyle/utils.rb
# Assembles the command line flags that #pdf_to_text passes to pdftotext.
#
# The result always has five entries, in a fixed order; a flag that is
# switched off is represented by an empty string so positions stay stable.
#
# Only the first run of word characters and dashes of +encoding+ is used,
# which keeps arbitrary text off the command line. When nothing usable
# remains (nil, empty string, only punctuation) 'UTF-8' is used instead, so
# the flag following "-enc" is never consumed as the encoding name.
#
# @param path [#to_s] location of the PDF file (only used for cropping)
# @param layout [Boolean] whether to emit the '-layout' flag
# @param encoding [#to_s, nil] output encoding name for the '-enc' flag
# @param opts [Hash] +:crop+, if present, is converted by #pdf_crop
# @return [Array<String>] the flags, to be joined with spaces
def pdf_opts(path, layout: true, encoding: 'UTF-8', **opts)
  # to_s accepts nil and Encoding objects as well as strings.
  enc_name = encoding.to_s[/[\w-]+/] || 'UTF-8'

  [
    layout ? '-layout' : '',
    opts[:crop] ? pdf_crop(path, opts[:crop]) : '',
    '-eol unix',
    "-enc #{enc_name}",
    '-q'
  ]
end