class PdfExtractor::OutputParser

Parses the text output produced by PDFtk.

Public Class Methods

both_not_nil?(a = nil, b = nil) click to toggle source
# File lib/pdf_extractor/output_parser.rb, line 58
# Reports whether neither argument is +nil+.
#
# Only +nil+ counts as missing: +false+ is treated as a present value.
#
# @param a [Object, nil] first value to check
# @param b [Object, nil] second value to check
# @return [Boolean] +true+ when both +a+ and +b+ are non-nil
def self.both_not_nil?(a = nil, b = nil)
  return false if a.nil?

  !b.nil?
end
dump_data(output = nil) click to toggle source
# File lib/pdf_extractor/output_parser.rb, line 6
# Parses "Key: Value" lines into a flat Hash.
#
# Lines are split on the first ': ' only, so a value that itself contains
# ': ' is kept whole. +InfoKey+ / +InfoValue+ lines are paired up (in either
# order) and stored as +{ info_key => info_value }+; every other line is
# stored as +{ key => value }+, with later duplicates overwriting earlier
# ones. +InfoBegin+ / +PageMediaBegin+ marker lines and blank lines are
# skipped.
#
# @param output [#each, nil] collection yielding one line (String) at a time
# @return [Hash{String => String, nil}, nil] parsed fields, or +nil+ when
#   +output+ is +nil+
def self.dump_data(output = nil)
  return if output.nil?

  fields = {}
  pending_key = pending_value = nil
  output.each do |line|
    # Limit of 2 keeps any ': ' inside the value intact.
    key, value = line.split(': ', 2).map(&:strip)
    # A blank line yields a nil or empty key; there is nothing to record.
    next if key.nil? || key.empty?
    next if %w[InfoBegin PageMediaBegin].include?(key)

    case key
    when 'InfoKey' then pending_key = value
    when 'InfoValue' then pending_value = value
    else
      fields[key] = value
      next
    end

    # Commit the Info pair only once both halves have been seen.
    next if pending_key.nil? || pending_value.nil?

    fields[pending_key] = pending_value
    pending_key = pending_value = nil
  end
  fields
end
dump_data_fields(output = nil) click to toggle source
# File lib/pdf_extractor/output_parser.rb, line 32
# Parses '---'-separated groups of "Key: Value" lines into an Array of
# Hashes, one Hash per group.
#
# Each line is split on the first ': ' only, so a value containing ': ' is
# kept whole. Blank lines are ignored. A line with no ': ' separator (for
# example an already-stripped "FieldValue:" with an empty value) is stored
# with an empty-string value instead of raising. Within one group a repeated
# key overwrites the earlier entry.
#
# @param output [#each, nil] collection yielding one line (String) at a time
# @return [Array<Hash{String => String}>, nil] one Hash per non-empty group,
#   or +nil+ when +output+ is +nil+
def self.dump_data_fields(output = nil)
  return if output.nil?

  fields = []
  field = {}
  output.each do |line|
    stripped = line.strip
    next if stripped.empty?

    if stripped == '---'
      # Separator: close the current group, if it collected anything.
      fields << field unless field.empty?
      field = {}
    else
      key, value = line.split(': ', 2)
      # No separator found: treat the whole line as a key with an empty
      # value, dropping the dangling colon left behind.
      key = key.strip.sub(/:\z/, '') if value.nil?
      field[key] = value.to_s.strip
    end
  end
  fields << field unless field.empty?
  fields
end
dump_data_fields_key_value(output = nil) click to toggle source
# File lib/pdf_extractor/output_parser.rb, line 50
# Builds a +{ FieldName => FieldValue }+ Hash from the field groups returned
# by +dump_data_fields+.
#
# When the same 'FieldName' appears in several groups, the last one wins. A
# group without a 'FieldValue' entry maps its name to +nil+.
#
# @param output [#each, nil] raw lines, passed straight to +dump_data_fields+
# @return [Hash{String => String, nil}, nil] name-to-value mapping, or +nil+
#   when +output+ is +nil+ or contains no field groups
def self.dump_data_fields_key_value(output = nil)
  field_array = dump_data_fields(output)
  # dump_data_fields returns nil for nil input; treat that like "no fields".
  return if field_array.nil? || field_array.empty?

  field_array.each_with_object({}) do |field, form|
    form[field['FieldName']] = field['FieldValue']
  end
end