class Pdfmdrename

Class: pdfmdrename

Class for renaming the file according to the metadata

Attributes

allkeywords[RW]
copy[RW]
dryrun[RW]
filename[RW]
nrkeywords[RW]
outputdir[RW]

Public Class Methods

new(filename) click to toggle source
Calls superclass method Pdfmd::new
# File lib/pdfmd/pdfmdrename.rb, line 23
def initialize(filename)
  super(filename)
  @nrkeywords     ||= 3

  # Find the valid keymapping
  # Use @@keymapping as default and only overwrite when provided by hiera.
  hierakeymapping = self.determineValidSetting(nil, 'rename:keys')
  hierakeymapping ?  @@keymapping = hierakeymapping : ''

  # FIXME: this default doctype assignment might need to be rewritten as the keymapping above.
  @defaultDoctype = self.determineValidSetting('doc', 'rename:defaultdoctype')
  @fileextension  = 'pdf'
end

Public Instance Methods

get_author() click to toggle source

Get the author from the metatags and normalize the string

# File lib/pdfmd/pdfmdrename.rb, line 272
def get_author()
  author = @@metadata['author'].gsub(/\./,'_').gsub(/\&/,'').gsub(/\-/,'_').gsub(/\s|\//,'_').gsub(/\,/,'_').gsub(/\_\_/,'_')
  I18n.enforce_available_locales = false
  I18n.transliterate(author).downcase # Normalising
end
get_doctype() click to toggle source

Get the doctype from the title

# File lib/pdfmd/pdfmdrename.rb, line 259
def get_doctype()
  doctype = @defaultDoctype
  @@keymapping.each do |key,value|
    value.kind_of?(String) ? value = value.split : ''
    value.each do |keyword|
      @@metadata['title'].match(/#{keyword}/i) ? doctype = key : ''
    end
  end
  doctype.downcase
end
get_filename(filedata = {}) click to toggle source

Return the filename from the available filedata

# File lib/pdfmd/pdfmdrename.rb, line 107
def get_filename(filedata = {})

  if filedata.size > 0

    # Create the filename out of all with some exceptions
    #
    # If the doctype is the default one, the first keywords are the
    # title and the subject
    if filedata[:doctype] == @defaultDoctype

      # The subject and title is part of the keywords and handled there.
      filedata[:outputdir] + '/' +
        filedata[:date] + '-' +
        filedata[:author] + '-' +
        filedata[:doctype] + '-' +
        filedata[:keywords] + '.' +
        filedata[:extension]
    else
      filedata[:outputdir] + '/' + filedata.except(:extension, :title, :subject, :outputdir).values.join('-') + '.' + filedata[:extension]
    end

  else

    false

  end

end
get_keywords(preface = '') click to toggle source

Get the keywords This methods is trying in a way to intelligently handle the keywords and return them back to. While doing this, the abbreviations are also being taken into account. Wordcombinations on the other hand, that contain some keywords for the abbreviation, should not be changed. That's what makes it a bit tricky.

# File lib/pdfmd/pdfmdrename.rb, line 160
def get_keywords(preface = '')

  if !@@metadata['keywords'].empty?

    keywordsarray = @@metadata['keywords'].split(',')
    # Replace leading spaces and strings from the keymappings
    # if the value is identical it will be placed at the beginning
    # of the array (and therefore be right after the preface in the filename)
    keywordsarraySorted = Array.new
    keywordsarray.each_with_index do |value,index|
      value = value.lstrip.chomp

      @@keymapping.each do |abbreviation,keyvaluearray|
        if keyvaluearray.kind_of?(String)
          keyvaluearray = keyvaluearray.split(',')
        end
        keyvaluearray = keyvaluearray.sort_by{|size| -size.length}
        keyvaluearray.each do |keystring|
          value = value.gsub(/^#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
        end
      end

      # Remove special characters from string
      keywordsarray[index] = value.gsub(/\s|\/|\-|\./,'_').gsub(/_+/,'_')

      # If the current value matches some of the replacement abbreviations,
      # put the value at index 0 in the array. It will then be listed earlier in the filename.
      if value.match(/^#{@@keymapping.keys.join('|')} /i)
        keywordsarraySorted.insert(0, keywordsarray[index])
      else
        keywordsarraySorted.push(keywordsarray[index])
      end

    end

    # Insert the preface if it is not empty
    if !preface.to_s.empty?
      keywordsarraySorted.insert(0, preface)
    end

    # Convert the keywordarray to a string an limit the number
    # of keywords according to @nrkeywords or the parameter 'all'
    if @@metadata['keywords'] = !@allkeywords
      keywords = keywordsarraySorted.values_at(*(0..@nrkeywords-1)).join('-')
    else
      keywords = keywordsarraySorted.join('-')
    end

    # Normalize all keywords and return value
    I18n.enforce_available_locales = false
    I18n.transliterate(keywords).downcase.chomp('-')

  else # Keywords metafield is empty :(
    # So we return nothing or the preface (if available)

    !preface.empty? ? preface : ''

  end

end
get_keywordsPreface(filedata = {}) click to toggle source

Get the preface for the keywords If the title is meaningful, then the subject will become the preface ( = first keyword) If the subject matches number/character combination and contains no spaces, the preface will be combined with the doctype. If not: The preface will contain the whole subject with dots and spaces being replaced with underscores.

# File lib/pdfmd/pdfmdrename.rb, line 230
def get_keywordsPreface(filedata = {})

  I18n.enforce_available_locales = false
  if filedata[:doctype].nil? or filedata[:doctype].empty?
    filedata[:doctype] = @defaultDoctype
  end

  if !filedata[:subject].nil? and !filedata[:subject].empty? and
    filedata[:doctype] != @defaultDoctype

    I18n.transliterate(filedata[:subject])

  else

    # Document matches standard document type.
    # title and subject are being returned.

    # Normalize special characters
    title   = filedata[:title].downcase
    subject = !filedata[:subject].empty? ? '_' + filedata[:subject].downcase : ''
    subject = subject.gsub(/\s|\-|\&/, '_')
    I18n.transliterate(title + subject)

  end

end
get_outputdir(outputdir = '') click to toggle source

Validate the output directory

# File lib/pdfmd/pdfmdrename.rb, line 137
def get_outputdir(outputdir = '')

  if !outputdir # outputdir is set to false, assume pwd
    self.log('debug','No outputdir specified. Taking current pwd of file.')
    outputdir = File.dirname(@filename)
  elsif outputdir and !File.exist?(outputdir)
    puts "Error: output directory '#{outputdir}' not found. Abort."
    self.log('error',"Output directory '#{outputdir}' not accessible. Abort.")
    exit 1
  elsif outputdir and File.exist?(outputdir)
    outputdir
  else
    false
  end

end
rename() click to toggle source
# File lib/pdfmd/pdfmdrename.rb, line 37
def rename

  # Build new filename elements
  newFilename             = Hash.new
  newFilename[:date]      = @@metadata['createdate'].gsub(/\ \d{2}\:\d{2}\:\d{2}.*$/,'').gsub(/\:/,'')
  newFilename[:author]    = get_author().gsub(/\'/,'').gsub(/\_$/, '')
  newFilename[:doctype]   = get_doctype()
  newFilename[:title]     = @@metadata['title'].downcase.gsub(/(\s|\-|\.|\&|\%|\,)/, '_').gsub(/\_+/, '_')
  newFilename[:subject]   = @@metadata['subject'].downcase.gsub(/(\s|\-|\.|\&|\%|\,)/, '_').gsub(/\_+/, '_')
  newFilename[:keywords]  = get_keywords(get_keywordsPreface(newFilename))
  newFilename[:extension] = @fileextension
  newFilename[:outputdir] = get_outputdir(@outputdir)

  # Verify that all data is available for renaming and fail otherwise
  if !verifyDocumentData(newFilename)
    abort 'Document metadata not complete. Abort renaming.'
  end

  command = @copy ? 'cp' : 'mv'

  filetarget  = get_filename(newFilename)
  puts filetarget
  if @dryrun # Do nothing on dryrun
    if @filename == filetarget
      self.log('info', "Dryrun: File '#{@filename}' already has the correct name. Doing nothing.")
    else
      self.log('info',"Dryrun: Renaming '#{@filename}' to '#{get_filename(newFilename)}'.")
    end
  elsif @filename == filetarget # Do nothing when name is already correct.
    self.log('info',"File '#{@filename}' already has the correct name. Doing nothing.")
  else
    self.log('info',"Renaming '#{@filename}' to '#{filetarget}'.")
    command     = command + " '#{@filename}' #{filetarget} 2>/dev/null"
    system(command)
    if !$?.exitstatus
      log('error', "Error renaming '#{@filename}' to '#{filetarget}'.")
      abort
    else
      log('info', "Successfully renamed file to '#{filetarget}'.")
    end

  end
end
verifyDocumentData(filedata = {}) click to toggle source

Data verification returns false is any metatadag is missing other than keywords

# File lib/pdfmd/pdfmdrename.rb, line 85
def verifyDocumentData(filedata = {})

  @@default_tags.each do |current_tag|

    # Skip over keywords (optional tag)
    current_tag.match(/keywords/) ? next : ''

    if not @@metadata[current_tag].nil? and not @@metadata[current_tag] == ''

    else

      return false

    end

  end

end