class Pdfmdstat
Attributes
metadata[RW]
Public Class Methods
new(metadata)
click to toggle source
# File lib/pdfmd/pdfmdstat.rb, line 19
# Build the per-tag value counts for the given metadata.
#
# Stores the list of analysed tag names in @default_tags and the result of
# count_values(metadata, @default_tags) in @statdata.
#
# metadata - collection of metadata records; handed unchanged to count_values.
#
# NOTE(review): the class exposes a read/write `metadata` attribute, but this
# constructor never assigns @metadata -- confirm whether that is intended.
def initialize(metadata)
  @default_tags = %w[author title subject createdate keywords]
  # count_values creates one (possibly empty) counter hash per tag, so no
  # separate pre-initialisation of @statdata is needed.
  @statdata = count_values(metadata, @default_tags)
end
Public Instance Methods
analyse_metadata()
click to toggle source
Run a statistical overview of the metadata. Count all values in the metatags and sum them up.
# File lib/pdfmd/pdfmdstat.rb, line 95
# Build a sorted overview of the counted metadata and store it in
# @metadata_hash.
#
# For every tag in @default_tags (in alphabetical order) the counters found
# in @statdata are sorted and copied into a new hash keyed by the capitalized
# tag name. Empty values are listed under the label '*empty*'.
#
# Returns the overview hash (the same object that is stored in
# @metadata_hash).
def analyse_metadata
  @metadata_hash = @default_tags.sort.each_with_object({}) do |tagname, overview|
    overview[tagname.capitalize] = @statdata[tagname].sort.each_with_object({}) do |(value, amount), counts|
      # Counted values are not guaranteed to be strings; only relabel values
      # that can actually report being empty instead of raising NoMethodError.
      blank = value.respond_to?(:empty?) && value.empty?
      counts[blank ? '*empty*' : value] = amount
    end
  end
end
count_values(metadata, keys = '')
click to toggle source
Count all values provided as a hash in metadata. Optional key names can be handed over as an array.
# File lib/pdfmd/pdfmdstat.rb, line 48
# Count how often each value occurs per metadata tag.
#
# metadata - enumerable of records; the element at index 1 of every record is
#            either a Hash of tag => value or a String containing Ruby source
#            for such a Hash.
# keys     - Array of tag names to count. The default '' selects
#            author, title, createdate, subject and keywords.
#
# Returns a Hash of tag name => { value => number of occurrences }. Tags
# without any counted value map to an empty Hash.
#
# Any other kind of `keys` prints 'invalid keys provided' and terminates the
# process with exit status 1.
def count_values(metadata, keys = '')
  tagnames =
    if keys == ''
      %w[author title createdate subject keywords]
    elsif keys.is_a?(Array)
      keys
    else
      puts 'invalid keys provided'
      exit 1
    end

  data = tagnames.each_with_object({}) { |tagname, counters| counters[tagname] = {} }

  metadata.each do |record|
    raw = record[1]
    # SECURITY(review): string records are executed with eval. This is only
    # safe when the metadata strings come from a trusted producer -- confirm
    # against the caller or replace with a real parser.
    tags = raw.is_a?(Hash) ? raw : eval(raw)

    tags.each do |tagname, tagvalue|
      next if tagvalue.nil?
      # Records may carry tags that were not requested; skip them instead of
      # failing on a missing counter hash.
      next unless data.key?(tagname)

      data[tagname][tagvalue] = data[tagname].fetch(tagvalue, 0) + 1
    end
  end

  data
end
output_metadata(format = 'yaml')
click to toggle source
Output the metadata in one of several formats. Default: yaml
Other formats:
json, hash
# File lib/pdfmd/pdfmdstat.rb, line 125
# Print @metadata_hash to standard output.
#
# format - 'json' prints the JSON representation, 'hash' prints the hash via
#          puts, any other value (default 'yaml') prints YAML without the
#          leading '---' document marker.
def output_metadata(format = 'yaml')
  case format
  when 'json'
    require 'json'
    puts @metadata_hash.to_json
  when 'hash'
    puts @metadata_hash
  else
    require 'yaml'
    # Strip only the document marker at the very start; a global substitution
    # would also eat values that happen to end in '---'.
    puts @metadata_hash.to_yaml.sub(/\A---\n/, '')
  end
end