class GutenbergRdf::Rdf

Attributes

xml [R] — read-only attribute; holds the root node stored by new(xml)

Public Class Methods

new(xml) click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 9
# Wraps a parsed document, keeping only its root node.
#
# xml - any object responding to #root (presumably a parsed REXML
#       document; confirm against the caller).
#
# The root node is stored in @xml.
def initialize(xml)
  root_node = xml.root
  @xml = root_node
end

Public Instance Methods

authors() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 29
# Returns whatever +extract_authors+ produces, computing it on first use
# and caching the result in @authors for subsequent calls.
def authors
  return @authors if @authors

  @authors = extract_authors
end
covers() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 70
# Returns the entries of +official_cover_images+ followed by those of
# +other_cover_images+, with duplicates removed (first occurrence wins).
def covers
  official = official_cover_images
  extras = other_cover_images
  (official + extras).uniq
end
ebooks() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 74
# Collects a Media object for every 'dcterms:hasFormat' entry whose first
# format value starts with "text" or "application".
#
# Entries that have no 'pgterms:file' child, or whose file carries no
# format value, are skipped instead of raising NoMethodError on nil.
#
# Returns an Array of Media instances (empty when nothing matches).
def ebooks
  files = []
  xml.elements.each('pgterms:ebook/dcterms:hasFormat') do |format|
    file = format.elements['pgterms:file']
    next unless file

    value_node = file.elements['dcterms:format/rdf:Description/rdf:value']
    mime = value_node && value_node.text
    next unless mime

    files << Media.new(file) if mime.match(/\Atext|\Aapplication/)
  end
  files
end
id() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 13
# Extracts the identifier from the 'about' attribute of the
# 'pgterms:ebook' element: the part that follows the leading "ebooks/".
def id
  about = xml.elements['pgterms:ebook'].attributes['about']
  matched = about.match(/\Aebooks\/(.+)\z/)
  matched[1]
end
language() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 52
# Returns the first entry of +languages+, or nil when that list is empty.
def language
  languages[0]
end
languages() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 58
# Returns an Array with the 'rdf:Description/rdf:value' text of every
# 'dcterms:language' element under 'pgterms:ebook', in document order.
def languages
  language_nodes = xml.elements.to_a('pgterms:ebook/dcterms:language')
  language_nodes.map do |node|
    node.elements['rdf:Description/rdf:value'].text
  end
end
published() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 44
# Returns the text of the 'pgterms:ebook/dcterms:issued' element.
def published
  issued = xml.elements['pgterms:ebook/dcterms:issued']
  issued.text
end
publisher() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 48
# Returns the text of the 'pgterms:ebook/dcterms:publisher' element.
def publisher
  publisher_node = xml.elements['pgterms:ebook/dcterms:publisher']
  publisher_node.text
end
rights() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 66
# Returns the text of the 'pgterms:ebook/dcterms:rights' element.
def rights
  rights_node = xml.elements['pgterms:ebook/dcterms:rights']
  rights_node.text
end
subjects() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 33
# Returns the text of every 'rdf:value' found beneath the
# 'rdf:Description' of each 'dcterms:subject' entry whose 'dcam:memberOf'
# 'resource' attribute ends in "LCSH". Other subject entries are ignored.
def subjects
  subject_nodes = xml.elements.to_a('pgterms:ebook/dcterms:subject')

  lcsh_nodes = subject_nodes.select do |entry|
    member_of = entry.elements['rdf:Description/dcam:memberOf']
    member_of.attributes['resource'].match(/LCSH\z/)
  end

  lcsh_nodes.flat_map do |entry|
    entry.elements.to_a('rdf:Description//rdf:value').map { |value| value.text }
  end
end
subtitle() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 25
# Joins every title part after the first with ' - '.
#
# Returns an empty String when +titles+ has one element or none. Using
# drop(1) rather than [1..-1] avoids a NoMethodError on nil when +titles+
# is empty.
def subtitle
  titles.drop(1).join(' - ')
end
title() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 21
# Returns the first element of +titles+ (nil when that list is empty).
def title
  titles[0]
end
type() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 17
# Returns the text of the
# 'pgterms:ebook/dcterms:type/rdf:Description/rdf:value' element.
def type
  type_value = xml.elements['pgterms:ebook/dcterms:type/rdf:Description/rdf:value']
  type_value.text
end

Private Instance Methods

extract_authors() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 101
# Builds one Agent per contributor element found under 'pgterms:ebook'.
#
# 'dcterms:creator' entries come first and get the role 'aut'; every
# 'marcrel:*' entry follows and gets the element's own name as its role.
# Each Agent is constructed from the contributor's 'pgterms:agent' child.
#
# Returns an Array of Agent instances.
def extract_authors
  build_agent = lambda do |contributor, role|
    agent = Agent.new(contributor.elements['pgterms:agent'])
    agent.role = role
    agent
  end

  creators = xml.elements.to_a('pgterms:ebook/dcterms:creator').map do |contributor|
    build_agent.call(contributor, 'aut')
  end
  others = xml.elements.to_a('pgterms:ebook/marcrel:*').map do |contributor|
    build_agent.call(contributor, contributor.name)
  end

  creators + others
end
file_is_image?(node) click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 125
# Tells whether any 'dcterms:format/rdf:Description/rdf:value' beneath
# +node+ has text containing "image".
#
# node - an element exposing #elements.
#
# Returns true or false.
def file_is_image?(node)
  format_values = node.elements.to_a('dcterms:format/rdf:Description/rdf:value')
  format_values.any? { |value| value.text.match(/image/) }
end
official_cover_images() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 116
# Returns the sorted 'about' attributes of every 'pgterms:file' (one per
# 'dcterms:hasFormat' entry) that +file_is_image?+ accepts.
def official_cover_images
  files = xml.elements.to_a('pgterms:ebook/dcterms:hasFormat').map do |format|
    format.elements['pgterms:file']
  end
  image_files = files.select { |file| file_is_image?(file) }
  image_files.map { |file| file.attributes['about'] }.sort
end
other_cover_images() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 132
# Returns the sorted text of every 'pgterms:marc901' element under
# 'pgterms:ebook', with a leading
# "file:///public/vhost/g/gutenberg/html" replaced by
# "http://www.gutenberg.org".
#
# The replacement is done with non-mutating String#sub, so the string
# handed back by the node's #text is left untouched (the in-place sub!
# altered that string and raised on frozen strings).
def other_cover_images
  entries = xml.elements.to_a('pgterms:ebook/pgterms:marc901').map do |node|
    node.text.sub(/\Afile:\/\/\/public\/vhost\/g\/gutenberg\/html/, 'http://www.gutenberg.org')
  end
  entries.sort
end
split_title_and_subtitle() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 89
# Breaks the text of 'pgterms:ebook/dcterms:title' into its parts.
#
# The title is first split on newlines. While the result is still a single
# piece, it is split on ':' then ';' then ', or,' in that order; the first
# separator that changes the piece count ends the process.
#
# Returns an Array of Strings with surrounding whitespace stripped.
def split_title_and_subtitle
  raw_title = xml.elements['pgterms:ebook/dcterms:title'].text
  # NOTE: em dashes (—) are swapped for plain ASCII hyphens before splitting.
  normalized = raw_title.gsub(/—/, '-')

  separators = [/:/, /;/, /, or,/]
  pieces = separators.reduce(normalized.split(/\n/)) do |parts, separator|
    # Only keep splitting while the title is still in one piece.
    parts.count == 1 ? parts.first.split(separator) : parts
  end

  pieces.map(&:strip)
end
titles() click to toggle source
# File lib/gutenberg_rdf/rdf.rb, line 85
# Returns the result of +split_title_and_subtitle+, computed on first use
# and cached in @titles for subsequent calls.
def titles
  return @titles if @titles

  @titles = split_title_and_subtitle
end