module DataKitten::PublishingFormats::CKAN
Private Class Methods
# File lib/data_kitten/publishing_formats/ckan.rb, line 57
# Derives a base URI from a dataset URI.
#
# When the second-to-last path segment is "dataset", the path leading up to
# that segment (with a trailing slash) is merged into +uri+; in every other
# case the root path "/" is merged instead.
#
# @param uri [URI] a URI object responding to +path+ and +merge+
# @return [URI] the result of +uri.merge+ with the computed base path
def self.get_base(uri)
  # Everything except the final path segment (the dataset id or name).
  parents = uri.path.split('/')[0...-1]
  return uri.merge("/") unless parents.last == "dataset"

  uri.merge(parents[0...-1].join('/') + '/')
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 12 def self.supported?(instance) uri = instance.uri base_uri = instance.base_uri *base, package = uri.path.split('/') if uri.path =~ %r{api/\d+/action/package_show/?$} result = JSON.parse(RestClient.get(uri.to_s))['result'] instance.identifier = result['id'] result['extras'] = CKAN3Hash.new(result['extras'], 'key', 'value') result['tags'] = CKAN3Hash.new(result['tags'], 'name', 'display_name').values instance.metadata = result elsif uri.path =~ %r{api/\d+/rest/dataset/} result = JSON.parse(RestClient.get(uri.to_s)) instance.identifier = result['id'] instance.metadata = result else # If the 2nd to last element in the path is 'dataset' then it's probably # the CKAN dataset view page, the last element will be the dataset id # or name if base.last == "dataset" instance.identifier = package # build a base URI ending with a / base_uri = get_base(uri) # If the package is a UUID - it's more than likely to be a CKAN ID elsif package.match(/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/) instance.identifier = package else results = begin RestClient.get base_uri.merge("api/3/action/package_show").to_s, {:params => {:id => package}} rescue RestClient::Exception RestClient.get base_uri.merge("api/2/rest/dataset/#{package}").to_s end result = JSON.parse results instance.identifier = result.fetch("result", result)["id"] end instance.metadata = JSON.parse RestClient.get base_uri.merge("api/rest/package/#{instance.identifier}").to_s end instance.metadata.extend(GuessableLookup) instance.source = instance.metadata return true rescue false end
Public Instance Methods
# File lib/data_kitten/publishing_formats/ckan.rb, line 246
# Base URI for this dataset, computed by delegating to the module-level
# +get_base+ with this object's +uri+.
#
# @return [Object] whatever +CKAN.get_base+ returns for +uri+
def base_uri
  ckan = DataKitten::PublishingFormats::CKAN
  ckan.get_base(uri)
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 139
# Contributors of the dataset, built from the 'author' and 'author_email'
# metadata fields via +extract_agent+.
#
# @return [Array] the value returned by +extract_agent+
def contributors
  name_field = 'author'
  email_field = 'author_email'
  extract_agent(name_field, email_field)
end
The human-readable title of the dataset.
@see Dataset#data_title
# File lib/data_kitten/publishing_formats/ckan.rb, line 78
# Title of the dataset, read from the "title" metadata field.
#
# @return [Object, nil] the result of +metadata.lookup("title")+
def data_title
  title_field = "title"
  metadata.lookup(title_field)
end
A brief description of the dataset
@see Dataset#description
# File lib/data_kitten/publishing_formats/ckan.rb, line 85
# Description of the dataset: the "notes" metadata field, falling back to
# "description".
#
# @return [Object, nil] the first truthy lookup result; nil when the lookup
#   raises a StandardError
def description
  metadata.lookup("notes") || metadata.lookup("description")
rescue StandardError
  nil
end
A list of distributions, referred to as resources by Datapackage.
# File lib/data_kitten/publishing_formats/ckan.rb, line 160
# Builds one Distribution per entry of the "resources" metadata field.
#
# Each resource is mapped to a hash with :title, :accessURL, :downloadURL,
# :format, :mediaType, :issued, :modified and :byteSize. The last three are
# nil when the corresponding value cannot be parsed.
#
# @return [Array<Distribution>, nil] the distributions, or nil when building
#   them raises a StandardError (for example when "resources" is missing)
def distributions
  metadata.lookup("resources").map do |res|
    attrs = {
      :title => res["description"],
      :accessURL => landing_page,
      :downloadURL => res["url"],
      :format => res["format"],
      :mediaType => res["mimetype"] || res["content_type"],
    }

    attrs[:issued] = begin
      Date.parse(res["created"])
    rescue StandardError
      nil
    end

    attrs[:modified] = begin
      Date.parse(res["last_modified"] || res["revision_timestamp"])
    rescue StandardError
      nil
    end

    attrs[:byteSize] = begin
      Integer(res["size"])
    rescue StandardError
      nil
    end

    Distribution.new(self, ckan_resource: attrs)
  end
rescue StandardError
  nil
end
An identifier for the dataset
@see Dataset#identifier
# File lib/data_kitten/publishing_formats/ckan.rb, line 94
# Identifier of the dataset: the "name" metadata field when present,
# otherwise the +@identifier+ instance variable.
#
# @return [Object, nil]
def identifier
  name = metadata.lookup("name")
  name || @identifier
end
Date the dataset was released
@see Dataset#issued
# File lib/data_kitten/publishing_formats/ckan.rb, line 194
# Release date of the dataset, parsed from the "metadata_created" field.
#
# @return [Date, nil] the parsed date, or nil when the lookup or the parse
#   raises a StandardError
def issued
  Date.parse(metadata.lookup("metadata_created"))
rescue StandardError
  nil
end
Keywords for the dataset
@see Dataset#keywords
# File lib/data_kitten/publishing_formats/ckan.rb, line 110
# Keywords of the dataset, copied from the "tags" metadata field.
#
# @return [Array] a new array with every element yielded by the tags'
#   +each+; an empty array when a StandardError is raised
def keywords
  [].tap do |collected|
    metadata.lookup("tags").each { |tag| collected << tag }
  end
rescue StandardError
  []
end
A web page which can be used to gain access to the dataset
@see Dataset#landing_page
# File lib/data_kitten/publishing_formats/ckan.rb, line 101
# Landing page for the dataset. Lookups are tried in order and the first
# truthy value wins: extras "landing_page", then "url", then "ckan_url".
#
# @return [Object, nil]
def landing_page
  page = metadata.lookup("extras", "landing_page")
  page ||= metadata.lookup("url")
  page ||= metadata.lookup("ckan_url")
  page
end
The language of the dataset
@see Dataset#language
# File lib/data_kitten/publishing_formats/ckan.rb, line 221
# Language of the dataset. Lookups are tried in order and the first truthy
# value wins: "language", "metadata_language", extras "metadata_language",
# the first element of extras "language", then extras "language" itself.
#
# @return [Object, nil]
def language
  lang = metadata.lookup("language")
  lang ||= metadata.lookup("metadata_language")
  lang ||= metadata.lookup("extras", "metadata_language")
  lang ||= metadata.lookup("extras", "language", 0)
  lang ||= metadata.lookup("extras", "language")
  lang
end
A list of licenses.
@see Dataset#licenses
# File lib/data_kitten/publishing_formats/ckan.rb, line 146
# Licenses of the dataset.
#
# Reads the license id ("license_id"), URI ("license_url" or extras
# "licence_url") and name ("license_title" or extras "licence_url_title").
#
# @return [Array<License>] a single-element array when at least one of the
#   three values is truthy, otherwise an empty array
def licenses
  license_id = metadata.lookup("license_id")
  license_uri = metadata.lookup("license_url") || metadata.lookup("extras", "licence_url")
  license_name = metadata.lookup("license_title") || metadata.lookup("extras", "licence_url_title")
  return [] if [license_id, license_uri, license_name].none?

  [License.new(:id => license_id, :uri => license_uri, :name => license_name)]
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 135
# Maintainers of the dataset, built from the 'maintainer' and
# 'maintainer_email' metadata fields via +extract_agent+.
#
# @return [Array] the value returned by +extract_agent+
def maintainers
  name_field = 'maintainer'
  email_field = 'maintainer_email'
  extract_agent(name_field, email_field)
end
Date the dataset was modified
@see Dataset#modified
# File lib/data_kitten/publishing_formats/ckan.rb, line 201
# Modification date of the dataset, parsed from the "metadata_modified" field.
#
# @return [Date, nil] the parsed date, or nil when the lookup or the parse
#   raises a StandardError
def modified
  Date.parse(metadata.lookup("metadata_modified"))
rescue StandardError
  nil
end
A list of publishers.
@see Dataset#publishers
# File lib/data_kitten/publishing_formats/ckan.rb, line 123
# Publishers of the dataset.
#
# Prefers the organization returned by +fetch_organization+. Failing that,
# the id of the first entry in "groups" is passed to +fetch_publisher+.
#
# @return [Array] zero or one publisher; nil results are dropped
def publishers
  org = fetch_organization
  return [org] if org

  group_id = metadata.lookup('groups', 0, 'id')
  return [] unless group_id

  [fetch_publisher(group_id)].compact
end
The publishing format for the dataset. @return [Symbol] :ckan
@see Dataset#publishing_format
# File lib/data_kitten/publishing_formats/ckan.rb, line 71
# Publishing format handled by this module.
#
# @return [Symbol] always +:ckan+
def publishing_format
  :ckan
end
Spatial coverage of the dataset
@see Dataset#spatial
# File lib/data_kitten/publishing_formats/ckan.rb, line 242
# Spatial coverage of the dataset: the result of +extract_spatial+ when
# truthy, otherwise the result of +extract_bbox+.
#
# @return [Object, nil]
def spatial
  geometry = extract_spatial
  geometry || extract_bbox
end
The temporal coverage of the dataset
@see Dataset#temporal
# File lib/data_kitten/publishing_formats/ckan.rb, line 208
# Temporal coverage of the dataset.
#
# The start is read from extras "temporal_coverage-from" or
# "temporal-extent-begin"; the end from extras "temporal_coverage-to" or
# "temporal-extent-end". A value that cannot be parsed as a date becomes nil.
#
# @return [Temporal] built with +:start+ and +:end+ options
def temporal
  raw_start = metadata.lookup("extras", "temporal_coverage-from") ||
              metadata.lookup("extras", "temporal-extent-begin")
  raw_end = metadata.lookup("extras", "temporal_coverage-to") ||
            metadata.lookup("extras", "temporal-extent-end")

  start_date = begin
    Date.parse(raw_start)
  rescue StandardError
    nil
  end

  end_date = begin
    Date.parse(raw_end)
  rescue StandardError
    nil
  end

  Temporal.new(:start => start_date, :end => end_date)
end
The main category of the dataset
@see Dataset#theme
# File lib/data_kitten/publishing_formats/ckan.rb, line 232
# Main category of the dataset. Lookups are tried in order and the first
# truthy value wins: the first element of extras "theme", extras
# "theme-primary", the "name" of the first group, then the first group itself.
#
# @return [Object, nil]
def theme
  category = metadata.lookup("extras", "theme", 0)
  category ||= metadata.lookup("extras", "theme-primary")
  category ||= metadata.lookup("groups", 0, "name")
  category ||= metadata.lookup("groups", 0)
  category
end
How frequently the data is updated.
# File lib/data_kitten/publishing_formats/ckan.rb, line 183
# Update frequency of the data. Extras are tried in order and the first
# truthy value wins: "update_frequency", "frequency-of-update", then
# "accrual_periodicity".
#
# @return [Object, nil] nil also when a lookup raises a StandardError
def update_frequency
  frequency = metadata.lookup("extras", "update_frequency")
  frequency ||= metadata.lookup("extras", "frequency-of-update")
  frequency ||= metadata.lookup("extras", "accrual_periodicity")
  frequency
rescue StandardError
  nil
end
Private Instance Methods
# File lib/data_kitten/publishing_formats/ckan.rb, line 341
# Builds an Agent from a pair of metadata fields.
#
# @param name_field [String] metadata key holding the agent's name
# @param email_field [String] metadata key holding the agent's email
# @return [Array<Agent>] a single-element array when either value is truthy,
#   otherwise an empty array
def extract_agent(name_field, email_field)
  agent_name = metadata.lookup(name_field)
  agent_email = metadata.lookup(email_field)
  return [] if [agent_name, agent_email].none?

  [Agent.new(name: agent_name, mbox: agent_email)]
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 271
# Builds a "Polygon" hash from the four bounding-box extras
# ("bbox-west-long", "bbox-east-long", "bbox-north-lat", "bbox-south-lat").
#
# @return [Hash, nil] a hash with "type" and "coordinates" keys holding a
#   single closed ring, or nil when any value is missing or not convertible
#   with +Float+
def extract_bbox
  west, east, north, south = %w[bbox-west-long bbox-east-long bbox-north-lat bbox-south-lat].map do |key|
    Float(metadata.lookup("extras", key))
  end

  # The ring starts and ends on the north-west corner.
  ring = [
    [west, north],
    [east, north],
    [east, south],
    [west, south],
    [west, north]
  ]

  { "type" => "Polygon", "coordinates" => [ring] }
rescue StandardError
  nil
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 264
# Parses the extras "spatial" field as JSON.
#
# @return [Object, nil] the parsed value when its "type" entry is not nil;
#   nil otherwise, and nil when the lookup, the parse or the "type" access
#   raises a StandardError
def extract_spatial
  geometry = JSON.parse(metadata.lookup("extras", "spatial"))
  geometry["type"].nil? ? nil : geometry
rescue StandardError
  nil
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 290
# Fetches the dataset's organization and wraps it in an Agent.
#
# The v3 +organization_show+ action is tried first; on any StandardError the
# +api/rest/group/<id>+ endpoint is used instead.
#
# @return [Agent, nil] nil when the metadata has no 'organization' entry or
#   when both requests fail
def fetch_organization
  org = metadata['organization']
  return nil unless org

  begin
    endpoint = base_uri.merge("api/3/action/organization_show")
    response = RestClient.get(endpoint.to_s, params: {id: org['id']})
    org_data = JSON.parse(response)['result']
    extras = CKAN3Hash.new(without_empty_values(org_data['extras']), "key", "value")
  rescue StandardError
    # Fall back to the REST group endpoint, whose body is the group itself.
    endpoint = base_uri.merge("api/rest/group/#{org['id']}")
    response = RestClient.get(endpoint.to_s)
    org_data = JSON.parse(response)
    extras = without_empty_values(org_data['extras'])
  end

  Agent.new(
    :name => org_data['title'],
    :mbox => (org_data['contact-email'] || extras['contact-email']),
    :homepage => extras['website-url'] || base_uri.to_s
  )
rescue StandardError
  nil
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 313
# Fetches a publisher (organization or group) by id and wraps it in an Agent.
#
# Three endpoints are tried in order, relative to +base_uri+:
# +organization_show+, +group_show+ and the REST +group+ resource. The first
# one that returns parseable JSON wins.
#
# @param id [String] the organization/group id to look up
# @return [Agent, nil] nil when none of the endpoints could be fetched
def fetch_publisher(id)
  # Clear the result of any earlier call: otherwise a lookup in which every
  # endpoint fails would return the previously fetched publisher.
  @group = nil

  [
    "api/3/action/organization_show?id=#{id}",
    "api/3/action/group_show?id=#{id}",
    "api/rest/group/#{id}"
  ].each do |path|
    begin
      @group = JSON.parse RestClient.get base_uri.merge(path).to_s
      break
    rescue
      # FakeWeb raises FakeWeb::NetConnectNotAllowedError, whereas
      # RestClient raises RestClient::ResourceNotFound in the "real world".
      nil
    end
  end
  return nil unless @group

  # Responses that nest the group under "result" carry the name in "title";
  # tolerate a response that has neither key instead of raising.
  name = @group["display_name"] || (@group["result"] || {})["title"]
  Agent.new(:name => name,
            :homepage => select_extras(@group, "website-url"),
            :mbox => select_extras(@group, "contact-email"))
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 337
# This object's +uri+ passed through +Kernel#URI+.
#
# @return [URI] the URI object for +uri+
def parsed_uri
  raw = uri
  URI(raw)
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 256
# Reads an extra value from a group response.
#
# First tries +group["extras"][key]+. Only when that access raises is the
# value searched for in +group['result']['extras']+, treated as a list of
# entries with "key" and "value" fields.
#
# @param group [Hash] parsed group/organization response
# @param key [String] name of the extra to read
# @return [Object, nil] the value found; "" when neither location can be
#   read; nil when +group["extras"]+ exists but has no such key
def select_extras(group, key)
  direct = begin
    group["extras"][key]
  rescue StandardError
    ""
  end
  return direct unless direct == ""

  begin
    group['result']['extras'].find { |entry| entry["key"] == key }['value']
  rescue StandardError
    ""
  end
end
# File lib/data_kitten/publishing_formats/ckan.rb, line 252
# Returns a copy of +h+ without entries whose value is nil or empty.
#
# Values that do not respond to +empty?+ (numbers, booleans) are kept rather
# than raising NoMethodError.
#
# @param h [Hash] the hash to filter
# @return [Hash] a new hash; +h+ itself is not modified
def without_empty_values(h)
  h.reject { |_key, value| value.nil? || (value.respond_to?(:empty?) && value.empty?) }
end