class Alexandria::BookProviders::AdLibrisProvider
Constants
- BASE_SEARCH_URL
- PRODUCT_URL
- SITE
Public Class Methods
new()
click to toggle source
Calls superclass method
# File lib/alexandria/book_providers/adlibris.rb, line 29
# Builds the provider: hands the two names "AdLibris" and
# "AdLibris (Sweden)" to the superclass constructor, then calls
# +prefs.read+.
def initialize
  super("AdLibris", "AdLibris (Sweden)")
  prefs.read
  # @ent = HTMLEntities.new
end
Public Instance Methods
search(criterion, type)
click to toggle source
Searches AdLibris for the given criterion and search type (implementation copied from the new WorldCat search).
# File lib/alexandria/book_providers/adlibris.rb, line 36
# Runs a search against AdLibris.
#
# For SEARCH_BY_ISBN the fetched page is parsed directly with
# +parse_result_data+. For every other type the page is treated as a list
# of hits, and each hit is resolved through +get_book_from_search_result+.
#
# Raises NoResultsError when a non-ISBN search yields no hits.
def search(criterion, type)
  request_url = create_search_uri(type, criterion)
  log.info { "Fetching #{request_url} " }
  page = transport.get_response(URI.parse(request_url)).body

  return parse_result_data(page) if type == SEARCH_BY_ISBN

  hits = parse_search_result_data(page)
  raise NoResultsError if hits.empty?

  hits.map { |hit| get_book_from_search_result(hit) }
end
url(book)
click to toggle source
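Returns the AdLibris URL for the given book (built from its ISBN), or nil if the URL cannot be created.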
# File lib/alexandria/book_providers/adlibris.rb, line 52
# Builds the ISBN lookup URL for +book+.
#
# Any StandardError raised while building it is logged as a warning and
# swallowed; the method then returns nil.
def url(book)
  begin
    create_search_uri(SEARCH_BY_ISBN, book.isbn)
  rescue StandardError => e
    log.warn { "Cannot create url for book #{book}; #{e.message}" }
    nil
  end
end
Private Instance Methods
create_search_uri(search_type, search_term)
click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 61
# Builds the URL string to request for a search.
#
# search_type:: one of the SEARCH_BY_* constants
# search_term:: the ISBN, or the free-text term for other search types
#
# For SEARCH_BY_ISBN the canonicalised ISBN is substituted into
# PRODUCT_URL. For all other types the term is CGI-escaped and substituted
# into BASE_SEARCH_URL together with a search-type code.
#
# Returns the URL as a String.
def create_search_uri(search_type, search_term)
  if search_type == SEARCH_BY_ISBN
    PRODUCT_URL % Library.canonicalise_isbn(search_term)
  else
    # Unknown search types fall back to a keyword search. The fallback must
    # be part of the assigned value, otherwise the code stays nil.
    search_type_code = {
      SEARCH_BY_AUTHORS => "author",
      SEARCH_BY_TITLE => "title",
      SEARCH_BY_KEYWORD => "keyword"
    }.fetch(search_type, "keyword")
    search_term_encoded = CGI.escape(search_term)
    format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
  end
end
get_book_from_search_result(rslt)
click to toggle source
TODO: use Iconv to pre-convert the html.body to UTF-8 everywhere before sending it to the parser methods
# File lib/alexandria/book_providers/adlibris.rb, line 78
# Fetches the page referenced by rslt[:lookup_url] and parses it with
# +parse_result_data+, returning whatever that parser returns.
def get_book_from_search_result(rslt)
  lookup_uri = URI.parse(rslt[:lookup_url])
  response = transport.get_response(lookup_uri)
  parse_result_data(response.body)
end
normalize(text)
click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 103
# Identity hook: hands +text+ back exactly as it was received.
def normalize(text)
  text
end
parse_result_data(html)
click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 107
# Parses one product page into a book record.
#
# html:: raw HTML of the product page
#
# Returns a two-element array [book, image_url]. +book+ is a Book built
# from the title, authors, ISBN, publisher, year and binding found on the
# page. +image_url+ is the cover URL, or nil when there is no cover image
# or it is the site's "noimage.gif" placeholder.
#
# Raises NoResultsError when the title heading is missing. Any other
# StandardError raised while parsing is logged as a warning and converted
# into NoResultsError.
def parse_result_data(html)
  doc = html_to_doc(html)
  begin
    h1 = doc.at("div.productTitleFormat h1")
    raise NoResultsError, _("title not found on page") unless h1

    title = text_of(h1)
    product = doc.at("div.product")
    ul_info = doc.at("ul.info") # NOTE, two of these

    authors = []
    ul_info.search("li.liAuthor").each do |li| # css-like search
      authors << text_of(li.search("h2 > a")[0])
    end

    publisher = nil
    publisher_elem = product.search('li[@id$="liPublisher"] a').first
    publisher = text_of(publisher_elem) if publisher_elem

    # Prefer the parenthesised part of the format text, when there is one.
    binding_name = nil
    format_elem = doc.search("div.productTitleFormat span").first
    if format_elem
      binding_name = text_of(format_elem)
      binding_name = Regexp.last_match(1) if binding_name =~ /\(([^)]+)\)/
    end

    # First four-digit number starting with 1 or 2 is taken as the year.
    year = nil
    published = product.search('span[@id$="Published"]').first
    if published
      publication = published.inner_text
      year = Regexp.last_match(1).to_i if publication =~ /([12][0-9]{3})/
    end

    isbns = []
    doc.search("li[@id *= 'liISBN'] td[text()]").each do |isbn_td|
      isbn_text = isbn_td.inner_text
      next unless isbn_text =~ /[0-9x]{10,13}/i

      # The flattened text has to be assigned back; String#tr returns a
      # new string and leaves the receiver untouched.
      isbn_text = isbn_text.tr("\r\n", " ")
      isbn_text = Regexp.last_match(1) if isbn_text =~ /:\s*([0-9x]+)/i
      isbns << isbn_text
    end
    isbn = isbns.first
    isbn = Library.canonicalise_isbn(isbn) if isbn

    # cover
    image_url = nil
    cover_img = doc.search('span.imageWithShadow img[@id$="ProductImageNotLinked"]').first
    if cover_img
      src = cover_img["src"]
      # Absolute URLs (http or https) are used as-is; anything else is
      # treated as relative to the site root.
      image_url = if src.start_with?("http://", "https://")
                    src
                  else
                    "#{SITE}/#{src}" # HACK: use html base
                  end
      # no point downloading a "no image" graphic
      # Alexandria has its own generic book icon...
      image_url = nil if /noimage.gif$/.match?(image_url)
    end

    book = Book.new(title, authors, isbn, publisher, year, binding_name)
    [book, image_url]
  rescue StandardError => ex
    # A NoResultsError raised above is passed through unchanged.
    raise ex if ex.instance_of? NoResultsError

    trace = ex.backtrace.join("\n> ")
    log.warn do
      "Failed parsing search results for AdLibris " \
        "#{ex.message} #{trace}"
    end
    raise NoResultsError
  end
end
parse_search_result_data(html)
click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 83
# Extracts the list of hits from a search-results page.
#
# html:: raw HTML of the search-results page
#
# Returns an array of hashes, one per hit, each with the keys
# :title (link text) and :lookup_url (SITE followed by the link's href).
# Returns an empty array when the results container is not found.
# Result tables without a title link are skipped, since there is no
# product page to follow for them.
def parse_search_result_data(html)
  doc = html_to_doc(html)

  # NOTE(review): the selector below looks malformed (apostrophe where "."
  # or "#" would be expected); presumably "div.searchResult" was meant --
  # confirm against the live markup before changing it.
  search_hit = doc.search("div'searchResult")[0]
  return [] unless search_hit

  book_search_results = []
  (search_hit / "ul.ulSearch table").each do |table|
    title_cell = table % "div.divTitle"
    link = title_cell && (title_cell % :a)
    href = link && link["href"]
    next unless href

    book_search_results << { title: link.inner_text, lookup_url: "#{SITE}#{href}" }
  end
  book_search_results
end