class Alexandria::BookProviders::AdLibrisProvider

Constants

BASE_SEARCH_URL
PRODUCT_URL
SITE

Public Class Methods

new() click to toggle source
Calls superclass method
# File lib/alexandria/book_providers/adlibris.rb, line 29
# Creates the provider: passes the strings "AdLibris" and
# "AdLibris (Sweden)" to the superclass constructor (presumably the
# provider's internal name and its display label -- confirm against the
# superclass), then calls +prefs.read+.
def initialize
  super("AdLibris", "AdLibris (Sweden)")
  prefs.read
  # Disabled HTML-entity decoder, kept for reference:
  # @ent = HTMLEntities.new
end

Public Instance Methods

url(book) click to toggle source

url

# File lib/alexandria/book_providers/adlibris.rb, line 52
# Returns the AdLibris ISBN-lookup URL for +book+.
#
# Any StandardError raised while building the URL is logged as a warning
# and +nil+ is returned instead.
def url(book)
  isbn_uri = nil
  begin
    isbn_uri = create_search_uri(SEARCH_BY_ISBN, book.isbn)
  rescue StandardError => error
    log.warn { "Cannot create url for book #{book}; #{error.message}" }
  end
  isbn_uri
end

Private Instance Methods

create_search_uri(search_type, search_term) click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 61
# Builds the URL used to query AdLibris.
#
# search_type:: one of the SEARCH_BY_* constants.
# search_term:: the ISBN, author, title or keyword to look for.
#
# For SEARCH_BY_ISBN the term is canonicalised with
# Library.canonicalise_isbn and substituted into PRODUCT_URL. For every
# other type the CGI-escaped term and a search-type code are substituted
# into BASE_SEARCH_URL.
#
# Returns the URL as a String.
def create_search_uri(search_type, search_term)
  if search_type == SEARCH_BY_ISBN
    return PRODUCT_URL % Library.canonicalise_isbn(search_term)
  end

  # fetch with a default, so an unrecognised search type becomes a
  # keyword search instead of producing an empty type code in the URL.
  search_type_code = {
    SEARCH_BY_AUTHORS => "author",
    SEARCH_BY_TITLE   => "title",
    SEARCH_BY_KEYWORD => "keyword"
  }.fetch(search_type, "keyword")

  format(BASE_SEARCH_URL, search_type_code, CGI.escape(search_term))
end
get_book_from_search_result(rslt) click to toggle source

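TODO: pre-convert html.body to UTF-8 everywhere before passing it to the parser methods (originally planned with Iconv; Iconv is no longer part of the Ruby standard library, so String#encode is the likely replacement).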

# File lib/alexandria/book_providers/adlibris.rb, line 78
# Fetches the page referenced by a search result and parses it.
#
# rslt:: a Hash whose :lookup_url entry holds the page URL as a String.
#
# Returns whatever parse_result_data returns for the response body.
def get_book_from_search_result(rslt)
  lookup_uri = URI.parse(rslt[:lookup_url])
  response = transport.get_response(lookup_uri)
  parse_result_data(response.body)
end
normalize(text) click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 103
# Returns +text+ unchanged. The method currently performs no
# normalisation at all; it is an identity function.
def normalize(text)
  text
end
parse_result_data(html) click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 107
# Parses an AdLibris product page.
#
# html:: the page markup, handed to html_to_doc.
#
# Returns a two-element Array: a Book built from the title, authors, ISBN,
# publisher, publication year and binding found on the page, and the cover
# image URL (or +nil+ when there is no usable cover).
#
# Raises NoResultsError when the title heading is missing. Any other
# StandardError raised while parsing is logged with its backtrace and
# replaced by a bare NoResultsError.
def parse_result_data(html)
  doc = html_to_doc(html)
  begin
    h1 = doc.at("div.productTitleFormat h1")
    raise NoResultsError, _("title not found on page") unless h1

    title = text_of(h1)

    product = doc.at("div.product")
    ul_info = doc.at("ul.info") # NOTE, two of these

    authors = ul_info.search("li.liAuthor").map do |li|
      text_of(li.search("h2 > a")[0])
    end

    publisher = nil
    publisher_elem = product.search('li[@id$="liPublisher"] a').first
    publisher = text_of(publisher_elem) if publisher_elem

    # The binding is the parenthesised part of the format span when there
    # is one, otherwise the span's whole text.
    binding_name = nil
    format_elem = doc.search("div.productTitleFormat span").first
    if format_elem
      binding_name = text_of(format_elem)
      binding_name = Regexp.last_match(1) if binding_name =~ /\(([^)]+)\)/
    end

    # First four-digit number starting with 1 or 2 is taken as the year.
    year = nil
    published = product.search('span[@id$="Published"]').first
    if published
      publication = published.inner_text
      year = Regexp.last_match(1).to_i if publication =~ /([12][0-9]{3})/
    end

    isbns = []
    doc.search("li[@id *= 'liISBN'] td[text()]").each do |isbn_td|
      isbn = isbn_td.inner_text
      next unless /[0-9x]{10,13}/i.match?(isbn)

      # gsub returns a new string, so the result has to be kept.
      isbn = isbn.gsub(/[\r\n]/, " ")
      isbn = Regexp.last_match(1) if isbn =~ /:\s*([0-9x]+)/i
      isbns << isbn
    end
    isbn = isbns.first
    isbn = Library.canonicalise_isbn(isbn) if isbn

    # cover
    image_url = nil
    cover_img =
      doc.search('span.imageWithShadow img[@id$="ProductImageNotLinked"]').first
    if cover_img
      src = cover_img["src"]
      image_url = if src.start_with?("http://", "https://")
                    src # already absolute, whichever scheme it uses
                  else
                    "#{SITE}/#{src}" # HACK: use html base
                  end
      # no point downloading a "no image" graphic
      # Alexandria has its own generic book icon...
      image_url = nil if /noimage.gif$/.match?(image_url)
    end

    book = Book.new(title, authors, isbn, publisher, year, binding_name)

    [book, image_url]
  rescue StandardError => ex
    # A NoResultsError raised above is passed through untouched.
    raise ex if ex.instance_of? NoResultsError

    trace = ex.backtrace.join("\n> ")
    log.warn do
      "Failed parsing search results for AdLibris " \
      "#{ex.message} #{trace}"
    end
    raise NoResultsError
  end
end
parse_search_result_data(html) click to toggle source
# File lib/alexandria/book_providers/adlibris.rb, line 83
# Parses an AdLibris search-results page.
#
# html:: the page markup, handed to html_to_doc.
#
# Returns an Array of Hashes, one per result table that has a title link,
# each with :title (the link text) and :lookup_url (SITE followed by the
# link's href). Returns an empty Array when the page has no search-result
# container.
def parse_search_result_data(html)
  doc = html_to_doc(html)

  # NOTE(review): the selector used to read "div'searchResult", which is
  # not valid CSS. Whether "searchResult" is a class or an id cannot be
  # told from this file, so both are matched -- confirm against a live page.
  search_hit = doc.search("div.searchResult, div#searchResult")[0]
  return [] unless search_hit

  book_search_results = []
  (search_hit / "ul.ulSearch table").each do |table|
    title_data = table % "div.divTitle"
    link = title_data && (title_data % :a)
    href = link && link["href"]
    # Without a title link there is nothing to look up; emitting a result
    # whose lookup_url is just SITE would only fail later.
    next unless href

    book_search_results << {
      title: link.inner_text,
      lookup_url: "#{SITE}#{href}"
    }
  end
  book_search_results
end