class Alexandria::BookProviders::SicilianoProvider

Constants

BASE_SEARCH_URL

The string interpolations in this URL are the search term and search type, respectively.

SITE

Public Class Methods

new() click to toggle source
Calls superclass method
# File lib/alexandria/book_providers/siciliano.rb, line 47
# Registers the provider with its short name and its human-readable full
# name, then reads the provider preferences.
def initialize
  short_name = "Siciliano"
  display_name = "Livraria Siciliano (Brasil)"
  super(short_name, display_name)
  # no preferences for the moment
  prefs.read
end

Public Instance Methods

get_book_from_search_result(result) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 53
# Downloads the product page referenced by result[:url] and hands the
# response, together with the search result itself, to parse_result_data.
#
# result - a search-result hash; only its :url entry is read here.
#
# Returns whatever parse_result_data returns.
def get_book_from_search_result(result)
  log.info { "Fetching book from #{result[:url]}" }
  page_uri = URI.parse(result[:url])
  page_html = transport.get(page_uri)
  parse_result_data(page_html, result)
end
url(_book) click to toggle source

The new Siciliano website no longer has direct links to books by their ISBN (the permalink now seems to be based on the product ID).

# File lib/alexandria/book_providers/siciliano.rb, line 86
def url(_book)
  # Intentionally empty: there is no link to build, so the method
  # evaluates to nil.
end

Private Instance Methods

create_search_uri(search_type, search_term, trying_again = false) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 92
# Builds the search URL by filling BASE_SEARCH_URL with the encoded search
# term and the one-letter search type code.
#
# search_type  - one of SEARCH_BY_ISBN, SEARCH_BY_TITLE, SEARCH_BY_AUTHORS or
#                SEARCH_BY_KEYWORD; any other value falls back to the
#                keyword code "X".
# search_term  - the text (or ISBN) to search for.
# trying_again - for ISBN searches only: when true the term is passed through
#                Library.canonicalise_isbn (ISBN-10) instead of
#                Library.canonicalise_ean (ISBN-13).
#
# Returns the formatted URL string.
def create_search_uri(search_type, search_term, trying_again = false)
  # Hash#fetch with a default makes the "X" fallback actually take effect;
  # previously the `|| "X"` was evaluated outside the assignment and dropped,
  # leaving the code nil for unknown search types.
  search_type_code = { SEARCH_BY_ISBN    => "G",
                       SEARCH_BY_TITLE   => "A",
                       SEARCH_BY_AUTHORS => "B",
                       SEARCH_BY_KEYWORD => "X" }.fetch(search_type, "X")

  search_term_encoded =
    if search_type != SEARCH_BY_ISBN
      CGI.escape(search_term)
    elsif trying_again
      # on second attempt, try ISBN-10...
      Library.canonicalise_isbn(search_term)
    else
      # search by ISBN-13 first
      Library.canonicalise_ean(search_term)
    end

  format(BASE_SEARCH_URL, search_term_encoded, search_type_code)
end
first_non_empty_text_node(elem) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 211
# Returns the stripped content of the first child text node of elem that is
# not blank after stripping, or "" when there is no such node.
def first_non_empty_text_node(elem)
  elem.children.each do |child|
    next unless child.text?

    stripped = child.to_s.strip
    return stripped unless stripped.empty?
  end
  ""
end
lines_of_text_as_array(elem) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 222
# Splits the content of elem into lines, using its <br> children as line
# separators.
#
# Text children contribute their string form, any other non-<br> child
# contributes its inner text. Each line is stripped and blank lines are
# dropped.
#
# Returns an Array of Strings.
def lines_of_text_as_array(elem)
  # Each inner array gathers the fragments of one line; a <br> opens a new one.
  fragments_per_line = elem.children.each_with_object([[]]) do |child, acc|
    if child.text?
      acc.last << child.to_s
    elsif child.name == "br"
      acc << []
    else
      acc.last << child.inner_text
    end
  end

  fragments_per_line.map { |fragments| fragments.join.strip }.reject(&:empty?)
end
parse_result_data(html, search_result) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 158
# Builds a Book and a cover image URL from the HTML of a product page.
#
# html          - the product page markup, passed to html_to_doc.
# search_result - the search-result hash for this book; its :publisher entry
#                 supplies the publisher.
#
# Returns a two-element Array [book, first_image_url_or_nil]. Any
# StandardError raised while parsing is logged and nil is returned instead.
def parse_result_data(html, search_result)
  # checked against Siciliano website 21 Feb 2009
  page = html_to_doc(html)

  # Title block: the <h2> carries the title, each h3.autor one author.
  heading = page % "div#conteudo//div.titulo"
  raise NoResultsError unless heading

  title_node = heading % "h2"
  title = title_node ? title_node.inner_text : nil
  authors = (heading / "h3.autor").map { |author_node| author_node.inner_text.strip }

  # The "caracteristica" tab lists "Label: value" lines separated by <br>.
  details = string_array_to_map(lines_of_text_as_array(page % "div#tab-caracteristica"))
  isbn = details["ISBN"]
  translator = details["Tradutor"]
  authors << translator if translator
  cover_binding = details["Acabamento"]
  publisher = search_result[:publisher]

  # The edition line is looked up under the key "Edio" (string_array_to_map
  # keeps only ASCII letters in keys); the first four-digit number starting
  # with 1 or 2 found in it is used as the publication year.
  edition = details["Edio"]
  year_match = edition&.match(/([12][0-9]{3})/)
  publish_year = year_match ? year_match[1].to_i : nil

  # Cover URLs are embedded in inline scripts, e.g.
  # ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
  image_urls = []
  (page / "script").each do |script|
    (script.children || []).each do |child|
      img_link = child.to_s[/ImgSrc\[\d\]="(.+)";/, 1]
      image_urls << img_link if img_link
    end
  end

  [Book.new(title, authors, isbn, publisher, publish_year, cover_binding), image_urls.first]
rescue StandardError => e
  backtrace_text = e.backtrace.join("\n> ")
  log.error { "Failed parsing Siciliano product page #{e.message}\n#{backtrace_text}" }
  nil
end
parse_search_result_data(html) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 112
# Extracts the list of search hits from a search-results page.
#
# Every "div.pesquisa-item-lista-conteudo" element is one hit. Its first
# non-blank text child holds "author / publisher", and its first <a> holds the
# title and the (possibly relative) link to the product page.
#
# html - the search-results markup, passed to html_to_doc.
#
# Returns an Array of Hashes with the keys :title and :url, plus :author and
# :publisher when present. Items that raise while being parsed are logged
# and left out.
def parse_search_result_data(html)
  items = html_to_doc(html).search("div.pesquisa-item-lista-conteudo")

  items.each_with_object([]) do |item, hits|
    hit = {}

    # author & publisher come from the first text child that is not blank
    text_node = item.children.find { |node| node.text? && !node.to_s.strip.empty? }
    author_publisher = text_node ? text_node.to_s.strip : ""
    author, publisher = author_publisher.split("/")
    hit[:author] = author.strip if author
    hit[:publisher] = publisher.strip if publisher

    # title & url come from the first link inside the item
    anchor = item % "a"
    hit[:title] = anchor.inner_text.strip
    href = anchor["href"]
    separator = href.start_with?("/") ? "" : "/"
    hit[:url] = "#{SITE}#{separator}#{href}"

    hits << hit
  rescue StandardError => e
    backtrace_text = e.backtrace.join("\n> ")
    log.error { "Failed parsing Siciliano search page #{e.message}\n#{backtrace_text}" }
  end
end
string_array_to_map(arr) click to toggle source
# File lib/alexandria/book_providers/siciliano.rb, line 240
# Turns an array of "Label: value" strings into a Hash.
#
# Each string is split at its FIRST colon only, so values that themselves
# contain a colon are kept whole. Strings without a colon, or with nothing
# after it, are skipped. Keys are reduced to their ASCII letters (so an
# accented label such as "Edição" is stored as "Edio"); values are stripped.
#
# arr - an Array of Strings.
#
# Returns a Hash mapping the cleaned label to the stripped value.
def string_array_to_map(arr)
  arr.each_with_object({}) do |str, map|
    key, val = str.split(":", 2)
    # With a split limit a bare "Label:" yields an empty value rather than
    # nil; keep skipping such lines.
    next if val.nil? || val.empty?

    # a real hack for not handling encoding properly :^)
    map[key.gsub(/[^a-zA-Z]/, "")] = val.strip
  end
end