module ParseHtml

Public Instance Methods

amazon_points() click to toggle source
# File lib/amazomania/parse_html.rb, line 92
def amazon_points
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-color-secondary']")
  amazon_points = []
  next_nodeset_is_skipped = false
  nodesets.each do |nodeset|
    if next_nodeset_is_skipped
        next_nodeset_is_skipped = false
        next
    end
    if nodeset.to_s.include?("color: #990000;")
      amazon_point = nodeset.inner_text
      amazon_point = amazon_point.strip.gsub(/,/, '').sub(/([0-9]+)pt.*/, '\1')
      amazon_points.push(amazon_point)
      next_nodeset_is_skipped = true # 「ポイント」と「送料」に共通の class="a-color-secondary" ではポイントの方が先に来る
    else
      amazon_points.push("0")
    end
  end
  amazon_points
end
main_conditions() click to toggle source
# File lib/amazomania/parse_html.rb, line 43
def main_conditions
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")
  main_conditions = []
  nodesets.each do |nodeset|
    condition_tmp = nodeset.inner_text.gsub(" ", "").gsub(/\n/, "")
    if condition_tmp.to_s.include?("中古品") # to_s するのはとても荒い調べ方
      main_condition = "中古品"
    elsif
      main_condition = nodeset.inner_text.gsub(" ", "").strip
    end
    main_conditions.push(main_condition)
  end
  main_conditions
end
parse_html(asin) click to toggle source
# File lib/amazomania/parse_html.rb, line 6
def parse_html(asin)
  @nokogiried_doc   = scraping(asin)
  @shop_names       = shop_names
  @prices           = prices
  @main_conditions  = main_conditions
  @sub_conditions   = sub_conditions
  @shipping_prices  = shipping_prices
  @amazon_points    = amazon_points
end
prices() click to toggle source
# File lib/amazomania/parse_html.rb, line 34
def prices
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-size-large a-color-price olpOfferPrice a-text-bold']")
  prices = []
  nodesets.each do |nodeset|
    prices.push(nodeset.inner_text.lstrip.gsub(/[^\d]/, ""))
  end
  prices
end
shipping_prices() click to toggle source
# File lib/amazomania/parse_html.rb, line 74
def shipping_prices
  nodesets = @nokogiried_doc.xpath("//span[@class='a-color-secondary']")
  shipping_prices = []
  nodesets.each do |nodeset|
    if nodeset.to_s.include?("color: #990000;") # to_s するのはとても荒い調べ方
      next
    end
    if nodeset.to_s.include?("olpShippingPrice")
      shipping_price = nodeset.inner_text.chomp.gsub(" ", "").gsub(/(\r\n|\r|\n|\f)/,"")
      shipping_price = shipping_price.gsub(/[^\d]/, "")
      shipping_prices.push(shipping_price)
    else
      shipping_prices.push("0")
    end
  end
  shipping_prices
end
shop_names() click to toggle source
# File lib/amazomania/parse_html.rb, line 16
def shop_names
  seller_count = 0
  nodesets = @nokogiried_doc.xpath("//*/h3[@class='a-spacing-none olpSellerName']") # 浅くしないと Amazon のネイティブ出品 および Amazonプライム の場合が判別できない
  shop_names = []
  nodesets.each do |nodeset| # ネストがやや深い
    if nodeset.to_s.include?("seller") # to_s するのはとても荒い調べ方
      shop_name = @nokogiried_doc.xpath("//*/h3[@class='a-spacing-none olpSellerName']/span/a")[seller_count].inner_text
      shop_names.push(shop_name)
      seller_count += 1
    elsif nodeset.to_s.include?("amazon.co.jp/shops/") # Amazonアウトレット の場合
      shop_names.push("Amazonアウトレット")
    else
      shop_names.push("Amazon.co.jp") # Amazon のネイティブ出品 or Amazonプライム の場合
    end
  end
  shop_names
end
sub_conditions() click to toggle source
# File lib/amazomania/parse_html.rb, line 58
def sub_conditions
  nodesets = @nokogiried_doc.xpath("//*/span[@class='a-size-medium olpCondition a-text-bold']")
  sub_conditions = []
  nodesets.each do |nodeset|
    condition_tmp = nodeset.inner_text.gsub(" ", "").gsub(/\n/, "")
    if condition_tmp.to_s.include?("中古品") # to_s するのはとても荒い調べ方
      condition_tmp =~ /中古品\-(.*)/
      sub_condition = $1
    else
      sub_condition = "新品"
    end
    sub_conditions.push(sub_condition)
  end
  sub_conditions
end