module GooglePlay::Parser
Public Instance Methods
parse_app(html)
click to toggle source
# File lib/google-play/parser.rb, line 5 def parse_app(html) doc = Nokogiri.HTML(html) GooglePlay::App.new( :name => parse_app_name(doc), :image_url => parse_app_image_url(doc), :developer => parse_app_developer(doc), :developer_mail => parse_app_developer_mail(doc), :developer_web => parse_app_developer_web(doc), :category => parse_app_category(doc), :rating_count => parse_app_rating_count(doc), :rating_counts => parse_app_rating_counts(doc), :rating_average => parse_app_rating_average(doc), :description => parse_app_description(doc), :recent_change => parse_app_recent_change(doc), :last_update => parse_app_last_update(doc), :file_size => parse_app_file_size(doc), :downloads => parse_app_downloads(doc), :version => parse_app_version(doc), :os_requried => parse_app_os_required(doc) ) end
parse_review(html)
click to toggle source
# File lib/google-play/parser.rb, line 27 def parse_review(html) doc = Nokogiri.HTML(html) doc.xpath("//div[@class='single-review']").map do |node| GooglePlay::Review.new( :id => parse_review_id(node), :user => parse_review_user(node), :user_id => parse_review_user_id(node), :date => parse_review_date(node), :rating => parse_review_rating(node), :title => parse_review_title(node), :text => parse_review_text(node) ) end end
Private Instance Methods
parse_app_category(node)
click to toggle source
# File lib/google-play/parser.rb, line 111 def parse_app_category(node) a = node.xpath("//a[@class='document-subtitle category']").first a['href'].match(/category\/(.+)$/)[1].downcase end
parse_app_description(node)
click to toggle source
# File lib/google-play/parser.rb, line 128 def parse_app_description(node) node.xpath("//div[@class='id-app-orig-desc']").inner_html end
parse_app_developer(node)
click to toggle source
# File lib/google-play/parser.rb, line 93 def parse_app_developer(node) node.xpath("//a[@class='document-subtitle primary']/span[@itemprop='name']").text end
parse_app_developer_mail(node)
click to toggle source
# File lib/google-play/parser.rb, line 102 def parse_app_developer_mail(node) a = node.xpath("//a[@class='dev-link']") if a.size == 0 nil else a[a.size - 1]['href'].sub('mailto:', '') end end
parse_app_developer_web(node)
click to toggle source
# File lib/google-play/parser.rb, line 97 def parse_app_developer_web(node) a = node.xpath("//a[@class='dev-link']") (a.size == 2) ? a.first['href'] : nil end
parse_app_downloads(node)
click to toggle source
# File lib/google-play/parser.rb, line 153 def parse_app_downloads(node) node.xpath("//div[@itemprop='numDownloads']").text.strip end
parse_app_file_size(node)
click to toggle source
# File lib/google-play/parser.rb, line 148 def parse_app_file_size(node) file_size = node.xpath("//div[@itemprop='fileSize']").text.strip (file_size =~ /\d+/) ? file_size : :device_dependent end
parse_app_image_url(node)
click to toggle source
# File lib/google-play/parser.rb, line 89 def parse_app_image_url(node) node.xpath("//img[@class='cover-image']").first['src'] end
parse_app_last_update(node)
click to toggle source
# File lib/google-play/parser.rb, line 138 def parse_app_last_update(node) text = node.xpath("//div[@itemprop='datePublished']").text begin Date.parse(text) rescue text =~ /(\d+)\D+(\d+)\D+(\d+)/ Date.new($1.to_i, $2.to_i, $3.to_i) end end
parse_app_name(node)
click to toggle source
# File lib/google-play/parser.rb, line 85 def parse_app_name(node) node.xpath("//div[@class='document-title']/div").text end
parse_app_os_required(node)
click to toggle source
# File lib/google-play/parser.rb, line 162 def parse_app_os_required(node) text = node.xpath("//div[@itemprop='operatingSystems']").text if text =~ /([\d\.]+)/ $1 else :device_dependent end end
parse_app_rating_average(node)
click to toggle source
# File lib/google-play/parser.rb, line 124 def parse_app_rating_average(node) node.xpath("//div[@class='score']").text.to_f end
parse_app_rating_count(node)
click to toggle source
# File lib/google-play/parser.rb, line 116 def parse_app_rating_count(node) node.xpath("//div[@class='stars-count']").text.match(/(\d+)/)[1] end
parse_app_rating_counts(node)
click to toggle source
# File lib/google-play/parser.rb, line 120 def parse_app_rating_counts(node) node.xpath("//span[@class='bar-number']").map { |n| n.text.to_i } end
parse_app_recent_change(node)
click to toggle source
# File lib/google-play/parser.rb, line 132 def parse_app_recent_change(node) node.xpath("//div[@class='recent-change']") .map { |n| n.text } .join("\n") end
parse_app_version(node)
click to toggle source
# File lib/google-play/parser.rb, line 157 def parse_app_version(node) version = node.xpath("//div[@itemprop='softwareVersion']").text.strip (version =~ /\d+/) ? version : :device_dependent end
parse_review_date(node)
click to toggle source
# File lib/google-play/parser.rb, line 58 def parse_review_date(node) text = node.xpath(".//span[@class='review-date']").text begin Date.parse(text) rescue text =~ /(\d+)\D+(\d+)\D+(\d+)/ Date.new($1.to_i, $2.to_i, $3.to_i) end end
parse_review_id(node)
click to toggle source
# File lib/google-play/parser.rb, line 43 def parse_review_id(node) a = node.xpath(".//a[@class='reviews-permalink']").first a['href'].match(/reviewId=(\w+)/)[1] end
parse_review_rating(node)
click to toggle source
# File lib/google-play/parser.rb, line 68 def parse_review_rating(node) div = node.xpath(".//div[@class='current-rating']").first width = div['style'].match(/width: (\d+)%/)[1].to_i width / 20 end
parse_review_text(node)
click to toggle source
# File lib/google-play/parser.rb, line 78 def parse_review_text(node) dup = node.dup dup.xpath(".//div[@class='review-link']").remove dup.xpath(".//span[@class='review-title']").remove dup.xpath(".//div[@class='review-body']").text.strip end
parse_review_title(node)
click to toggle source
# File lib/google-play/parser.rb, line 74 def parse_review_title(node) node.xpath(".//span[@class='review-title']").text end
parse_review_user(node)
click to toggle source
# File lib/google-play/parser.rb, line 48 def parse_review_user(node) a = node.xpath(".//span[@class='author-name']/a").first a.nil? ? '' : a.text end
parse_review_user_id(node)
click to toggle source
# File lib/google-play/parser.rb, line 53 def parse_review_user_id(node) a = node.xpath(".//span[@class='author-name']/a").first a.nil? ? nil : a['href'].match(/id=(\w+)/)[1].to_i end