class FacebookDumper::FacebookFriendsDumper
Public Class Methods
new()
click to toggle source
# File lib/facebook_dumper.rb, line 41
# Creates a dumper with an empty friends list.
# The list is replaced by extract_from_file and read by friends_list.
def initialize
  @friends = []
end
Public Instance Methods
extract_from_file(file)
click to toggle source
# File lib/facebook_dumper.rb, line 183
# Parses a saved friends-list HTML page and rebuilds @friends from it.
#
# Every <div aria-label="友達"> found in the document marks one friend entry;
# the entry's grandparent node is handed to parse_a_person.
#
# @param file [String] path of the saved HTML file
# @return [Array] the new @friends list, one parse_a_person result per entry
def extract_from_file(file)
  # File.read only ever touches the filesystem; Kernel#open would spawn a
  # command if the name happened to start with "|".
  html = File.read(file)
  doc = Nokogiri::HTML.parse(html, nil, nil)

  # Drop nodes that carry no friend data.
  %i[meta link style script svg].each do |tag|
    doc.search(tag).each(&:remove)
  end

  # Remove the class attribute from the remaining structural tags.
  %i[div span i a label img input ul li].each do |tag|
    doc.search(tag).each { |node| node.remove_attribute("class") }
  end

  @friends = doc.xpath('//div[@aria-label="友達"]').map do |node|
    parse_a_person(node.parent.parent) # the grandparent wraps the whole entry
  end
  @friends
end
friends_list()
click to toggle source
# File lib/facebook_dumper.rb, line 255
# Renders the collected friends as text, one `inspect` line per entry,
# in the order produced by sorting @friends.
#
# @return [String] the listing; empty when @friends is empty
def friends_list
  @friends.sort.each_with_object(+"") do |person, out|
    out << person.inspect << "\n"
  end
end
friends_list_take1()
click to toggle source
# NOTE(review): this listing looks like a leftover fragment of an earlier
# draft of friends_list (the "_take1" suffix suggests so; not confirmed).
# As shown, the body appends [p.url, p.name, p.num] to `ar`, then appends
# "<url> <name> -1\n" to `out` when `num` is negative and "<url> <name>\n"
# otherwise. None of `ar`, `p`, `num` or `out` is assigned inside the method,
# so it presumably cannot run as written unless those names are methods
# defined elsewhere; verify before calling it.
# File lib/facebook_dumper.rb, line 246 def friends_list_take1 ar << [p.url, p.name, p.num] if num < 0 out << "#{p.url} #{p.name} -1\n" else out << "#{p.url} #{p.name}\n" end end
parse_a_person(gpa)
click to toggle source
# File lib/facebook_dumper.rb, line 148
# Builds a Person from the node that wraps one friend entry.
#
# Two layouts are handled:
# * An <img> whose parent is not an <a>: the user is no longer on Facebook.
#   The name is taken from the <span> texts other than "友達" (the last one
#   wins) and the url is set to "no_longer_on_Facebook".
# * Otherwise every <a tabindex="0"> is inspected: a link whose href contains
#   "friends_mutual" supplies the mutual-friends count, any other link
#   supplies the profile url and the name.
#
# Nodes that lack the inspected attribute are tolerated: an <img> without
# src leaves imgsrc nil, and anchors without tabindex or href are skipped.
#
# @param gpa [#search] wrapping node of one entry (a Nokogiri node in practice)
# @return [Person] the populated record
def parse_a_person(gpa)
  person = Person.new

  gpa.search(:img).each do |img|
    person.imgsrc = img['src']
    next if img.parent.name == "a"

    gpa.search(:span).each do |span|
      text = span.inner_text
      next if text =~ /^友達$/

      person.name = text
      person.url = "no_longer_on_Facebook"
    end
    return person # return here, since the user is no longer on Facebook.
  end

  gpa.search(:a).each do |a|
    next unless a['tabindex'] == "0"

    url = a['href']
    next if url.nil?

    text = a.inner_text
    if url =~ /friends_mutual/
      counted = text.match(/共通の友達(.+)人/)
      # The count may carry thousands separators, e.g. "1,234".
      person.friends_mutual = counted[1].delete(",").to_i if counted
    else
      person.url = url
      person.name = text
    end
  end

  person
end
run(argv)
click to toggle source
# File lib/facebook_dumper.rb, line 45
# Command-line entry point: parses the HTML file named by the first argument
# and writes the friends listing to "facebook-friends.txt" in the current
# working directory.
#
# @param argv [Array<String>] arguments; only argv[0], the input path, is read
# @return [nil]
def run(argv)
  file = argv[0]
  p file # echo the input path on stdout
  extract_from_file file
  listing = friends_list
  # File.open is used explicitly so the name is always treated as a path.
  File.open("facebook-friends.txt", "w") { |f| f.print listing }
end
take1()
click to toggle source
# NOTE(review): legacy parser, presumably the first attempt at what
# extract_from_file now does (inferred from the name; not confirmed).
#
# What the listing shows, in order:
# * strips selected attributes from <img>, <div>, <a>, <li> and <button>
#   nodes and removes <noscript> nodes; a =begin/=end block disables part of
#   the <a> clean-up
# * writes doc.to_xml(:indent => 2) to "t4.html"
# * resets @friends, then for every <li class="_698"> removes its buttons,
#   strips span attributes and builds a Person from the links found inside
#   <div class="uiProfileBlockContent">: mutual_friends_num, friends_num,
#   uid, url and name. Links whose ajaxify attribute matches the
#   "inactive dialog" pattern get friends_num = -1 and a profile.php url.
# * appends the Person to @friends once per matching <div> and returns @friends
#
# NOTE(review): `doc` is read but never assigned inside this method, so it
# presumably cannot run unless `doc` is a method defined elsewhere; verify.
# NOTE(review): n.attribute('class').value looks like it would fail on a
# <div> without a class attribute; confirm against Nokogiri's behaviour.
# NOTE(review): the listing is collapsed onto one line and split before `=~`.
# File lib/facebook_dumper.rb, line 53 def take1 doc.search(:img).each {|node| node.remove_attribute("alt") node.remove_attribute("role") } doc.search(:div).each {|node| node.remove_attribute("data-ft") node.remove_attribute("data-testid") node.remove_attribute("style") } doc.search(:noscript).map &:remove doc.search(:a).each {|node| =begin node.remove_attribute("data-gt") node.remove_attribute("ajaxify") node.remove_attribute("rel") node.remove_attribute("role") node.remove_attribute("data-hover") node.remove_attribute("data-tooltip-uri") node.remove_attribute("tabindex") node.remove_attribute("aria-hidden") node.remove_attribute("data-profileid") node.remove_attribute("data-flloc") node.remove_attribute("data-unref") node.remove_attribute("data-floc") =end node.remove_attribute("data-hovercard") node.remove_attribute("data-hovercard-prefer-more-content-show") node.remove_attribute("style") node.remove_attribute("aria-haspopup") } doc.search(:li).each {|node| node.remove_attribute("data-ft") node.remove_attribute("data-gt") node.remove_attribute("data-alert-id") } doc.search(:button).each {|node| node.remove_attribute("class") node.remove_attribute("data-flloc") node.remove_attribute("data-profileid") node.remove_attribute("type") node.remove_attribute("data-cancelref") node.remove_attribute("data-floc") } #open("t3.html", "w") {|f| f.print doc.to_s } open("t4.html", "w") {|f| f.print doc.to_xml(:indent => 2) } num = 0 @friends = [] doc.xpath('//li[@class="_698"]').each {|node| #<li class="_698"> node.search(:button).map &:remove node.search(:span).each {|n| n.remove_attribute("class") n.remove_attribute("aria-hidden") } person = Person.new node.search(:div).each {|n| if n.attribute('class').value == "uiProfileBlockContent" n.search(:a).each {|a| text = a.inner_text if text =~ /共通の友達(.+)人/ person.mutual_friends_num = $1.gsub(",", "").to_i elsif text =~ /友達(.+)人/ person.friends_num = $1.gsub(",", "").to_i elsif a.attribute('ajaxify') && a.attribute('ajaxify').value 
=~ /\/ajax\/friends\/inactive\/dialog\?id=(.+)/ #<a href="https://www.facebook.com/etocom/friends#" rel="dialog" ajaxify="/ajax/friends/inactive/dialog?id=100008321654013" role="button">干場 隆志 #ajaxify="/ajax/browser/dialog/mutual_friends/?uid=1036721103" uid = $1.to_i person.uid = uid person.url = "https://www.facebook.com/profile.php?id=#{uid}" person.name = a.inner_text.chomp person.friends_num = -1 elsif a.attribute('data-gt') person.name = text.chomp url = a.attribute('href').value url.gsub!("?fref=profile_friend_list&hc_location=friends_tab", "") url.gsub!("&fref=profile_friend_list&hc_location=friends_tab", "") url.gsub!("?fref=pb&hc_location=friends_tab", "") url.gsub!("&fref=pb&hc_location=friends_tab", "") person.url = url else # ignore end } #friends << [friend_url, friend_name, friend_num, friend_mutual] @friends << person end } num += 1 } #p num #4995 return @friends end