class TwitterParser
Public Class Methods
new(tweet)
click to toggle source
# File lib/twitter_parser.rb, line 7
# Build a parser around the markup of a single tweet.
#
# tweet - the tweet's HTML, handed to Nokogiri::HTML.parse. The parsed
#         document is stored in @tweet, which every getter reads from.
def initialize(tweet)
  parsed_document = Nokogiri::HTML.parse(tweet)
  @tweet = parsed_document
end
Public Instance Methods
get_conversation_id()
click to toggle source
# File lib/twitter_parser.rb, line 95
# Read the conversation id from the tweet's root element.
#
# Returns the "data-conversation-id" attribute of the first ".tweet" element,
# or nil when the document contains no such element (previously this raised
# NoMethodError on nil).
def get_conversation_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node && tweet_node["data-conversation-id"]
end
get_favorite_count()
click to toggle source
# File lib/twitter_parser.rb, line 91
# Read the favorite count from the tweet's markup.
#
# Takes the first ".ProfileTweet-action--favorite" element, then the first
# <span> inside it, and returns that span's 'data-tweet-stat-count' attribute.
# Returns nil when either element is absent instead of raising NoMethodError
# on nil.
def get_favorite_count
  action = @tweet.css(".ProfileTweet-action--favorite").first
  return nil unless action

  counter = action.css("span").first
  counter && counter['data-tweet-stat-count']
end
get_fullname()
click to toggle source
# File lib/twitter_parser.rb, line 61
# Return the text of the ".fullname" element(s) in the tweet.
#
# The text is taken from the whole result of the CSS query, not from a single
# element.
def get_fullname
  fullname_nodes = @tweet.css(".fullname")
  fullname_nodes.text
end
get_is_reply_to()
click to toggle source
# File lib/twitter_parser.rb, line 99
# Read the reply marker from the tweet's root element.
#
# Returns the "data-is-reply-to" attribute of the first ".tweet" element (nil
# when the attribute is not set), or nil when there is no ".tweet" element at
# all (previously this raised NoMethodError on nil).
def get_is_reply_to
  root = @tweet.css(".tweet").first
  root && root["data-is-reply-to"]
end
get_mentioned_urls()
click to toggle source
Get the URLs mentioned in the tweet text
# File lib/twitter_parser.rb, line 46
# Return whatever extract_urls finds in the tweet's text.
#
# The text comes from get_tweet_text; extract_urls is defined outside this
# method and its result is returned unchanged.
def get_mentioned_urls
  extract_urls(get_tweet_text)
end
get_mentions()
click to toggle source
Get the names and user IDs (uids) of the accounts mentioned in the tweet
# File lib/twitter_parser.rb, line 120
# Collect the accounts mentioned in the tweet.
#
# Returns a two-element array [names, uids]: names holds the text of the <b>
# element inside each ".twitter-atreply" node, uids holds each node's
# 'data-mentioned-user-id' attribute, in the same order. When the tweet has
# no ".twitter-atreply" nodes the result is [nil, nil].
def get_mentions
  reply_links = @tweet.css(".twitter-atreply")
  return nil, nil if reply_links.empty?

  names = reply_links.map { |link| link.css("b").text }
  uids = reply_links.map { |link| link['data-mentioned-user-id'] }
  [names, uids]
end
get_profile_pic()
click to toggle source
Get URL to the profile pic
# File lib/twitter_parser.rb, line 41
# Get the URL of the profile picture.
#
# Returns the 'src' attribute of the first "img.avatar" element, or nil when
# the tweet has no such image (previously this raised NoMethodError on nil).
def get_profile_pic
  avatar = @tweet.css("img.avatar").first
  avatar && avatar['src']
end
get_reply_count()
click to toggle source
# File lib/twitter_parser.rb, line 103
# Read the reply count from the tweet's markup.
#
# Takes the first ".ProfileTweet-action--reply" element, then the first <span>
# inside it, and returns that span's 'data-tweet-stat-count' attribute.
# Returns nil when either element is absent instead of raising NoMethodError
# on nil.
def get_reply_count
  reply_action = @tweet.css(".ProfileTweet-action--reply").first
  return nil unless reply_action

  stat_span = reply_action.css("span").first
  stat_span && stat_span['data-tweet-stat-count']
end
get_reply_to_user()
click to toggle source
Get the user whose tweet is being replied to (if any)
# File lib/twitter_parser.rb, line 108
# Identify the user being replied to, if any.
#
# Finds the first <span> whose text includes "In reply" and reads the first
# <a> inside it. Returns [user, uid], where user is the link's 'href' with
# every "/" removed and uid is its 'data-user-id' attribute.
#
# Returns [nil, nil] when there is no such span, and also when the span holds
# no link (that case used to raise NoMethodError on nil). A link without an
# 'href' yields nil for the user rather than raising.
def get_reply_to_user
  # find stops at the first matching span instead of filtering all of them.
  reply_span = @tweet.css("span").find { |span| span.text.include?("In reply") }
  link = reply_span && reply_span.css("a").first
  return nil, nil unless link

  href = link['href']
  [href && href.delete("/"), link['data-user-id']]
end
get_retweet_count()
click to toggle source
# File lib/twitter_parser.rb, line 87
# Read the retweet count from the tweet's markup.
#
# Takes the first ".ProfileTweet-action--retweet" element, then the first
# <span> inside it, and returns that span's 'data-tweet-stat-count' attribute.
# Returns nil when either element is absent instead of raising NoMethodError
# on nil.
def get_retweet_count
  retweet_action = @tweet.css(".ProfileTweet-action--retweet").first
  return nil unless retweet_action

  stat_span = retweet_action.css("span").first
  stat_span && stat_span['data-tweet-stat-count']
end
get_tweet_id()
click to toggle source
# File lib/twitter_parser.rb, line 79
# Read the tweet id from the tweet's root element.
#
# Returns the "data-tweet-id" attribute of the first ".tweet" element, or nil
# when the document contains no such element (previously this raised
# NoMethodError on nil).
def get_tweet_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node && tweet_node["data-tweet-id"]
end
get_tweet_link()
click to toggle source
# File lib/twitter_parser.rb, line 83
# Build the absolute URL of the tweet.
#
# Prefixes "https://twitter.com" to the "data-permalink-path" attribute of the
# first ".tweet" element. Returns nil when that element or attribute is
# missing; the original raised NoMethodError for a missing element and
# TypeError (String + nil) for a missing attribute.
def get_tweet_link
  tweet_node = @tweet.css(".tweet").first
  permalink_path = tweet_node && tweet_node["data-permalink-path"]
  return nil unless permalink_path

  "https://twitter.com#{permalink_path}"
end
get_tweet_text()
click to toggle source
Get the tweet text
# File lib/twitter_parser.rb, line 70
# Get the tweet text.
#
# Returns the text of the ".js-tweet-text-container" element(s) with leading
# and trailing whitespace removed.
def get_tweet_text
  raw_text = @tweet.css(".js-tweet-text-container").text
  # strip trims both ends, so a separate lstrip beforehand adds nothing.
  raw_text.strip
end
get_tweet_time()
click to toggle source
Get the time of the tweet
# File lib/twitter_parser.rb, line 75
# Get the time of the tweet.
#
# Parses the "title" attribute of the first ".tweet-timestamp" element with
# DateTime.parse and returns it formatted as '%d %b %Y %H:%M:%S'
# (e.g. "15 Mar 2017 06:20:00").
#
# Returns nil when the timestamp element or its title is missing, where the
# original raised on nil. A title that DateTime.parse cannot interpret still
# raises, as before.
def get_tweet_time
  timestamp = @tweet.css(".tweet-timestamp").first
  title = timestamp && timestamp["title"]
  return nil unless title

  DateTime.parse(title).strftime('%d %b %Y %H:%M:%S')
end
get_user_id()
click to toggle source
# File lib/twitter_parser.rb, line 65
# Read the author's user id from the profile link.
#
# Narrows the ".js-user-profile-link" matches with a second ".account-group"
# query and returns the "data-user-id" attribute of the first result, or nil
# when nothing matches (previously this raised NoMethodError on nil).
def get_user_id
  profile_link = @tweet.css(".js-user-profile-link").css(".account-group").first
  profile_link && profile_link["data-user-id"]
end
get_username()
click to toggle source
# File lib/twitter_parser.rb, line 57
# Read the author's screen name from the tweet's root element.
#
# Returns the "data-screen-name" attribute of the first ".tweet" element, or
# nil when the document contains no such element (previously this raised
# NoMethodError on nil).
def get_username
  root = @tweet.css(".tweet").first
  root && root["data-screen-name"]
end
parse_tweet()
click to toggle source
Parse the individual tweet
# File lib/twitter_parser.rb, line 12
# Parse the individual tweet into a Hash of its fields.
#
# Returns nil when the parsed document has no text at all. Otherwise returns a
# Hash with one entry per getter, plus :time_collected (Time.now at the moment
# of parsing) and :date_searchable (the same value as :tweet_time).
#
# get_reply_to_user, get_mentions and get_tweet_time each feed two keys. They
# are called once and the result reused, instead of running the same document
# queries twice per tweet.
def parse_tweet
  return nil if @tweet.text.empty?

  reply_to_user, reply_to_uid = get_reply_to_user
  mention_names, mention_uids = get_mentions
  tweet_time = get_tweet_time

  {
    tweet_text: get_tweet_text,
    username: get_username,
    fullname: get_fullname,
    user_id: get_user_id,
    profile_pic: get_profile_pic,
    hashtags: get_hashtags,
    mentioned_urls: get_mentioned_urls,
    conversation_id: get_conversation_id,
    is_reply_to: get_is_reply_to,
    reply_to_user: reply_to_user,
    reply_to_uid: reply_to_uid,
    tweet_id: get_tweet_id,
    tweet_time: tweet_time,
    tweet_link: get_tweet_link,
    retweet_count: get_retweet_count,
    favorite_count: get_favorite_count,
    reply_count: get_reply_count,
    mention_names: mention_names,
    mention_uids: mention_uids,
    time_collected: Time.now,
    date_searchable: tweet_time
  }
end