class TwitterParser

Public Class Methods

new(tweet) click to toggle source
# File lib/twitter_parser.rb, line 7
# Builds a parser around one tweet's markup.
#
# tweet - the markup passed to Nokogiri::HTML.parse; the parsed result is kept
#         in @tweet, which every getter in this class queries.
def initialize(tweet)
  parsed_markup = Nokogiri::HTML.parse(tweet)
  @tweet = parsed_markup
end

Public Instance Methods

get_conversation_id() click to toggle source
# File lib/twitter_parser.rb, line 95
# Reads the "data-conversation-id" attribute of the first ".tweet" element.
def get_conversation_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-conversation-id"]
end
get_favorite_count() click to toggle source
# File lib/twitter_parser.rb, line 91
# Reads "data-tweet-stat-count" from the first <span> inside the first
# ".ProfileTweet-action--favorite" element. The attribute value is returned
# as stored; no numeric conversion happens here.
def get_favorite_count
  favorite_action = @tweet.css(".ProfileTweet-action--favorite").first
  stat_span = favorite_action.css("span").first
  stat_span['data-tweet-stat-count']
end
get_fullname() click to toggle source
# File lib/twitter_parser.rb, line 61
# Returns the text of whatever the ".fullname" selector matches.
def get_fullname
  fullname_nodes = @tweet.css(".fullname")
  fullname_nodes.text
end
get_hashtags() click to toggle source

Get hashtags in the tweet

# File lib/twitter_parser.rb, line 52
# Passes the tweet text (from get_tweet_text) to extract_hashtags and returns
# its result. extract_hashtags is defined outside this excerpt.
def get_hashtags
  extract_hashtags(get_tweet_text)
end
get_is_reply_to() click to toggle source
# File lib/twitter_parser.rb, line 99
# Reads the "data-is-reply-to" attribute of the first ".tweet" element.
def get_is_reply_to
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-is-reply-to"]
end
get_mentioned_urls() click to toggle source

Get URLs in the tweet

# File lib/twitter_parser.rb, line 46
# Passes the tweet text (from get_tweet_text) to extract_urls and returns its
# result. extract_urls is defined outside this excerpt.
def get_mentioned_urls
  extract_urls(get_tweet_text)
end
get_mentions() click to toggle source

Get the account names and user IDs (uids) of the accounts mentioned in the tweet

# File lib/twitter_parser.rb, line 120
# Collects the accounts referenced by ".twitter-atreply" elements.
#
# Returns a two-element array [names, uids]: names holds the text of each
# element's <b> child, and uids holds each element's
# "data-mentioned-user-id" attribute. Returns [nil, nil] when no such
# element is found.
def get_mentions
  reply_links = @tweet.css(".twitter-atreply")
  return nil, nil if reply_links.empty?

  names = reply_links.map { |link| link.css("b").text }
  uids = reply_links.map { |link| link['data-mentioned-user-id'] }
  [names, uids]
end
get_profile_pic() click to toggle source

Get URL to the profile pic

# File lib/twitter_parser.rb, line 41
# Returns the "src" attribute of the first "img.avatar" element.
def get_profile_pic
  avatar_image = @tweet.css("img.avatar").first
  avatar_image['src']
end
get_reply_count() click to toggle source
# File lib/twitter_parser.rb, line 103
# Reads "data-tweet-stat-count" from the first <span> inside the first
# ".ProfileTweet-action--reply" element. The attribute value is returned as
# stored; no numeric conversion happens here.
def get_reply_count
  reply_action = @tweet.css(".ProfileTweet-action--reply").first
  stat_span = reply_action.css("span").first
  stat_span['data-tweet-stat-count']
end
get_reply_to_user() click to toggle source

The user of the tweet that is being replied to (if any)

# File lib/twitter_parser.rb, line 108
# Finds the user that this tweet is replying to, if any.
#
# Looks for the first <span> whose text contains "In reply" and reads the
# first link inside it.
#
# Returns a two-element array [screen_name, user_id]: the link's href with
# every "/" removed, and the link's "data-user-id" attribute. Returns
# [nil, nil] when no such span exists or the span holds no link. A link
# without an href yields a nil screen name.
def get_reply_to_user
  reply_span = @tweet.css("span").find { |span| span.text.include?("In reply") }
  return nil, nil unless reply_span

  # The span is matched on its text alone, so it is not guaranteed to wrap a
  # link; treat that like "no reply" instead of calling [] on nil.
  profile_link = reply_span.css("a")[0]
  return nil, nil unless profile_link

  href = profile_link['href']
  screen_name = href ? href.delete("/") : nil
  [screen_name, profile_link['data-user-id']]
end
get_retweet_count() click to toggle source
# File lib/twitter_parser.rb, line 87
# Reads "data-tweet-stat-count" from the first <span> inside the first
# ".ProfileTweet-action--retweet" element. The attribute value is returned as
# stored; no numeric conversion happens here.
def get_retweet_count
  retweet_action = @tweet.css(".ProfileTweet-action--retweet").first
  stat_span = retweet_action.css("span").first
  stat_span['data-tweet-stat-count']
end
get_tweet_id() click to toggle source
# File lib/twitter_parser.rb, line 79
# Reads the "data-tweet-id" attribute of the first ".tweet" element.
def get_tweet_id
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-tweet-id"]
end
get_tweet_text() click to toggle source

Get the tweet text

# File lib/twitter_parser.rb, line 70
# Returns the text found under ".js-tweet-text-container" with leading and
# trailing whitespace removed.
def get_tweet_text
  raw_text = @tweet.css(".js-tweet-text-container").text
  raw_text.strip
end
get_tweet_time() click to toggle source

Get the time of the tweet

# File lib/twitter_parser.rb, line 75
# Parses the "title" attribute of the first ".tweet-timestamp" element with
# DateTime.parse and formats it as "DD Mon YYYY HH:MM:SS".
def get_tweet_time
  timestamp_node = @tweet.css(".tweet-timestamp").first
  posted_at = DateTime.parse(timestamp_node["title"])
  posted_at.strftime('%d %b %Y %H:%M:%S')
end
get_user_id() click to toggle source
# File lib/twitter_parser.rb, line 65
# Reads "data-user-id" from the first ".account-group" match found among the
# ".js-user-profile-link" matches.
def get_user_id
  profile_links = @tweet.css(".js-user-profile-link")
  account_group = profile_links.css(".account-group").first
  account_group["data-user-id"]
end
get_username() click to toggle source
# File lib/twitter_parser.rb, line 57
# Reads the "data-screen-name" attribute of the first ".tweet" element.
def get_username
  tweet_node = @tweet.css(".tweet").first
  tweet_node["data-screen-name"]
end
parse_tweet() click to toggle source

Parse the individual tweet

# File lib/twitter_parser.rb, line 12
# Collects every extracted field of the tweet into a single hash.
#
# Returns nil when the parsed document has no text at all. Otherwise returns
# a hash keyed by symbols (tweet_text, username, ..., date_searchable).
# :time_collected is the wall-clock time of this call, and :date_searchable
# carries the same formatted string as :tweet_time.
#
# get_reply_to_user, get_mentions and get_tweet_time are each called once and
# their results reused, so the document is not queried twice for the same
# value. get_tweet_link is defined outside this excerpt.
def parse_tweet
  return if @tweet.text.empty?

  reply_to_user, reply_to_uid = get_reply_to_user
  mention_names, mention_uids = get_mentions
  tweet_time = get_tweet_time

  {
    tweet_text: get_tweet_text,
    username: get_username,
    fullname: get_fullname,
    user_id: get_user_id,
    profile_pic: get_profile_pic,
    hashtags: get_hashtags,
    mentioned_urls: get_mentioned_urls,
    conversation_id: get_conversation_id,
    is_reply_to: get_is_reply_to,
    reply_to_user: reply_to_user,
    reply_to_uid: reply_to_uid,
    tweet_id: get_tweet_id,
    tweet_time: tweet_time,
    tweet_link: get_tweet_link,
    retweet_count: get_retweet_count,
    favorite_count: get_favorite_count,
    reply_count: get_reply_count,
    mention_names: mention_names,
    mention_uids: mention_uids,
    time_collected: Time.now,
    date_searchable: tweet_time
  }
end