class Empyrean::TweetParser

Public Class Methods

merge_parsed(parsed) click to toggle source

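Merges an array of dicts, each as returned by parse, into a single dict. All counters are summed, one random example is kept per mention, hashtag and smiley, and the mentions, hashtags, clients and smileys are sorted by count in descending order.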

# File lib/empyrean/tweetparser.rb, line 182
# Combines several result dicts (each shaped like the return value of
# parse) into one summary dict.
#
# parsed - Array of dicts with the keys :mentions, :hashtags, :clients,
#          :smileys, :times_of_day, :tweet_count, :retweet_count and
#          :selftweet_count.
#
# Returns a dict with the same keys. The three counters and the 24
# per-hour slots are summed. :mentions, :hashtags, :clients and :smileys
# are returned as arrays of [key, data] pairs ordered by descending
# :count; for mentions, hashtags and smileys the collected :examples are
# replaced by a single randomly picked :example.
def merge_parsed(parsed)
  merged = {
    mentions: {},
    hashtags: {},
    clients: {},
    smileys: {},
    times_of_day: Array.new(24, 0),
    tweet_count: 0,
    retweet_count: 0,
    selftweet_count: 0
  }

  parsed.each do |chunk|
    [:tweet_count, :retweet_count, :selftweet_count].each do |counter|
      merged[counter] += chunk[counter]
    end

    chunk[:mentions].each do |user, data|
      entry = (merged[:mentions][user] ||= { count: 0 })
      entry[:count] += data[:count]
      entry[:name] = data[:name] # the last chunk seen decides the display name
      entry[:examples] = (entry[:examples] || []) + data[:examples]
    end

    chunk[:hashtags].each do |hashtag, data|
      entry = (merged[:hashtags][hashtag] ||= { count: 0 })
      entry[:count] += data[:count]
      entry[:hashtag] = data[:hashtag]
      entry[:examples] = (entry[:examples] || []) + data[:examples]
    end

    chunk[:smileys].each do |smile, data|
      entry = (merged[:smileys][smile] ||= { count: 0 })
      entry[:frown] ||= data[:frown]
      entry[:count] += data[:count]
      entry[:smiley] ||= data[:smiley] # the first spelling seen is kept
      entry[:examples] = (entry[:examples] || []) + data[:examples]
    end

    chunk[:clients].each do |client, data|
      entry = (merged[:clients][client] ||= { count: 0 })
      entry[:count] += data[:count]
      entry[:name] = data[:name]
      entry[:url] = data[:url]
    end

    chunk[:times_of_day].each_with_index do |count, hour|
      merged[:times_of_day][hour] += count
    end
  end

  # Keep a single, randomly chosen example per entry.
  [:mentions, :hashtags, :smileys].each do |category|
    merged[category].each_value do |entry|
      entry[:example] = entry[:examples].sample
      entry.delete(:examples)
    end
  end

  # Turn each category into [key, data] pairs, highest count first.
  [:mentions, :hashtags, :clients, :smileys].each do |category|
    merged[category] = merged[category].sort_by { |_key, entry| entry[:count] }.reverse
  end

  merged
end
new(options, config) click to toggle source
# File lib/empyrean/tweetparser.rb, line 24
# Stores the two collaborators used while parsing.
#
# options - object answering #verbose (consulted by parse_one to decide
#           whether to print progress lines).
# config  - Hash-like object; parse_one reads :ignored_users,
#           :renamed_users and :timezone_difference from it.
def initialize(options, config)
  @options, @config = options, config
end

Public Instance Methods

parse(tweets) click to toggle source

Parses an array of tweets.

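Returns a dict holding the aggregated mentions, hashtags, clients and smileys, the per-hour tweet counts (times_of_day), and the tweet, retweet and self-tweet counters.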

# File lib/empyrean/tweetparser.rb, line 32
# Runs parse_one over every tweet and aggregates the results.
#
# tweets - Array of tweet objects accepted by parse_one.
#
# Returns a dict with:
#   :mentions, :hashtags, :smileys - per-key data with a summed :count
#       and an :examples array (filled from non-retweets only)
#   :clients         - per-client :count, :name and :url (all tweets)
#   :times_of_day    - 24 counters indexed by the tweet's :time_of_day
#   :tweet_count, :retweet_count, :selftweet_count - plain counters
def parse(tweets)
  totals = {
    mentions: {},
    hashtags: {},
    clients: {},
    smileys: {},
    times_of_day: Array.new(24, 0),
    tweet_count: 0,
    retweet_count: 0,
    selftweet_count: 0
  }

  tweets.each do |tweet|
    info = parse_one(tweet)

    if info[:retweet]
      # Retweets only bump their own counter; their text is not analysed.
      totals[:retweet_count] += 1
    else
      info[:mentions].each do |user, data|
        entry = (totals[:mentions][user] ||= { count: 0 })
        entry[:count] += data[:count]
        entry[:name] ||= data[:name]
        (entry[:examples] ||= []) << data[:example]
      end

      info[:hashtags].each do |hashtag, data|
        entry = (totals[:hashtags][hashtag] ||= { count: 0 })
        entry[:count] += data[:count]
        entry[:hashtag] ||= data[:hashtag]
        (entry[:examples] ||= []) << data[:example]
      end

      info[:smileys].each do |smile, data|
        entry = (totals[:smileys][smile] ||= { count: 0 })
        entry[:frown] ||= data[:frown]
        entry[:count] += data[:count]
        entry[:smiley] ||= data[:smiley]
        (entry[:examples] ||= []) << data[:example]
      end

      totals[:selftweet_count] += 1
    end

    # Clients are tallied for every tweet, retweets included, keyed by name.
    client = info[:client]
    client_entry = (totals[:clients][client[:name]] ||= { count: 0 })
    client_entry[:count] += 1
    client_entry[:name] = client[:name]
    client_entry[:url] = client[:url]

    totals[:times_of_day][info[:time_of_day]] += 1
    totals[:tweet_count] += 1
  end

  totals
end
parse_one(tweet) click to toggle source

Parses a single tweet object

Returns a dict with the tweet's mentions, hashtags, smileys, client (name and url), time of day and a retweet flag.

# File lib/empyrean/tweetparser.rb, line 97
# Extracts the statistics-relevant parts of a single tweet.
#
# tweet - Hash-like object read through the string keys 'id', 'text',
#         'retweeted_status', 'source' and 'created_at'.
#
# Returns a dict with:
#   :retweet     - true when 'retweeted_status' is present
#   :mentions    - keyed by lower-cased (possibly renamed) user name
#   :hashtags    - keyed by lower-cased hashtag
#   :smileys     - keyed by the matched smiley text, with a :frown flag
#   :client      - :name and :url captured from 'source' via SOURCE_REGEX
#   :time_of_day - hour taken from 'created_at', shifted by
#                  @config[:timezone_difference] and wrapped to 0..23
# For retweets :mentions, :hashtags and :smileys stay empty.
def parse_one(tweet)
  puts "==> #{tweet['id']}" if @options.verbose

  result = {
    mentions: {},
    hashtags: {},
    time_of_day: 0,
    retweet: false,
    client: { name: "", url: "" },
    smileys: {}
  }

  if tweet['retweeted_status'].nil?
    # Mentions
    tweet['text'].scan(USERNAME_REGEX) do |match|
      name = match[0]
      key = name.downcase
      puts "===> mentioned: #{name}" if @options.verbose
      next if @config[:ignored_users].include?(key)

      # Renamed users are counted under the configured replacement key.
      if @config[:renamed_users].include?(key.to_sym)
        key = @config[:renamed_users][key.to_sym]
      end
      entry = (result[:mentions][key] ||= {})
      entry[:name] ||= name
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end

    # Hashtags
    tweet['text'].scan(HASHTAG_REGEX) do |match|
      tag = match[0]
      key = tag.downcase
      puts "===> hashtag: ##{tag}" if @options.verbose
      entry = (result[:hashtags][key] ||= {})
      entry[:hashtag] ||= tag
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end

    # Smileys :^)
    eyes = "[xX8;:=%]"
    nose = "[-oc*^]"
    smile_regex = /(>?#{eyes}'?#{nose}[\)pPD\}\]>]|[\(\{\[<]#{nose}'?#{eyes}<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/
    frown_regex = /(#{eyes}'?#{nose}[\(\[\\\/\{|]|[\)\]\\\/\}|]#{nose}'?#{eyes}|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/

    # Matching runs on the text with &amp; &lt; &gt; decoded; the stored
    # examples keep the original text.
    unescaped_text = tweet['text'].gsub("&amp;", "&").gsub("&lt;", "<").gsub("&gt;", ">")

    unescaped_text.scan(smile_regex) do |match|
      face = match[0]
      puts "===> smile: #{face}" if @options.verbose
      entry = (result[:smileys][face] ||= { frown: false })
      entry[:smiley] ||= face
      entry[:count] = entry[:count].to_i.succ
      entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
    end

    # Frowns are skipped entirely when the text contains "scheme://",
    # because the ":/" in a URL would itself match as a frown.
    if unescaped_text !~ /\w+:\/\//
      unescaped_text.scan(frown_regex) do |match|
        face = match[0]
        puts "===> frown: #{face}" if @options.verbose
        entry = (result[:smileys][face] ||= { frown: true })
        entry[:smiley] ||= face
        entry[:count] = entry[:count].to_i.succ
        entry[:example] ||= { text: tweet['text'], id: tweet['id'] }
      end
    end
  else
    # Retweet: the status text is not analysed.
    result[:retweet] = true
  end

  # Client the (re)tweet was made with: capture 1 is the URL, capture 2 the name.
  source = tweet['source'].match(SOURCE_REGEX)
  result[:client][:url] = source[1]
  result[:client][:name] = source[2]

  # Hour of day, shifted by the configured offset and wrapped around midnight.
  hour = tweet['created_at'].match(/^\d{4}-\d{2}-\d{2} (\d{2})/)[1].to_i
  result[:time_of_day] = (hour + @config[:timezone_difference]) % 24

  result
end