class TwitterStreamingApi::FetchTweets
Public Class Methods
new(time: 300, word_counts: @@WORDS_COUNT)
click to toggle source
# File lib/twitter_streaming_api.rb, line 12
#
# Samples the Twitter streaming endpoint for a fixed period, counts how often
# each word appears in English tweets, and prints the ten most frequent words.
#
# Connection credentials and the list of words to ignore are read from
# 'config.yml' (keys: consumer_key, consumer_secret, access_token,
# access_token_secret, ignore_words).
#
# @param time [Integer, #to_int, String] how long to sample, in seconds;
#   converted with Kernel#Integer, so a non-numeric value raises.
# @param word_counts [Hash{String => Integer}] running word => count tally.
#   It is updated in place and stored in @@WORDS_COUNT, so with the default
#   argument the tally accumulates across successive instances.
def initialize(time: 300, word_counts: @@WORDS_COUNT)
  config_file = 'config.yml'

  # Load the Twitter connection info from the config file.
  puts "Loading config from #{config_file} and running for #{time} seconds"
  yaml_config = YAML.load_file(config_file)

  # Establish the connection with the Twitter streaming end point.
  client = Twitter::Streaming::Client.new do |config|
    config.consumer_key        = yaml_config['consumer_key']
    config.consumer_secret     = yaml_config['consumer_secret']
    config.access_token        = yaml_config['access_token']
    config.access_token_secret = yaml_config['access_token_secret']
  end

  # Words excluded from the result list. Entries are stripped because tokens
  # produced by splitting a tweet on whitespace never carry surrounding
  # blanks, so an entry written as "a, the" could otherwise never match.
  # Array() tolerates a missing key (nil) as well as a YAML list.
  ignore_list = Array(yaml_config['ignore_words'])
                .flat_map { |entry| entry.to_s.downcase.split(',') }
                .map(&:strip)
                .reject(&:empty?)

  # Announce when collection starts and when it is due to end.
  start_time = Time.now
  end_time = start_time + Integer(time)
  puts "Analysing Twitter tweets starting at #{start_time} ending at #{end_time}"

  # Keep the tally in the class variable so a later instance created with the
  # default argument continues from these counts. The hash is mutated in
  # place below, so assigning the reference once up front is sufficient.
  @@WORDS_COUNT = word_counts

  tweets = 0
  progress_show_at = Time.now

  # Sample tweets from the stream until time is up. The deadline is only
  # checked when the stream yields an object.
  client.sample do |object|
    current_time = Time.now
    break if current_time >= end_time

    # Print one progress mark per elapsed wall-clock second. Epoch seconds
    # are compared (not second-of-minute) so a gap of exactly N minutes is
    # still recognised as a new second.
    if current_time.to_i != progress_show_at.to_i
      print '*'
      progress_show_at = current_time
    end

    # Only English tweets are counted. No further filtering is applied, so
    # emoji and other non-word tokens are tallied like any other word.
    next unless object.is_a?(Twitter::Tweet) && object.lang == 'en'

    tweets += 1
    object.text.split(' ').each do |token|
      # Compare case-insensitively.
      word = token.downcase

      # An ignored word is skipped only if it is not already being tracked;
      # a word already present in the tally keeps counting.
      next if !word_counts.key?(word) && ignore_list.include?(word)

      word_counts[word] = word_counts.fetch(word, 0) + 1
    end
  end

  # Display the result on the console.
  top_words = word_counts.max_by(10) { |_word, count| count }
  puts "\nTop 10 words by count in #{tweets} Tweets: #{top_words}"
end