class Birdwatcher::Modules::Statuses::WordCloud

Public Class Methods

info() click to toggle source
# File lib/birdwatcher/modules/statuses/word_cloud.rb, line 98
        # Returns the multi-line help text describing the Word Cloud module.
        #
        # The "show options" command name embedded in the text is passed
        # through String#bold before being interpolated.
        def self.info
          options_command = 'show options'.bold
<<-DESCRIPTION
The Word Cloud module can generate a classic weighted word cloud from words used
in statuses across all or specific users and between different times.

The module is heavily configurable; have a look at the options with #{options_command}

Please note that configuring the module with a long timespan might result in a
very long execution time when the word cloud image is generated.

The generated image will be in PNG format.
DESCRIPTION
        end

Public Instance Methods

run() click to toggle source
# File lib/birdwatcher/modules/statuses/word_cloud.rb, line 112
# Builds a weighted word cloud from statuses in the current workspace and
# writes the rendered PNG image to the path given by the DEST option.
#
# Statuses are restricted to the screen names listed in USERS (when set) and
# to the SINCE..BEFORE date range. SINCE falls back to seven days before
# today and BEFORE falls back to the current date.
#
# Returns false (after reporting an error) when no statuses match.
def run
  day_format = "%Y-%m-%d"

  # Optional restriction to a space-separated list of screen names.
  users_setting = option_setting("USERS")
  scope =
    if users_setting
      screen_names = users_setting.split(" ").map(&:strip)
      matching_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
      current_workspace.statuses_dataset.where("user_id IN ?", matching_ids)
    else
      current_workspace.statuses_dataset
    end

  # Both ends of the date window are normalised to YYYY-MM-DD strings.
  since_setting = option_setting("SINCE")
  from_day =
    if since_setting
      parse_time(since_setting).strftime(day_format)
    else
      (Date.today - 7).strftime(day_format)
    end

  before_setting = option_setting("BEFORE")
  to_day =
    if before_setting
      parse_time(before_setting).strftime(day_format)
    else
      Time.now.strftime(day_format)
    end

  matched = scope.where("DATE(posted_at) >= DATE(?) AND DATE(posted_at) <= DATE(?)", from_day, to_day).all
  if matched.empty?
    error("There are no statuses to process")
    return false
  end

  excluded_words = option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip)
  words = make_word_list(
    min_word_count: option_setting("MIN_WORD_COUNT"),
    min_word_length: option_setting("MIN_WORD_LENGTH"),
    exclude_words: excluded_words,
    exclude_stopwords: option_setting("EXCLUDE_STOPWORDS"),
    exclude_common_words: option_setting("EXCLUDE_COMMON"),
    exclude_hashtags: option_setting("EXCLUDE_HASHTAGS"),
    exclude_mentions: option_setting("EXCLUDE_MENTIONS"),
    word_cap: option_setting("WORD_CAP"),
    stopwords_file: File.join(DATA_DIRECTORY, "english_stopwords.txt"),
    common_words_file: File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
  )

  task("Processing #{matched.count.to_s.bold} statuses...") do
    matched.each do |tweet|
      words.add_to_corpus(tweet.text)
      next unless option_setting("INCLUDE_PAGE_TITLES")

      # Titles of linked pages feed the corpus too, skipping untitled URLs
      # and links that resolve back to twitter.com.
      linked_titles = tweet.urls_dataset
        .where("title IS NOT NULL")
        .where("final_url NOT LIKE 'https://twitter.com/%'")
        .map(&:title)
      linked_titles.each { |title| words.add_to_corpus(title) }
    end
    words.process
  end

  task("Generating word cloud, patience please...") do
    colours = option_setting("PALETTE").split(" ").map(&:strip)
    width   = option_setting("IMAGE_WIDTH").to_i
    height  = option_setting("IMAGE_HEIGHT").to_i

    renderer = MagicCloud::Cloud.new(words.word_list, :rotate => :none, :palette => colours)
    png_data = renderer.draw(width, height).to_blob { self.format = "png" }
    File.binwrite(option_setting("DEST"), png_data)
  end

  info("Word cloud written to #{option_setting('DEST').bold}")
end