class Birdwatcher::Modules::Statuses::Wordlist
Public Class Methods
info()
click to toggle source
# File lib/birdwatcher/modules/statuses/word_list.rb, line 79 def self.info <<-INFO The Word List module can generate a simple word list or dictionary from words used in statuses across all or specific users. Since users Tweet about their hobbies, interests, work, etc. generating a word list from statuses can be very effective for password cracking. INFO end
Public Instance Methods
run()
click to toggle source
# File lib/birdwatcher/modules/statuses/word_list.rb, line 89
#
# Builds a word list from the statuses in the current workspace and writes it
# to the file named by the DEST option.
#
# Statuses are limited to the users named in the USERS option (a
# whitespace-separated list of screen names) when that option is set;
# otherwise every status in the workspace is used. When INCLUDE_PAGE_TITLES is
# set, titles of the URLs attached to each status (excluding twitter.com
# links and URLs without a title) are added to the corpus as well.
#
# The output file holds one entry per line: "word, count" when INCLUDE_COUNT
# is set, otherwise just the word.
#
# @return [false] when there are no statuses to process (an error is reported)
# @return [Object] otherwise, whatever the final +info+ call returns
def run
  users = option_setting("USERS")
  if users
    # split(" ") already drops leading/trailing whitespace and collapses runs,
    # so no extra strip pass is needed.
    screen_names = users.split(" ")
    user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
    statuses = current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
  else
    statuses = current_workspace.statuses_dataset
  end

  # Count once: the value is needed both for the guard and the progress message.
  status_count = statuses.count
  if status_count.zero?
    error("There are no statuses to process")
    return false
  end

  word_list = make_word_list(
    :min_word_count       => option_setting("MIN_WORD_COUNT"),
    :min_word_length      => option_setting("MIN_WORD_LENGTH"),
    :exclude_words        => option_setting("EXCLUDE_WORDS").to_s.split(" "),
    :exclude_stopwords    => option_setting("EXCLUDE_STOPWORDS"),
    :exclude_common_words => option_setting("EXCLUDE_COMMON"),
    :exclude_hashtags     => option_setting("EXCLUDE_HASHTAGS"),
    :exclude_mentions     => option_setting("EXCLUDE_MENTIONS"),
    :word_cap             => option_setting("WORD_CAP"),
    :stopwords_file       => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
    :common_words_file    => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
  )

  # Read loop-invariant options once instead of once per status / per word.
  include_page_titles = option_setting("INCLUDE_PAGE_TITLES")
  include_count       = option_setting("INCLUDE_COUNT")
  destination         = option_setting("DEST")

  task("Processing #{status_count.to_s.bold} statuses...") do
    statuses.each do |status|
      word_list.add_to_corpus(status.text)
      next unless include_page_titles

      status.urls_dataset
        .where("title IS NOT NULL")
        .where("final_url NOT LIKE 'https://twitter.com/%'")
        .map(&:title)
        .each { |page_title| word_list.add_to_corpus(page_title) }
    end
    word_list.process
  end

  task("Writing #{pluralize(word_list.word_list.length, 'word', 'words')} to file...") do
    File.open(destination, "w") do |f|
      word_list.word_list.each do |word, count|
        f.puts(include_count ? "#{word}, #{count}" : word)
      end
    end
  end

  file_size = number_to_human_size(File.size(destination))
  info("Wrote #{file_size.bold} to #{destination.bold}")
end