class RubberbandFlamethrower::DataGenerator
Constants
- WORD_FILES
The WORD_FILES constant is an array of included word files which will be used to create the pool of random words used for data generation. You can comment or uncomment particular files to change the size of the pool of words. Please see the README file in the words folder for more information about the lists.
Attributes
Public Class Methods
Initializes the word_list attribute with an array of all the words contained in the files listed in the WORD_FILES array.
# File lib/rubberband_flamethrower/data_generator.rb, line 31
# Populate word_list with every line of every file named in WORD_FILES.
#
# Each WORD_FILES entry is appended verbatim to the directory of this source
# file to form the path that is read, and the file content is split on
# newlines so that each line becomes one entry in the pool.
def initialize
  base_dir = File.dirname(__FILE__)
  # flat_map builds the pool in a single pass instead of re-allocating the
  # accumulated array once per word file.
  self.word_list = WORD_FILES.flat_map do |word_file|
    File.read(base_dir + word_file).split(/\n/)
  end
end
Public Instance Methods
Create an Elasticsearch-friendly timestamp for the current time. @return [String]
# File lib/rubberband_flamethrower/data_generator.rb, line 59
# Format the current local time as "YYYYMMDDTHH:MM:SS"
# (compact date, the letter T, colon-separated time).
#
# @return [String] the formatted timestamp
def current_timestamp
  Time.now.strftime('%Y%m%dT%H:%M:%S')
end
# File lib/rubberband_flamethrower/data_generator.rb, line 71
# Write a batch of generated documents to a file, one per line.
#
# The file is opened in 'w' mode, so any existing content is replaced.
#
# @param batch_size [#to_i] number of lines to write
# @param filename [String] path of the file to write
# @param num_words passed through unchanged to generate_random_insert_data
# @return [Integer] batch_size.to_i (the value returned by Integer#times)
def generate_dataset(batch_size, filename, num_words=nil)
  File.open(filename, 'w') do |file|
    batch_size.to_i.times do
      file.write("#{generate_random_insert_data(num_words)}\n")
    end
  end
end
Generate a JSON object that contains a message, username, and post_date, intended to be passed as insert data to an Elasticsearch server. @return [String] the JSON-encoded object
# File lib/rubberband_flamethrower/data_generator.rb, line 66
# Build one JSON document with message, username and post_date keys.
#
# @param num_words [Integer, String, nil] number of words for the message;
#   nil or an empty value selects a random count from 6 to 15
# @return [String] the JSON-encoded hash
def generate_random_insert_data(num_words)
  # Only ask for empty? when the value supports it: calling it directly
  # raised NoMethodError whenever an Integer word count was passed.
  blank = num_words.nil? || (num_words.respond_to?(:empty?) && num_words.empty?)
  num_words = 6 + rand(10) if blank

  {
    message: random_message(num_words.to_i),
    username: random_username,
    post_date: current_timestamp
  }.to_json
end
# File lib/rubberband_flamethrower/data_generator.rb, line 46
# Build a message of randomly sampled words joined by single spaces.
#
# @param num_words [Integer] how many words to sample from word_list
# @return [String] the space-separated words ("" when num_words is 0)
def random_message(num_words)
  num_words.times.map { word_list.sample }.join(' ')
end
Create a message of between 6 and 15 random words that is at most 140 characters long and ends with a period. @return [String]
# File lib/rubberband_flamethrower/data_generator.rb, line 41
# Build a tweet-sized message: 6 to 15 random words joined by spaces,
# cut to at most 139 characters, with a period appended.
#
# @return [String] the message, never longer than 140 characters
def random_tweet
  number_of_words = 6 + rand(10) # rand(10) yields 0..9, so 6..15 words
  words = number_of_words.times.map { word_list.sample }.join(' ')
  # Keep 139 characters so the trailing period brings the total to 140.
  "#{words[0, 139]}."
end
Create a random value to be used as a username. The return value is one random word with every character other than letters and numbers removed. @return [String]
# File lib/rubberband_flamethrower/data_generator.rb, line 53
# Pick one random word from word_list and strip every character that is
# not a digit or a letter (the match is case-insensitive).
#
# @return [String] the sanitized word
def random_username
  word_list.sample.gsub(/[^0-9a-z]/i, '')
end