class Embulk::Input::TwitterarchiveInputPlugin

Public Class Methods

new(task, schema, index, page_builder)
Calls superclass method
# File lib/embulk/input/twitterarchive.rb, line 43
# Sets up one plugin instance for a single input task.
#
# All four arguments are forwarded unchanged to the superclass initializer.
# Afterwards the instance remembers the archive directory
# (task['directory']) and the one file it is responsible for, i.e. the
# entry of task['files'] at position +index+.
def initialize(task, schema, index, page_builder)
  super
  @directory = task['directory']
  @file = task['files'][index]
end
resume(task, columns, count) { |task, columns, count| ... }
# File lib/embulk/input/twitterarchive.rb, line 34
# Runs the input tasks by yielding to the caller's block and reports progress.
#
# Prints a start message, yields (task, columns, count) to the given block,
# then prints the block's result serialized with #to_json.
#
# @yield [task, columns, count] performs the actual input work
# @return [Hash] the next config diff, which is always empty
def self.resume(task, columns, count, &control)
  puts "Twitter Archive input started."
  reports = yield(task, columns, count)
  puts "Twitter Archive input finished. Commit reports = #{reports.to_json}"

  # Nothing is carried over to the next run.
  {}
end
transaction(config, &control)
# File lib/embulk/input/twitterarchive.rb, line 11
# Prepares the task and schema, then delegates to resume.
#
# Reads <directory>/data/js/tweet_index.js, removes the
# "var tweet_index =  " assignment prefix so the remainder parses as JSON,
# and collects the 'file_name' of every index entry. The resulting task
# holds those file names plus the directory; the task count handed to
# resume is the number of files.
#
# NOTE(review): 'directory' defaults to nil, and File.join(nil, ...) raises
# TypeError — consider whether the parameter should be required.
def self.transaction(config, &control)
  directory = config.param('directory', :string, default: nil)
  index_path = File.join(directory, 'data/js/tweet_index.js')
  index_json = File.read(index_path).gsub(/var tweet_index =  /, '')
  files = JSON.parse(index_json).map { |file_meta| file_meta['file_name'] }

  task = { 'files' => files, 'directory' => directory }

  # Column order here defines the column index passed to Column.new.
  layout = [
    ['id', :long],
    ['text', :string],
    ['source', :string],
    ['in_reply_to_status_id', :long],
    ['created_at', :timestamp]
  ]
  columns = layout.each_with_index.map do |(name, type), position|
    Column.new(position, name, type)
  end

  resume(task, columns, files.length, &control)
end

Public Instance Methods

run()
# File lib/embulk/input/twitterarchive.rb, line 49
# Reads this task's tweet file and adds one record per tweet to the page
# builder.
#
# The file at File.join(@directory, @file) is expected to contain a JSON
# array preceded by a "Grailbird.data.tweets_<digits>_<digits> = "
# assignment header. Only the first occurrence of that header is removed, so
# the same character sequence appearing later in the file (for example
# inside a tweet's text) is passed through unchanged.
#
# Each record is [id, text, source, in_reply_to_status_id, created_at], with
# created_at converted via Time.parse.
#
# @return [Hash] the commit report, which is always empty
def run
  puts "Twitter Archive input thread #{@index}..."

  raw = File.read(File.join(@directory, @file))
  # sub rather than gsub: stripping every match would also rewrite tweet
  # bodies that happen to quote the header.
  tweets = JSON.parse(raw.sub(/Grailbird\.data\.tweets_[0-9]+_[0-9]+ = /, ''))

  tweets.each do |tweet|
    @page_builder.add([
      tweet['id'],
      tweet['text'],
      tweet['source'],
      tweet['in_reply_to_status_id'],
      Time.parse(tweet['created_at'])
    ])
  end
  # finish is called exactly once, after every record has been added.
  @page_builder.finish

  {}
end