class Embulk::Guess::Regex

TODO implement guess plugin to make this command work:

$ embulk guess -g "regex" partial-config.yml

Depending on the file format the plugin handles, you can choose
one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
or line guess (LineGuessPlugin).

Public Instance Methods

apache_combined(config, sample_lines) click to toggle source
# File lib/embulk/guess/regex.rb, line 40
# Builds the guesser for the Apache "combined" layout: everything produced by
# apache_common, followed by a :referer string field and a :user_agent string
# field.
#
# @param config forwarded unchanged to apache_common
# @param sample_lines forwarded unchanged to apache_common
# @return the value returned by the last chained `string` call (presumably the
#   guesser itself, builder-style -- confirm against RegexApacheLogGuesser)
def apache_combined(config, sample_lines)
  base = apache_common(config, sample_lines)
  with_referer = base.string(:referer)
  # regex_name differs from the field name here; presumably it is the name of
  # the capture group in the generated pattern -- confirm in the guesser class.
  with_referer.string(:user_agent, regex_name: 'userAgent')
end
apache_combinedio(config, sample_lines) click to toggle source
# File lib/embulk/guess/regex.rb, line 45
# Builds the guesser for the Apache "combinedio" layout: everything produced
# by apache_combined, followed by two integer fields, :in_byte and :out_byte.
#
# @param config forwarded unchanged to apache_combined
# @param sample_lines forwarded unchanged to apache_combined
# @return the value returned by the last chained `integer` call (presumably
#   the guesser itself, builder-style -- confirm against RegexApacheLogGuesser)
def apache_combinedio(config, sample_lines)
  combined = apache_combined(config, sample_lines)
  # Both byte counters pass a camelCase regex_name next to the snake_case
  # field name.
  with_input_bytes = combined.integer(:in_byte, regex_name: 'inByte')
  with_input_bytes.integer(:out_byte, regex_name: 'outByte')
end
apache_common(config, sample_lines) click to toggle source
# File lib/embulk/guess/regex.rb, line 32
# Builds the guesser for the Apache "common" layout by chaining field
# definitions onto a fresh RegexApacheLogGuesser, in this order:
# remote_host, identity, user, datetime, method/path/protocol, status, size.
#
# Neither argument is used by this method itself; they are accepted so all
# layout builders share the same signature.
#
# @param config not read here
# @param sample_lines not read here
# @return the value returned by the last chained call (presumably the guesser
#   itself, builder-style -- confirm against RegexApacheLogGuesser)
def apache_common(config, sample_lines)
  guesser = RegexApacheLogGuesser.new
  guesser = guesser.ip(:remote_host, regex_name: 'remoteHost')
  guesser = guesser.token(:identity)
  guesser = guesser.token(:user)
  # NOTE(review): `kakko` presumably matches a bracket-delimited field such as
  # "[10/Oct/2000:13:55:36 -0700]" -- confirm in RegexApacheLogGuesser.
  guesser = guesser.kakko(:datetime, format: '%d/%b/%Y:%H:%M:%S %z', type: 'timestamp')
  guesser = guesser.method_path_protocol
  guesser = guesser.integer(:status)
  guesser.integer_or_minus(:size)
end
apache_x_forwarded_for() click to toggle source
# File lib/embulk/guess/regex.rb, line 27
# Builds a guesser containing only a leading :x_forwarded_for field, defined
# with `ip_or_minus` and the regex name 'forwardedFor'. guess_lines combines
# it with other layout guessers via `+`.
#
# @return the value returned by `ip_or_minus` (presumably the guesser itself,
#   builder-style -- confirm against RegexApacheLogGuesser)
def apache_x_forwarded_for
  prefix_guesser = RegexApacheLogGuesser.new
  prefix_guesser.ip_or_minus(:x_forwarded_for, regex_name: 'forwardedFor')
end
guess_lines(config, sample_lines) click to toggle source
# File lib/embulk/guess/regex.rb, line 14
# Tries each known Apache log layout against the sample lines and reports the
# first layout whose guesser accepts all of them.
#
# @param config forwarded to the layout builder methods
# @param sample_lines forwarded to the layout builders and passed to each
#   candidate's `match_all?`
# @return [Hash] {"parser" => candidate.guessed} for the first candidate whose
#   `match_all?` is truthy, or an empty hash when none matches
def guess_lines(config, sample_lines)
  # All candidates are built up front; their order decides which layout wins
  # when more than one matches.
  candidates = [
    apache_common(config, sample_lines),
    apache_combined(config, sample_lines),
    apache_combinedio(config, sample_lines),
    apache_x_forwarded_for + apache_combined(config, sample_lines),
    apache_x_forwarded_for + apache_combinedio(config, sample_lines)
  ]

  winner = candidates.find { |candidate| candidate.match_all?(sample_lines) }
  winner ? {"parser" => winner.guessed} : {}
end