class Embulk::Guess::FluentdOutFileGuessPlugin

Constants

DELIMITER_CANDIDATES

Public Instance Methods

guess_lines(config, sample_lines)
# File lib/embulk/guess/fluentd_out_file.rb, line 12
# Guesses the parser settings (delimiter and column schema) for sample lines
# laid out as up to three delimited fields: time, tag and a JSON record.
#
# An empty hash is returned whenever the input does not look like such a file:
# the configured parser type is something else, no delimiter can be detected,
# more than three columns are found, or a column has an unexpected type.
#
# @param config [#fetch, #[]] guess configuration; only the "parser" entry is read
# @param sample_lines [Array<#split>] sample lines to inspect
# @return [Hash] {"parser" => guessed_parser_config}, or {} when nothing can be guessed
def guess_lines(config, sample_lines)
  return {} unless config.fetch("parser", {}).fetch("type", "fluentd_out_file") == "fluentd_out_file"

  parser_config = config["parser"] || {}

  # A delimiter is taken from the configuration only when the parser type is
  # stated explicitly as well; otherwise it is detected from the samples.
  if parser_config["type"] == "fluentd_out_file" && parser_config["delimiter"]
    delim = parser_config["delimiter"]
  else
    delim = guess_delimiter(sample_lines)
    # No delimiter detected: not a fluentd_out_file file.
    return {} unless delim
  end

  parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "fluentd_out_file", "delimiter" => delim})

  # Split into at most three fields. The trailing JSON record may itself
  # contain the delimiter (e.g. "," or " "), so an unlimited split would tear
  # the record apart and make the guess fail or mislabel the fragments.
  max_columns = 3
  sample_records = sample_lines.map { |line| line.split(delim, max_columns) }
  column_types = SchemaGuess.types_from_array_records(sample_records)

  # Safety net in case the type guesser reports more columns than expected.
  return {} if column_types.size > max_columns

  # Map each guessed type onto the column role it stands for.
  schema = []
  column_types.each do |type|
    if type.is_a?(SchemaGuess::TimestampTypeMatch)
      schema << {"name" => "time", "type" => type, "format" => type.format}
    elsif type == "string"
      schema << {"name" => "tag", "type" => type}
    elsif type == "json"
      schema << {"name" => "record", "type" => type}
    else
      # Any other type means this is not a fluentd_out_file file.
      return {}
    end
  end
  parser_guessed["columns"] = schema

  {"parser" => parser_guessed}
end