class Embulk::Guess::FluentdOutFileGuessPlugin
Constants
- DELIMITER_CANDIDATES
Public Instance Methods
guess_lines(config, sample_lines)
click to toggle source
# File lib/embulk/guess/fluentd_out_file.rb, line 12
#
# Guesses a "fluentd_out_file" parser configuration from sample lines.
#
# An empty Hash is returned whenever the input is judged not to be a
# fluentd_out_file file: the configured parser type is something else, no
# delimiter could be guessed, more than three columns were detected, or a
# column type is neither a timestamp, "string" nor "json".
#
# @param config [Hash] guess configuration; its optional "parser" entry may
#   carry "type" and "delimiter"
# @param sample_lines [Array<String>] sample lines to inspect
# @return [Hash] +{}+ when the sample does not match, otherwise
#   +{"parser" => guessed_parser_config}+ including a "columns" schema
def guess_lines(config, sample_lines)
  # Normalize first: a "parser" key that is present but nil must behave like a
  # missing one instead of raising NoMethodError on nil.fetch.
  parser_config = config["parser"] || {}
  return {} unless parser_config.fetch("type", "fluentd_out_file") == "fluentd_out_file"

  # guess delimiter
  if parser_config["type"] == "fluentd_out_file" && parser_config["delimiter"]
    # an explicitly configured delimiter wins over guessing
    delim = parser_config["delimiter"]
  else
    delim = guess_delimiter(sample_lines)
    # not fluentd_out_file file
    return {} unless delim
  end

  parser_guessed = DataSource.new.merge(parser_config).merge({"type" => "fluentd_out_file", "delimiter" => delim})

  # guess schema
  sample_records = sample_lines.map { |line| line.split(delim) }
  column_types = SchemaGuess.types_from_array_records(sample_records)
  # not fluentd_out_file file
  return {} if column_types.size > 3

  schema = []
  column_types.each do |type|
    if type.is_a?(SchemaGuess::TimestampTypeMatch)
      schema << {"name" => "time", "type" => type, "format" => type.format}
    elsif type == "string"
      schema << {"name" => "tag", "type" => type}
    elsif type == "json"
      schema << {"name" => "record", "type" => type}
    else
      # not fluentd_out_file file
      return {}
    end
  end
  parser_guessed["columns"] = schema

  {"parser" => parser_guessed}
end