class Fluent::Plugin::XmlParser

Public Instance Methods

configure(conf) click to toggle source

This method is called after the config_param declarations have read the configuration parameters.

Calls superclass method
# File lib/fluent/plugin/parser_xml.rb, line 37
# Finishes plugin setup after the superclass has processed +conf+.
#
# conf - configuration object; only its 'time_xpath' entry is read here.
#
# @time_xpath is reset to nil when conf has no 'time_xpath' entry; otherwise
# its current value is run through json_parse. @attr_xpaths and
# @value_xpaths are always run through json_parse. @time_parser is built
# from @time_format, the single argument Fluent::TimeParser takes.
def configure(conf)
  super

  @time_xpath = conf['time_xpath'].nil? ? nil : json_parse(@time_xpath)
  # The time parser receives the time format as its only argument.
  @time_parser = Fluent::TimeParser.new(@time_format)
  @attr_xpaths = json_parse(@attr_xpaths)
  @value_xpaths = json_parse(@value_xpaths)
end
format_time(time) click to toggle source
# File lib/fluent/plugin/parser_xml.rb, line 106
# Renders an epoch-based time value as a string.
#
# time - value accepted by Time.at.
#
# Returns the ISO 8601 form when @time_format is nil, otherwise the result
# of strftime with @time_format.
def format_time(time)
  moment = Time.at(time)
  @time_format.nil? ? moment.iso8601 : moment.strftime(@time_format)
end
json_parse(message) click to toggle source
# File lib/fluent/plugin/parser_xml.rb, line 114
# Parses a JSON string with Yajl.
#
# message - the JSON text to parse.
#
# Returns the parsed value. If parsing raises a StandardError, the error and
# its backtrace are logged through $log and the method returns whatever the
# final logging call returns; nothing is re-raised.
def json_parse(message)
  Yajl::Parser.new.parse(message)
rescue => e
  $log.error "JSON parse error", :error => e.to_s, :error_class => e.class.to_s
  $log.warn_backtrace e.backtrace
end
parse(text) { |time, record| ... } click to toggle source

This is the main method. The input “text” is the unit of data to be parsed: with the in_tail plugin it is a single line, and with in_syslog it is a single syslog message.

# File lib/fluent/plugin/parser_xml.rb, line 55
# Parses one unit of XML input and yields the resulting event.
#
# text - String holding the XML to parse.
#
# Yields time and record:
# * time is Fluent::Engine.now when @time_xpath is nil; otherwise it is
#   @time_parser applied to the value selected by @time_xpath, which is an
#   [xpath, accessor method name] pair.
# * record maps each name resolved from @attr_xpaths to the value resolved
#   from the entry at the same position in @value_xpaths. When @time_key is
#   set, record also holds the formatted event time under that key.
#
# Each @attr_xpaths / @value_xpaths entry is [xpath, accessor, fallback]:
# * a nil xpath means the second element is used literally;
# * otherwise the accessor method is called on the element found at xpath;
# * if no element is found and a third element is present, that third
#   element is used instead.
#
# Any StandardError raised while parsing, extracting, or inside the caller's
# block is logged as a warning and swallowed; in the parsing/extraction case
# nothing is yielded.
def parse(text)
  doc = REXML::Document.new(text)
  $log.debug doc

  time =
    if @time_xpath.nil?
      Fluent::Engine.now
    else
      @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
    end

  # Format the event time computed above. The previous code passed the
  # never-assigned @time (nil), which made every parse fail when @time_key
  # was configured.
  record = @time_key.nil? ? {} : { @time_key => format_time(time) }

  # Shared resolver for attribute-name and value specs.
  resolve = lambda do |spec|
    if spec[0].nil?
      spec[1] # literal taken from the configuration
    else
      el = doc.elements[spec[0]]
      if el.nil? && spec.size > 2
        spec[2] # element absent: use the configured fallback
      else
        # With no fallback, a missing element raises here and the event is
        # dropped by the rescue below.
        el.method(spec[1]).call
      end
    end
  end

  attrs = @attr_xpaths.map { |spec| resolve.call(spec) }
  values = @value_xpaths.map { |spec| resolve.call(spec) }
  attrs.each_with_index do |name, i|
    record[name] = values[i]
  end

  yield time, record
rescue REXML::ParseException => e
  $log.warn "Parse error", :error => e.to_s
  $log.debug_backtrace(e.backtrace)
rescue StandardError => e
  # StandardError rather than Exception, so signals and exit requests
  # (Interrupt, SystemExit, ...) are not swallowed.
  $log.warn "error", :error => e.to_s
  $log.debug_backtrace(e.backtrace)
end