class See5::RulesOutputParser

Reads See5 rules output and returns an array of hashes representing the rules. Note that this is the output normally sent to stdout, NOT the .rules file! The .rules file lacks some important information, such as confidence.

Public Class Methods

new(fname) click to toggle source
# File lib/see5/rules_output_parser.rb, line 14
# Opens the file at +fname+ and parses it immediately; the parsed result is
# then available through #model.
#
# The file handle is only needed while parsing, so it is closed before the
# constructor returns (also when parsing raises) instead of being left open
# for the garbage collector to reclaim.
#
# @param fname [String] path of the file to read
def initialize(fname)
  @file = File.open(fname)
  @rules = []

  parse_file
ensure
  # @file is still nil here when File.open itself raised.
  @file&.close
end
parse_file(fname) click to toggle source
# File lib/see5/rules_output_parser.rb, line 10
# Convenience entry point: builds a parser for +fname+ and returns the
# result of its #model call.
#
# @param fname [String] path handed to .new
# @return [Hash] whatever #model returns for the new parser
def self.parse_file(fname)
  parser = new(fname)
  parser.model
end

Public Instance Methods

model() click to toggle source
# File lib/see5/rules_output_parser.rb, line 21
# Packages the parse results into a single hash.
#
# @return [Hash] with the keys +:default_classification+ (the value stored
#   in @default_classification) and +:rules+ (the @rules array itself, not
#   a copy)
def model
  summary = {}
  summary[:default_classification] = @default_classification
  summary[:rules] = @rules
  summary
end
parse_file() click to toggle source
# File lib/see5/rules_output_parser.rb, line 28
# Walks the input after the header, collecting one entry in @rules for every
# line that starts with "Rule " and stopping at the "Default class:" line,
# whose value (the text after the last colon, stripped) is stored in
# @default_classification.
#
# Lines matching neither prefix are skipped. Running out of input before a
# "Default class:" line is seen lets the StopIteration raised by
# +lines.next+ propagate to the caller.
def parse_file
  discard_header

  while (current = lines.next)
    if current.start_with?("Default class:")
      @default_classification = current.split(":").last.strip
      break
    end

    @rules << parse_rule(current) if current.start_with?("Rule ")
  end
end

Private Instance Methods

discard_header() click to toggle source

Discard the file header and advance to the rules section. TODO: save the data from the header, in case the user wants it?

# File lib/see5/rules_output_parser.rb, line 51
# Skips everything up to and including the "Rules:" marker line, plus the
# single line that follows it, so the next read starts at the first rule.
#
# The marker is compared after chomping, so it is recognised whether the
# line ends in "\n", "\r\n", or nothing at all. If the marker never appears,
# the StopIteration raised by +lines.next+ at end of input propagates.
#
# @return [String] the discarded line that followed the marker
def discard_header
  while (line = lines.next)
    break if line.chomp == "Rules:"
  end
  # discard the blank line that follows the marker
  lines.next
end
lines() click to toggle source
# File lib/see5/rules_output_parser.rb, line 44
# Returns the enumerator used to pull lines out of @file one at a time with
# +next+.
#
# The enumerator is built once and reused. Building a fresh one on every
# call only worked because each new enumerator resumed from the IO's current
# position, and it cost a new enumerator per line read.
#
# @return [Enumerator::Lazy] line enumerator over @file
def lines
  # TODO: lazy unnecessary given that rules are small?
  @lines ||= @file.each_line.lazy
end
parse_class_line(line) click to toggle source
# File lib/see5/rules_output_parser.rb, line 76
# Extracts the predicted class and its confidence from a line of the form
# "class NAME [CONFIDENCE]".
#
# The class name may contain any characters (not only word characters), and
# any amount of whitespace may separate it from the bracketed confidence.
#
# @param line [String] the line to parse
# @return [Hash] +:classification+ (String) and +:confidence+ (Float, via
#   String#to_f)
# @raise [ArgumentError] if the line does not have the expected form
def parse_class_line(line)
  matches = line.match(/class (.+?)\s+\[(.+)\]/)
  raise ArgumentError, "unrecognized class line: #{line.inspect}" unless matches

  {
    classification: matches[1],
    confidence: matches[2].to_f
  }
end
parse_condition_line(line) click to toggle source
# File lib/see5/rules_output_parser.rb, line 95
# Splits an "attribute = value" condition into a pair.
#
# Only the first "=" separates attribute from value, so a value that itself
# contains "=" is kept whole. A line with no "=" yields a nil value.
#
# NOTE(review): conditions written with other operators (for example ">" or
# "<=") are not separated meaningfully by this split — confirm whether the
# parsed output can contain them.
#
# @param line [String] the condition line
# @return [Array(Symbol, String)] the attribute name as a symbol and the
#   stripped value
def parse_condition_line(line)
  attr, val = line.split("=", 2).map(&:strip)

  [attr.to_sym, val]
end
parse_rule(line) click to toggle source
# File lib/see5/rules_output_parser.rb, line 59
# Builds a Rule from its header line plus the lines that follow it.
#
# +line+ is handed to parse_rule_info_line. Each following line is stripped
# and parsed as a condition until one starts with "->"; that line is parsed
# with parse_class_line and ends the rule.
#
# @param line [String] the "Rule ..." header line
# @return [Rule] built from the header info, a hash of the conditions, and
#   the class info
def parse_rule(line)
  rule_info = parse_rule_info_line(line)
  pairs = []

  until (body_line = lines.next.strip).start_with?("->")
    pairs << parse_condition_line(body_line)
  end
  class_info = parse_class_line(body_line)

  Rule.new(rule_info, pairs.to_h, class_info)
end
parse_rule_info_line(line) click to toggle source
# File lib/see5/rules_output_parser.rb, line 85
# Pulls the bracketed statistics out of a "Rule N: (A/B, lift L)" header.
#
# The "/B" part is optional; when it is absent the second capture is an
# empty string, which +to_i+ turns into 0.
#
# @param line [String] the rule header line
# @return [Hash] +:cases_covered+ (Integer), +:cases_not_covered+ (Integer)
#   and +:lift+ (Float)
def parse_rule_info_line(line)
  found = line.match(%r{Rule \d+: \((\d+)(?:/)?([^,]*), lift (.+)\)})

  covered = found[1].to_i
  not_covered = found[2]&.to_i
  lift = found[3].to_f

  { cases_covered: covered, cases_not_covered: not_covered, lift: lift }
end