class Tefil::ColumnAnalyzer
Public Class Methods
new(keys = [])
click to toggle source
keys should be array including keys or key=value items) as like:
['1', '2'] ['1=str1', '2=str2'] ['1=str1', '2']
Calls superclass method
Tefil::TextFilterBase::new
# File lib/tefil/columnanalyzer.rb, line 33 def initialize(keys = []) @nums_values = {} @keys = [] keys.each do |str| if str.include? '=' key, value = str.split('=') @nums_values[key.to_i-1] = value else @keys << str end end super({}) end
Private Instance Methods
get_ranges(ary)
click to toggle source
true の範囲を示す二重配列を返す。 各要素は 始点..終点 の各インデックスで出来た範囲。
各要素は[始点, 終点] の各インデックス。
# File lib/tefil/columnanalyzer.rb, line 127 def get_ranges(ary) results = [] start = nil prev = false ary << false # for true in final item ary.each_with_index do |cur, i| if prev == false && cur == true start = i prev = cur elsif prev == true && cur == false results << (start..(i - 1)) prev = cur else next end end results end
process_stream(in_io, out_io)
click to toggle source
# File lib/tefil/columnanalyzer.rb, line 49 def process_stream(in_io, out_io) lines = in_io.readlines # delete line consist of one character lines.delete_if {|line| line.split.uniq.size == 1} ranges = get_ranges(projection_ary(lines)) items_list = lines.map do |line| ranges.map { |range| line[range].strip } end # screen items items_head = items_list[0] items_list.select! do |items| flag = true @nums_values.each do |key, value| if items[key] != value flag = false break end end flag end # output head results = [] results << (1..(items_head.size)).to_a.map{|v| v.to_s} #pp items_list[0] #pp items_head #exit results << items_head unless items_list[0] == items_head results += items_list Tefil::ColumnFormer.new.form(results, out_io) out_io.puts out_io.puts "All: #{lines.size}" out_io.print "Extracted: #{items_list.size}" conditions = [] @nums_values.each do |key, val| conditions << "#{key}=#{val}" end out_io.puts " (#{conditions.join(' ')})" out_io.puts if items_list.size != 0 results = [] results << %w(key head types) ranges.each_with_index do |range, i| results << [(i+1).to_s, lines[0][range].strip, items_list.map {|items| items[i]}.sort.uniq.size.to_s ] end Tefil::ColumnFormer.new.form(results, out_io) end unless @keys.empty? out_io.puts out_io.puts "key analysis" @keys.each do |key| out_io.puts "(key=#{key})" values = items_list.map{|items| items[key.to_i-1]} names = values.sort.uniq results = [] names.each do |name| results << [name, values.count(name).to_s] end results.sort_by!{|v| v[1].to_i} Tefil::ColumnFormer.new.form(results, out_io) out_io.puts end end end
projection_ary(lines)
click to toggle source
全ての文字列の最大長を要素数とする配列で、 空白文字以外があれば true, 全て空白文字ならば false にした配列。
# File lib/tefil/columnanalyzer.rb, line 148 def projection_ary(lines) max_length = lines.max_by{|line| line.size}.size results = Array.new(max_length).fill(false) lines.each do |line| line.chomp.size.times do |i| c = line[i] next if results[i] == true if c == ' ' next else results[i] = true end end end results end