class HTTPLogAnalyzer::Importer

Constants

StatusKeys

Public Class Methods

new( domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil, ignored_referers: nil, ignored_paths: nil, period: nil ) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 10
# Sets up an importer for one site.
#
# domain::           stored as-is; process_line compares referer hosts to it.
# ignored_ips::      Hash used for ignore lookups keyed by source address.
# ignored_cities::   Hash used for ignore lookups keyed by source city.
# ignored_browsers:: Hash used for ignore lookups keyed by browser.
# ignored_referers:: Hash used for ignore lookups keyed by referer host.
# ignored_paths::    Hash used for ignore lookups keyed by request path.
# period::           optional value handed to parse_period; nil means no
#                    period restriction.
#
# Any ignore list that is not supplied defaults to a fresh empty Hash.
# NOTE: a supplied ignored_ips Hash is stored by reference, and process_line
# writes to it.
def initialize(domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil,
               ignored_referers: nil, ignored_paths: nil, period: nil)
  @domain = domain

  @ignored_ips      = ignored_ips      || {}
  @ignored_cities   = ignored_cities   || {}
  @ignored_browsers = ignored_browsers || {}
  @ignored_referers = ignored_referers || {}
  @ignored_paths    = ignored_paths    || {}

  # Only parse a period when one was actually given.
  @period = (parse_period(period) if period)

  @log_parser = HttpLogParser.new

  # These two are process-wide globals, not instance state; presumably they
  # are read by code outside this method (not visible here).
  $user_agent_parser = UserAgentParser::Parser.new
  $geo_ip = GeoIP.new(File.expand_path('/usr/local/var/GeoIP/GeoLiteCity.dat'))

  @stats = Stats.new
end
parse_list(text) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 82
# Turns a newline-separated text list into a lookup Hash.
#
# For every line, anything from the first '#' onward is dropped and the
# remainder is stripped of surrounding whitespace; lines that end up empty
# are skipped. Each surviving item becomes a key mapped to +true+.
#
# text:: String holding one item per line.
#
# Returns a Hash of item => true, in order of first appearance.
def self.parse_list(text)
  text.split("\n").each_with_object({}) do |raw_line, items|
    item = raw_line.sub(/#.*/, '').strip
    items[item] = true unless item.empty?
  end
end

Public Instance Methods

ignore?(entry) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 64
# Decides whether +entry+ matches any of the ignore lists.
#
# The lists are consulted in this order: source address, browser, source
# city, referer host (only when the entry has a referer URI), request path.
# The first truthy lookup result is returned; otherwise the result of the
# request-path lookup is returned (nil when the path is not listed).
def ignore?(entry)
  by_address = @ignored_ips[entry.source.address]
  return by_address if by_address

  by_browser = @ignored_browsers[entry.user_agent.browser]
  return by_browser if by_browser

  by_city = @ignored_cities[entry.source.city]
  return by_city if by_city

  # The referer, or its URI, may be absent; skip the host lookup in that case.
  referer_uri = entry.referer&.uri
  by_referer = referer_uri && @ignored_referers[referer_uri.host]
  return by_referer if by_referer

  @ignored_paths[entry.request.uri.path]
end
page?(entry) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 72
# Tells whether +entry+ should be counted as a page view.
#
# An entry is a page when its request path
# * does not embed another http(s) URL (bad URL construction, from bots),
# * is not empty, and
# * has no MIME types (pages have empty MIME types).
#
# Returns true or false.
def page?(entry)
  request = entry.request
  path = request.uri.path

  return false if path =~ %r{/.*?https?://}
  return false if path.empty?

  request.mime_types.empty?
end
parse_period(period) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 32
# Converts a textual period into an inclusive Range of DateTime values.
#
# period:: String of the form "<start> - <end>". It is split on the first
#          ' - ' only, and each half is parsed with DateTime.parse.
#
# Returns a Range from the parsed start to the parsed end.
#
# Raises ArgumentError when the ' - ' separator is missing (so there is no
# end date), or when DateTime.parse rejects either half.
def parse_period(period)
  first, last = period.split(' - ', 2)

  # Without this check a malformed period reaches Range.new with too few
  # arguments and fails with an unhelpful "wrong number of arguments" error.
  unless first && last
    raise ArgumentError, "Invalid period #{period.inspect}: expected '<start> - <end>'"
  end

  Range.new(DateTime.parse(first), DateTime.parse(last))
end
process_line(file, line_num, line) click to toggle source
# File lib/http-log-analyzer/importer.rb, line 36
# Parses one log line and folds it into the statistics.
#
# file::     accepted for the caller's convenience; currently unused here.
# line_num:: accepted for the caller's convenience; currently unused here.
# line::     the raw log line to parse.
#
# Handling, in order:
# * entries whose timestamp falls outside @period (when one is set) are
#   skipped;
# * entries matched by ignore? are skipped, and their source address is
#   added to @ignored_ips so later entries from that address are ignored too;
# * entries accepted by page? are recorded in @stats.
# Anything else is dropped silently.
#
# Raises ParseError when the log parser cannot parse +line+.
def process_line(file, line_num, line)
  begin
    data = @log_parser.parse_line(line)
  rescue
    raise ParseError, "Can't parse line: #{line}"
  end
  entry = Entry.new(data)
  if @period && !@period.cover?(entry.timestamp)
    # ignore timestamp out of specified period
  elsif ignore?(entry)
    # Remember the address: once one request is ignored, every later request
    # from the same source is ignored as well.
    @ignored_ips[entry.source.address] = true
  elsif page?(entry)
    if (statuses_key = StatusKeys[entry.status.class])
      @stats.add(statuses_key, "#{entry.status.code}: #{entry.request.uri.path}")
    end
    @stats.add(:source_country, entry.source.country)
    @stats.add(:source_region, entry.source.region)
    @stats.add(:source_city, entry.source.city)
    @stats.add(:wv_source_city, entry.source.city) if entry.source.region == 'West Virginia, United States'
    @stats.add(:pages, entry.request.uri.path)
    # The referer may be nil (the condition already allows for that), so the
    # argument must use safe navigation too, or a nil referer raises
    # NoMethodError here.
    @stats.add(:via, entry.referer&.uri) unless entry.referer&.uri&.host == @domain
    @stats.add(:searches, entry.referer.query) if entry.referer&.query
    @stats.add(:browsers, entry.user_agent.browser)
    @stats.add(:systems, entry.user_agent.system)
    @stats.add(:dates, entry.calendar_week)
  end
end
report() click to toggle source
# File lib/http-log-analyzer/importer.rb, line 78
# Delegates to the report method of the statistics collector held in @stats
# and returns whatever that call returns.
def report
  @stats.report
end