class HTTPLogAnalyzer::Importer
Constants
- StatusKeys
Public Class Methods
new( domain:, ignored_ips: nil, ignored_cities: nil, ignored_browsers: nil, ignored_referers: nil, ignored_paths: nil, period: nil )
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 10
# Sets up an importer for one site's HTTP access logs.
#
# domain::           stored as @domain; process_line compares referer hosts
#                    against it before recording a :via statistic.
# ignored_ips::      lookup indexed by source address (see ignore?).
# ignored_cities::   lookup indexed by source city.
# ignored_browsers:: lookup indexed by user-agent browser.
# ignored_referers:: lookup indexed by referer host.
# ignored_paths::    lookup indexed by request path.
# period::           optional string handed to parse_period; nil (or false)
#                    disables period filtering.
#
# Every ignore list defaults to an empty Hash when omitted.
#
# Besides instance state, this assigns the globals $user_agent_parser and
# $geo_ip. NOTE(review): presumably they are read by code outside this
# class (nothing visible here uses them) -- confirm before changing.
def initialize(
  domain:,
  ignored_ips: nil,
  ignored_cities: nil,
  ignored_browsers: nil,
  ignored_referers: nil,
  ignored_paths: nil,
  period: nil
)
  @domain = domain

  @ignored_ips      = ignored_ips || {}
  @ignored_cities   = ignored_cities || {}
  @ignored_browsers = ignored_browsers || {}
  @ignored_referers = ignored_referers || {}
  @ignored_paths    = ignored_paths || {}

  @period = (parse_period(period) if period)

  @log_parser = HttpLogParser.new
  $user_agent_parser = UserAgentParser::Parser.new
  geo_database = File.expand_path('/usr/local/var/GeoIP/GeoLiteCity.dat')
  $geo_ip = GeoIP.new(geo_database)
  @stats = Stats.new
end
parse_list(text)
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 82
# Turns a newline-separated list into a lookup Hash of { entry => true }.
#
# Everything from a '#' to the end of a line is treated as a comment and
# removed; surrounding whitespace is stripped; lines that end up empty are
# skipped. The resulting Hash has the shape the ignored_* lookups in this
# class are indexed with.
#
# text:: the list as a String. nil is accepted and yields an empty Hash,
#        matching how the constructor treats omitted ignore lists.
#
# Returns a Hash mapping each remaining entry to true.
def self.parse_list(text)
  text.to_s.each_line.each_with_object({}) do |line, entries|
    entry = line.sub(/#.*/, '').strip
    entries[entry] = true unless entry.empty?
  end
end
Public Instance Methods
ignore?(entry)
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 64
# Decides whether an entry matches any of the ignore lists.
#
# The lists are consulted in this order, stopping at the first hit:
# source address, browser, source city, referer host (only when the entry
# has a referer with a URI), request path.
#
# Returns the value stored in the first matching list (truthy), or the
# falsy result of the final path lookup when nothing matches.
def ignore?(entry)
  by_address = @ignored_ips[entry.source.address]
  return by_address if by_address

  by_browser = @ignored_browsers[entry.user_agent.browser]
  return by_browser if by_browser

  by_city = @ignored_cities[entry.source.city]
  return by_city if by_city

  # A missing referer (or a referer without a URI) can never match.
  referer_uri = entry.referer&.uri
  by_referer = referer_uri && @ignored_referers[referer_uri.host]
  return by_referer if by_referer

  @ignored_paths[entry.request.uri.path]
end
page?(entry)
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 72
# Tells whether an entry is a request for a page, as opposed to an asset
# or a malformed request.
#
# Returns true only when the request path is non-empty, does not embed
# another http(s) URL, and the request has no MIME types.
def page?(entry)
  path = entry.request.uri.path

  # bad URL construction (from bot)
  return false if path =~ %r{/.*?https?://}
  return false if path.empty?

  # pages have empty MIME types
  entry.request.mime_types.empty?
end
parse_period(period)
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 32
# Converts a period string of the form "<start> - <end>" into a Range of
# DateTime values (end inclusive, as built by Range.new).
#
# period:: String with two dates separated by ' - ' (space, dash, space).
#          Each side is parsed with DateTime.parse.
#
# Returns a Range from the parsed start to the parsed end.
#
# Raises ArgumentError when the separator is missing, or when either side
# is not a parseable date (DateTime.parse's error is an ArgumentError
# subclass).
def parse_period(period)
  bounds = period.split(' - ', 2)
  unless bounds.size == 2
    # Without this check Range.new fails with an unhelpful
    # "wrong number of arguments" that hides the offending input.
    raise ArgumentError, "period must look like '<start> - <end>', got #{period.inspect}"
  end

  first, last = bounds.map { |date| DateTime.parse(date) }
  Range.new(first, last)
end
process_line(file, line_num, line)
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 36
# Parses one raw log line and, if it is a page request worth counting,
# records it in @stats.
#
# file::     accepted for the caller's convenience; not used here.
# line_num:: accepted for the caller's convenience; not used here.
# line::     the raw log line handed to @log_parser.
#
# Handling, in order:
# * entries whose timestamp falls outside @period (when one is set) are
#   skipped;
# * entries matched by ignore? are skipped, and their source address is
#   added to @ignored_ips so later entries from that address are ignored
#   too;
# * entries accepted by page? are added to the statistics;
# * anything else is skipped.
#
# Raises ParseError when @log_parser cannot parse the line; the underlying
# exception remains available through ParseError#cause.
def process_line(file, line_num, line)
  begin
    data = @log_parser.parse_line(line)
  rescue StandardError
    raise ParseError, "Can't parse line: #{line}"
  end
  entry = Entry.new(data)
  if @period && !@period.cover?(entry.timestamp)
    # ignore timestamp out of specified period
  elsif ignore?(entry)
    @ignored_ips[entry.source.address] = true
  elsif page?(entry)
    if (statuses_key = StatusKeys[entry.status.class])
      @stats.add(statuses_key, "#{entry.status.code}: #{entry.request.uri.path}")
    end
    @stats.add(:source_country, entry.source.country)
    @stats.add(:source_region, entry.source.region)
    @stats.add(:source_city, entry.source.city)
    @stats.add(:wv_source_city, entry.source.city) if entry.source.region == 'West Virginia, United States'
    @stats.add(:pages, entry.request.uri.path)
    # Requests without a referer have nothing to record as :via or
    # :searches; guard so a nil referer is skipped instead of raising.
    referer = entry.referer
    @stats.add(:via, referer.uri) if referer && referer.uri&.host != @domain
    @stats.add(:searches, referer.query) if referer&.query
    @stats.add(:browsers, entry.user_agent.browser)
    @stats.add(:systems, entry.user_agent.system)
    @stats.add(:dates, entry.calendar_week)
  end
end
report()
click to toggle source
# File lib/http-log-analyzer/importer.rb, line 78
# Delegates to the accumulated statistics object and returns whatever its
# report method returns.
def report
  collected = @stats
  collected.report
end