class Hubba::Stats
Keeps track of repo stats over time (with a history hash).
Public Class Methods
new( full_name )
click to toggle source
# File lib/hubba/stats.rb, line 8
## Sets up a new (empty) stats record for a single repo.
##
##  full_name - name of the repo (e.g. poole/hyde);
##              stored as-is under the 'full_name' key of the data hash
##
## note: nothing gets read from disk here - only the in-memory
##       data hash and an empty lookup cache get created
def initialize( full_name )
  @data  = { 'full_name' => full_name }   # e.g. poole/hyde etc.
  @cache = {}    ## keep a lookup cache - why? why not?
end
Public Instance Methods
read()
click to toggle source
# File lib/hubba/stats.rb, line 239
## Reads the stats (data hash) from the repo's json file.
##
## The path gets built from the configured data dir, the first letter
## of the file name (used as index dir) and the full name of the repo
## with the slash (/) replaced by a tilde (~)
##   e.g. poole/hyde  =>  <data_dir>/p/poole~hyde.json
##
## If the file is present the data hash gets replaced by the parsed json
## and the lookup cache gets reset; if the file is missing only a warning
## gets printed and the data hash and the cache stay untouched.
##
## Returns self (for easy chaining).
def read
  ## note: skip reading if file not present
  basename = @data['full_name'].tr( '/', '~' )   ## e.g. poole/hyde become poole~hyde
  letter   = basename[0]                         ## use first letter as index dir e.g. p/poole~hyde
  data_dir = "#{Hubba.config.data_dir}/#{letter}"
  path     = "#{data_dir}/#{basename}.json"

  if File.exist?( path )
    puts " reading stats from #{basename} (#{data_dir})..."
    @data = JSON.parse( File.read( path, encoding: 'utf-8' ))

    ## reset (invalidate) cached values from data hash
    ##   use after reading or fetching
    @cache = {}
  else
    puts "!! WARN: - skipping reading stats from #{basename} -- file not found"
  end

  self   ## return self for (easy chaining)
end
update( repo, commits: nil, topics: nil, languages: nil )
click to toggle source
# File lib/hubba/stats.rb, line 110
## Updates the stats with data fetched from github via api.
##
##  repo      - repo record (required); must be a Github::Resource
##  commits   - (optional) list of commits; only the first entry
##              gets stored as the (latest) commit
##  topics    - (optional) record with the topic names
##  languages - (optional) record with the languages
##
## All (optional) records must be a Github::Resource too.
## Also adds / merges a history record (stargazers and forks count)
## for today and resets the lookup cache.
##
## Raises ArgumentError if any passed-in record is not a Github::Resource.
## note: the records get checked one-by-one (in the order above), that is,
##        data from earlier records is already stored if a later check fails.
##
## Returns self (for easy chaining).
def update( repo, commits: nil, topics: nil, languages: nil )
  raise ArgumentError, "Github::Resource expected; got #{repo.class.name}"   unless repo.is_a?( Github::Resource )

  ## e.g. 2015-05-11T20:21:43Z
  ## puts Time.iso8601( repo.data['created_at'] )
  ##
  ## note: size in kb (kilobyte)
  ## todo/check - remove language (always use languages - see below) - why? why not?
  ##   note: language might be nil!!!
  %w[created_at updated_at pushed_at size description language].each do |key|
    @data[ key ] = repo.data[ key ]
  end

  ########################################
  #### history / by date record
  rec = { 'stargazers_count' => repo.data['stargazers_count'],
          'forks_count'      => repo.data['forks_count'] }

  today = Date.today.strftime( '%Y-%m-%d' )   ## e.g. 2016-09-27
  puts "add record #{today} to history..."
  pp rec    # check if stargazers_count is a number (NOT a string)

  history = @data[ 'history' ] ||= {}
  item    = history[ today ]   ||= {}
  ## note: merge "in-place" (overwrite with new - but keep other key/value pairs if any e.g. pageviews, clones, etc.)
  item.merge!( rec )

  ##########################
  ## also check / keep track of (latest) commit
  if commits
    raise ArgumentError, "Github::Resource expected; got #{commits.class.name}"   unless commits.is_a?( Github::Resource )

    puts "update - last commit:"
    ## pp commits
    latest = commits.data[0]
    if latest
      details = latest['commit']
      ## for now store only the latest commit (e.g. a single commit in an array)
      @data[ 'commits' ] = [{
        'committer' => { 'date' => details['committer']['date'],
                         'name' => details['committer']['name'] },
        'author'    => { 'date' => details['author']['date'],
                         'name' => details['author']['name'] },
        'message'   => details['message']
      }]
    else
      ## note: an empty list has no latest commit - keep whatever is stored (if anything)
      puts "!! WARN: - no commits found; skipping update of last commit"
    end
  end

  if topics
    raise ArgumentError, "Github::Resource expected; got #{topics.class.name}"   unless topics.is_a?( Github::Resource )

    puts "update - topics:"
    ## e.g.
    # {"names"=>
    #   ["opendata", "football", "seriea", "italia", "italy",
    #    "juve", "inter", "napoli", "roma", "sqlite"]}
    #
    # {"names"=>[]}
    @data[ 'topics' ] = topics.data['names']
  end

  if languages
    raise ArgumentError, "Github::Resource expected; got #{languages.class.name}"   unless languages.is_a?( Github::Resource )

    puts "update - languages:"
    ## e.g.
    ##   {"Ruby"=>1020599, "HTML"=>3219, "SCSS"=>508, "CSS"=>388}
    ## or might be empty
    ##   {}
    @data[ 'languages' ] = languages.data
  end

  pp @data

  ## reset (invalidate) cached values from data hash
  ##   use after reading or fetching
  @cache = {}

  self   ## return self for (easy chaining)
end
update_traffic( clones: nil, views: nil, paths: nil, referrers: nil )
click to toggle source
update
# File lib/hubba/stats.rb, line 18
## Updates the traffic section of the data hash, that is,
##   traffic/summary  - latest views, clones, paths and referrers records
##   traffic/history  - day-by-day views and clones records (keyed by date e.g. 2016-09-27)
##
##  clones, views, paths, referrers - all optional;
##    if passed in must be a Github::Resource
##
## Raises ArgumentError if a passed-in record is not a Github::Resource.
## note: the records get checked and stored one-by-one (views, clones,
##        paths, referrers), that is, earlier records are already stored
##        if a later check fails.
##
## Resets the lookup cache (the data hash might have changed)
##   and returns self (for easy chaining) - same as update.
def update_traffic( clones: nil, views: nil, paths: nil, referrers: nil )
  traffic = @data[ 'traffic' ]  ||= {}
  summary = traffic['summary']  ||= {}
  history = traffic['history']  ||= {}

  ## views and clones share the same format - the timeline is stored
  ##   under the key of the same name ("views" or "clones") e.g.
  #
  #   {"count"=>1526,
  #    "uniques"=>287,
  #    "views"=>
  #      [{"timestamp"=>"2020-09-27T00:00:00Z", "count"=>52, "uniques"=>13},
  #       {"timestamp"=>"2020-09-28T00:00:00Z", "count"=>108, "uniques"=>28},
  #       ...
  #      ]}
  #
  #   {"count"=>51,
  #    "uniques"=>17,
  #    "clones"=>
  #      [{"timestamp"=>"2020-09-26T00:00:00Z", "count"=>1, "uniques"=>1},
  #       {"timestamp"=>"2020-09-27T00:00:00Z", "count"=>2, "uniques"=>1},
  #       ...
  #      ]}
  { 'views' => views, 'clones' => clones }.each do |kind, resource|
    next unless resource
    raise ArgumentError, "Github::Resource expected; got #{resource.class.name}"   unless resource.is_a?( Github::Resource )

    ## keep latest (summary) record of last two weeks (14 days)
    summary[ kind ] = { 'count'   => resource.data['count'],
                        'uniques' => resource.data['uniques'] }

    ## update history / day-by-day items / timeline
    resource.data[ kind ].each do |rec|
      # e.g. "2020-09-27T00:00:00Z"
      date = Date.iso8601( rec['timestamp'] )
      item = history[ date.strftime( '%Y-%m-%d' ) ] ||= {}   ## e.g. 2016-09-27
      ## note: merge "in-place"
      item.merge!( { kind => { 'count'   => rec['count'],
                               'uniques' => rec['uniques'] }} )
    end
  end

  ## paths and referrers get stored "as is" (summary only) e.g.
  #
  #   [{"path"=>"/openfootball/england",
  #     "title"=>
  #       "openfootball/england: Free open public domain football data for England (and ...",
  #     "count"=>394,
  #     "uniques"=>227},
  #    ...
  #
  #   [{"referrer"=>"github.com", "count"=>327, "uniques"=>198},
  #    {"referrer"=>"openfootball.github.io", "count"=>71, "uniques"=>54},
  #    {"referrer"=>"Google", "count"=>5, "uniques"=>5},
  #    {"referrer"=>"reddit.com", "count"=>4, "uniques"=>4}]
  { 'paths' => paths, 'referrers' => referrers }.each do |kind, resource|
    next unless resource
    raise ArgumentError, "Github::Resource expected; got #{resource.class.name}"   unless resource.is_a?( Github::Resource )

    summary[ kind ] = resource.data
  end

  ## reset (invalidate) cached values from data hash
  ##   use after reading or fetching
  @cache = {}

  self   ## return self for (easy chaining)
end
write()
click to toggle source
read / write methods / helpers
# File lib/hubba/stats.rb, line 223
## Writes the stats (data hash) as (pretty printed) json to the repo's file.
##
## The path gets built from the configured data dir, the first letter
## of the file name (used as index dir) and the full name of the repo
## with the slash (/) replaced by a tilde (~)
##   e.g. poole/hyde  =>  <data_dir>/p/poole~hyde.json
##
## note: the index dir gets created if missing;
##        an existing file gets overwritten.
##
## Returns self (for easy chaining).
def write
  basename = @data['full_name'].tr( '/', '~' )   ## e.g. poole/hyde become poole~hyde
  letter   = basename[0]                         ## use first letter as index dir e.g. p/poole~hyde
  data_dir = "#{Hubba.config.data_dir}/#{letter}"
  path     = "#{data_dir}/#{basename}.json"

  puts " writing stats to #{basename} (#{data_dir})..."

  FileUtils.mkdir_p( data_dir )    ## make sure path exists
  File.write( path, JSON.pretty_generate( @data ), encoding: 'utf-8' )

  self   ## return self for (easy chaining)
end