class StudentProgress::Scraper
Attributes
current_lab[RW]
first_name[RW]
last_name[RW]
profile_url[RW]
total_labs_complete[RW]
total_lessons_complete[RW]
Public Class Methods
login_user(email, password)
click to toggle source
# File lib/student_progress/scraper.rb, line 33
# Starts a new Poltergeist-driven Capybara session, stores it in @@session,
# and submits the learn.co sign-in form with the given credentials.
#
# email    - value typed into the "user-email" field.
# password - value typed into the "user-password" field.
#
# Returns true when, after clicking "Sign in", the session's current path is
# neither "/account_check/failure" nor "/"; otherwise prints an error message
# and returns false.
def self.login_user(email, password)
  browser = Capybara::Session.new(:poltergeist)
  @@session = browser

  browser.visit('https://learn.co')
  browser.fill_in "user-email", with: email
  browser.fill_in "user-password", with: password

  puts "Signing in now..."
  sleep 0.5
  browser.click_button "Sign in"

  # Landing on the failure page or back on the root path means the
  # credentials were rejected.
  signed_in = browser.current_path != "/account_check/failure" && browser.current_path != "/"
  return true if signed_in

  puts "Invalid email or password"
  puts "\n"
  false
end
scrape_lessons()
click to toggle source
# File lib/student_progress/scraper.rb, line 51
# Reads the page currently loaded in @@session, pulls the JSON assigned to
# `track_nav_data` out of an inline <script> tag, and creates a Topic, Unit
# and Lesson record (via their create_from_hash methods) for every entry in
# the parsed structure.
#
# Requires @@session to already hold a session with a loaded page.
# Returns the parsed array of topic hashes.
def self.scrape_lessons
  page = Nokogiri::HTML(@@session.body)

  # The payload is read from the ninth <script> element on the page
  # (index 8); the CDATA wrapper markers are stripped before matching.
  raw_script = page.css('script')[8].text
  cleaned = raw_script.gsub("\n//<![CDATA[",'').gsub("//]]>",'')

  # Capture everything after `track_nav_data=` and keep only the part
  # before the first semicolon.
  assigned = cleaned.match(/track_nav_data=(.+);/).captures.first
  track = JSON.parse(assigned.split(';').first)

  track['topics'].each do |topic_data|
    topic_attrs = {
      title: topic_data['title'],
      id: topic_data['id']
    }
    topic = StudentProgress::Topic.create_from_hash(topic_attrs)

    topic_data['units'].each do |unit_data|
      unit_attrs = {
        title: unit_data['title'],
        id: unit_data['id'],
        topic: topic
      }
      unit = StudentProgress::Unit.create_from_hash(unit_attrs)

      unit_data['lessons'].each do |lesson_data|
        lesson_attrs = {
          id: lesson_data['id'],
          title: lesson_data['title'],
          content_type: lesson_data['content_type'],
          repo: "https://github.com/#{lesson_data['github_repo_name']}",
          unit: unit
        }
        StudentProgress::Lesson.create_from_hash(lesson_attrs)
      end
    end
  end
end
scrape_progress(cohort)
click to toggle source
# File lib/student_progress/scraper.rb, line 4
# Creates a ProgressReport for the cohort, then visits each student's
# learn.co profile page and records a StudentReport (current lab plus
# completed lesson and lab counts) linked to that report. The student's
# first and last name are refreshed from the profile page along the way.
# When the cohort has a periscope_url, it is handed to
# ProgressImporter.import once all students have been visited.
#
# cohort - object responding to #id, #students (each responding to
#          #github_username) and #periscope_url.
#
# Requires @@session to already hold a session (login_user assigns it).
# Prints a running counter to stdout. Returns nil (the result of the final
# puts).
def self.scrape_progress(cohort)
  print "#students loaded: "
  report = StudentProgress::ProgressReport.create(created_at: DateTime.now, cohort_id: cohort.id)

  cohort.students.map(&:github_username).each.with_index(1) do |username, index|
    @@session.visit("https://learn.co/#{username}")
    student = StudentProgress::Student.find_or_create(
      github_username: username.downcase
    )

    # Query the header element once: the first line of its text is split
    # on spaces to obtain the name parts.
    name_words = @@session.first('.media-block__content--fill').text.split("\n").first.split(' ')
    student.first_name = name_words.first
    # NOTE(review): only the second word is stored, so any further words of
    # a multi-word surname are dropped — confirm this is intended.
    student.last_name = name_words[1]
    student.save

    current_lab = @@session.find('h4 a').text
    # Collect the stat headings once; index 1 feeds lessons_complete and
    # index 3 feeds labs_complete. Each is presumably of the form
    # "done / total" — only the part before ' / ' is converted to an Integer.
    headings = @@session.all('.flex-grid__item__stretcher h3.heading').map(&:text)

    StudentProgress::StudentReport.create(
      current_lab: current_lab,
      lessons_complete: headings[1].split(' / ').first.to_i,
      created_at: DateTime.now,
      labs_complete: headings[3].split(' / ').first.to_i,
      student_id: student.id,
      progress_report_id: report.id
    )
    print "#{index}.."
  end

  StudentProgress::ProgressImporter.import(cohort.periscope_url) if cohort.periscope_url
  puts "Done!"
end