module YoutubeChannel
Extracts YouTube URLs (channel/profile, watch, and embed links) from response text.
Public Instance Methods
grep_youtube_channel(response)
click to toggle source
# File lib/parsers/youtube_channel.rb, line 5
# Collects the youtube.com URLs found in +response+.
#
# Three patterns are tried in priority order, and only the matches of the
# first pattern that finds anything are kept:
#   1. any youtube.com URL that is not a "?gl=" locale link, an embed/ or
#      feeds/ path, a player_api/iframe_api reference, a watch link, or a
#      "user/#" link;
#   2. watch links that carry a "v=" parameter;
#   3. embed links whose id does not start with "id" or a placeholder-like
#      character such as "{", "[", "$" or "%".
#
# @param response [#to_s] text to scan (coerced with +to_s+)
# @return [Array<String>, nil] unique matched URLs, or nil when the text
#   contains no youtube.com URL at all
def grep_youtube_channel(response)
  # Coerce once so the guard and the scan operate on the same String.
  text = response.to_s
  return unless text.match?(%r{(?im)https?:\/\/(?:www\.)?youtube\.com\/})

  first_regex = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/(?!\?gl=\w{2}|(?:embed|feeds)\/|(?:player_api|iframe_api)(?:"|'|\/|\?)|watch\?|user\/#)[^"'\&<>\s]+)}
  # The "?" after "watch" is escaped so it matches the literal query-string
  # marker instead of making the preceding "h" optional.
  second_regex = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/watch\?\S*v=[^<>&'"]+)}
  third_regex = %r{(?im)(https?:\/\/(?:www\.)?youtube\.com\/embed\/(?!id|{|}|\[|\]|\$|\?|\\|%|\+)[^"'\?<>\s]+)}

  youtube_channels = scrape_profile(text, [first_regex, second_regex, third_regex])
  # Array() keeps this safe should the helper ever hand back nil.
  Array(youtube_channels).compact.uniq
end
Private Instance Methods
scrape_profile(response, regexes)
click to toggle source
# File lib/parsers/youtube_channel.rb, line 17
# Scans +response+ with each pattern in order and returns the captures of the
# first pattern that finds anything.
#
# @param response [#to_s] text to search (coerced with +to_s+)
# @param regexes [Array<Regexp>] patterns, highest priority first
# @return [Array<String>, nil] flattened, nil-free matches of the first
#   productive pattern; [] when no pattern matches; nil when the text or the
#   pattern list is empty
def scrape_profile(response, regexes)
  # Coerce once so the emptiness guard and the scan see the same String.
  text = response.to_s
  return if text.empty? || regexes.empty?

  regexes.each do |regex|
    # scan yields one array per match when the pattern has groups; groups
    # that did not participate come back as nil, hence flatten + compact.
    matches = text.scan(regex).flatten.compact
    return matches unless matches.empty?
  end

  []
end