module DirectLink
Constants
- NORMAL_EXCEPTIONS
Attributes
logger[RW]
reddit_bot[RW]
silent[RW]
timeout[RW]
Public Class Methods
_500px(link)
click to toggle source
# File lib/directlink.rb, line 160
# Resolves a 500px photo page link to its image via the 500px photos API.
#
# Two API requests are made: the first one reads the photo's real width and
# height, the second one asks for the rendition of exactly that width
# (otherwise the "url" in the second response would be wrong).
#
# @param link [String] photo page URL, https://500px.com/photo/<id>/<slug>
# @return [Array] [width, height, url, format] taken from the API responses
# @raise [ErrorBadLink] if +link+ does not look like a 500px photo page
def self._500px(link)
  matched = %r{\Ahttps://500px\.com/photo/(?<id>[^/]+)/[-[a-zA-Z0-9]%]+\/?\z}.match(link)
  raise ErrorBadLink.new(link) unless matched

  photo_id = matched[:id]

  # Sends one request to the photos endpoint and returns the first (only
  # requested) entry of the "photos" object in the response.
  fetch_photo = lambda do |form|
    JSON.load(NetHTTPUtils.request_data("https://api.500px.com/v1/photos", form: form)).fetch("photos").values.first
  end

  width, height = fetch_photo.call({"ids" => photo_id}).values_at("width", "height")
  # we need the above request to find the real resolution otherwise the "url" in the next request will be wrong
  rendition = fetch_photo.call({"ids" => photo_id, "image_size[]" => width}).fetch("images").first
  url, image_format = rendition.values_at("url", "format")

  [width, height, url, image_format]
end
flickr(link)
click to toggle source
# File lib/directlink.rb, line 172
# Finds the largest available rendition of a Flickr photo.
#
# Accepts both https://www.flickr.com/photos/<user>/<id>... and
# https://flic.kr/p/<id> links and queries flickr.photos.getSizes with the
# id taken from the link.
#
# @param link [String] Flickr photo link
# @return [Array, nil] [width, height, url] of the size with the largest
#   area; nil when the API reports no sizes
# @raise [ErrorBadLink] if +link+ matches neither supported form
# @raise [ErrorMissingEnvVar] if FLICKR_API_KEY is not set
# @raise [ErrorNotFound] if the API answers "Photo not found"
# @raise [ErrorAssert] if the API response stat is anything but "ok"
def self.flickr(link)
  long_form = %r{\Ahttps://www\.flickr\.com/photos/[^/]+/(?<id>[^/]+)}
  short_form = %r{\Ahttps://flic\.kr/p/(?<id>[^/]+)\z}
  matched = long_form.match(link) || short_form.match(link)
  raise ErrorBadLink.new(link) unless matched

  api_key = ENV["FLICKR_API_KEY"]
  raise ErrorMissingEnvVar.new("define FLICKR_API_KEY env var") unless api_key

  response = NetHTTPUtils.request_data(
    "https://api.flickr.com/services/rest/",
    form: {
      api_key: api_key,
      format: "json",
      nojsoncallback: 1,
      photo_id: matched[:id],
      method: "flickr.photos.getSizes",
    }
  )
  json = JSON.load(response)

  not_found = {"stat"=>"fail", "code"=>1, "message"=>"Photo not found"}
  raise ErrorNotFound.new(link.inspect) if json == not_found
  raise ErrorAssert.new("unhandled API response stat for #{link}: #{json}") unless json["stat"] == "ok"

  sizes = json["sizes"]["size"].map do |size|
    [size["width"].to_i, size["height"].to_i, size["source"]]
  end
  sizes.max_by { |width, height, _source| width * height }
end
google(src, width = 0)
click to toggle source
# File lib/directlink.rb, line 54
# Rewrites a Google-hosted image URL (googleusercontent / blogspot) so that
# it requests the image with the size option "s<width>".
#
# Every known URL shape is matched by a strict pattern; the part of the URL
# in front of the size options is captured and the size options are replaced.
# Links that already carry the "s0" option (labelled high res below) are
# returned unchanged.
#
# @param src [String] image URL; some shapes are accepted without a scheme
# @param width [Integer] value put after "s" in the size option (default 0)
# @return [String] the rewritten URL
# @raise [ErrorBadLink] if +src+ matches none of the known shapes
def self.google(src, width = 0)
  # this can handle links without schema because it's used for parsing community HTML pages
  case src
  # Google Plus post image
  when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_-]{11}\/[WX][a-zA-Z0-9_-]{9}I\/AAAAAAA[a-zA-Z0-9_-]{4}\/[a-zA-Z0-9_-]{33}(?:[gwAQ]?CJoC|CL0B(?:GAs)?)\/)w[1-7]\d\d(?:-d)?-h[1-9]\d\d\d?-n(?:-k-no|-rw|)\/[^\/]+\z/
    "#{Regexp.last_match(1)}s#{width}/"
  when /\A(\/\/lh3\.googleusercontent\.com\/proxy\/[a-zA-Z0-9_-]{66,523}=)(?:w(?:[45]\d\d)-h\d\d\d-[np]|s530-p|s110-p-k)\z/
    "https:#{Regexp.last_match(1)}s#{width}/"
  when /\A(\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{24}_[a-zA-Z]{30}7zGIDTJfkc1YZFX2MhgKnjA=)w530-h398-p\z/
    "https:#{Regexp.last_match(1)}s#{width}/"
  when /\A(\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/[VW][a-zA-Z0-9_-]{9}I\/AAAAAAA[AC][a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{32}[gwAQ]CJoC\/)w530-h[23]\d\d-p\/[^\/]+\z/,
       /\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9_-]{11}\/[UVWX][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9_-]{2}\/[a-zA-Z0-9_-]{33}C(?:EwYBhgL|(?:Lc|Kg)BGAs(?:YHQ)?)\/)(?:s640|w\d{2,4}-h\d\d\d?-p(?:-k-no-nu)?)\/[^\/]+\z/,
       /\A(?:https?:)?(\/\/[1-4]\.bp\.blogspot\.com\/-[a-zA-Z0-9-]{11}\/[UV][a-zA-Z0-9_-]{9}I\/AAAAAAAA[A-Z][a-zA-Z0-9]{2}\/[a-zA-Z0-9-]{11}\/)w72-h72-p-k-no-nu\/[^\/]+\z/
    "https:#{Regexp.last_match(1)}s#{width}/"
  when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/AAAAAAAAAAI\/AAAAAAAAAAQ\/[a-zA-Z0-9_]{11}\/)w530-h[13]\d\d-n\/[^\/]+\z/,
       /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/V[a-zA-Z0-9-]{9}I\/AAAAAAAA[ML][c-q4][so0]\/[a-zA-Z0-9_]{11}\/)w530(?:-d)?-h3\d\d-n\/[^\/]+\z/
    "#{Regexp.last_match(1)}s#{width}/"
  # high res (s0) Google Plus post image
  when /\Ahttps:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_-]{11}\/[a-zA-Z0-9_-]{10}I\/AAAAAAA[a-zA-Z0-9_-]{4}\/[a-zA-Z0-9_-]{33}CJoC\/s0\/[^\/]+\z/
    src
  # Google Plus userpic
  when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9-]{11}\/AAAAAAAAAAI\/AAAAAAAA[a-zA-Z0-9]{3}\/[a-zA-Z0-9_-]{11}\/)s\d\d-p(?:-k)?-rw-no\/photo\.jpg\z/
    "#{Regexp.last_match(1)}s#{width}/"
  # Hangout userpic
  when /\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAA[a-zA-Z0-9]{3}\/[a-zA-Z0-9-]{11}\/)s\d\d-c-k-no\/photo\.jpg\z/,
       /\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9]{11}\/)s64-c-k\/photo\.jpg\z/,
       /\A(https:\/\/lh[356]\.googleusercontent\.com\/-[a-zA-Z0-9]{11}\/AAAAAAAAAAI\/AAAAAAAAAAA\/[a-zA-Z0-9_]{34}\/)s(?:46|64)-c(?:-k(?:-no)?)?-mo\/photo\.jpg\z/
    "#{Regexp.last_match(1)}s#{width}/"
  # Google Keep
  when /\A(https:\/\/lh\d\.googleusercontent\.com\/[a-zA-Z0-9_-]{104,106}=s)\d\d\d\d?\z/
    # the capture already ends with "=s", so only the number is appended
    "#{Regexp.last_match(1)}#{width}"
  # opensea
  when /\A(https:\/\/lh3\.googleusercontent\.com\/[a-zA-Z0-9]{78}-nGx_jf_XGqqiVANe_Jr8u2g=)w1400-k\z/
    "#{Regexp.last_match(1)}s#{width}"
  # mp4
  when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z]{11}\/W[a-zA-Z0-9]{9}I\/AAAAAAAAODw\/[a-zA-Z0-9]{32}QCJoC\/)w530-h883-n-k-no\/[^\/]+\.mp4\z/
    "#{Regexp.last_match(1)}s#{width}/"
  # something else
  when /\A(https:\/\/lh3\.googleusercontent\.com\/-[a-zA-Z0-9_]{11}\/X-[a-zA-Z0-9]{8}I\/AAAAAAAAALE\/[a-zA-Z0-9]{23}_[a-zA-Z0-9]{19}\/)w1200-h630-p-k-no-nu\/[\d-]+\.png\z/
    "#{Regexp.last_match(1)}s#{width}/"
  else
    raise ErrorBadLink.new(src)
  end
end
imgur(link, timeout = 1000)
click to toggle source
TODO: make the timeout handling respect the way the DirectLink method works with timeouts
# File lib/directlink.rb, line 103
# Resolves an Imgur link (album, gallery or single image) through the Imgur
# API and lists the media it contains.
#
# HTTP errors 400, 500, 502 and 503 are retried with a delay that starts at
# one second and doubles each time, for as long as the delay is below
# +timeout+.
#
# @param link [String] Imgur URL
# @param timeout [Integer] upper bound for the retry delay, in seconds
# @return [Array<Array>] one [link, width, height, type] entry per item
# @raise [ErrorMissingEnvVar] if IMGUR_CLIENT_ID is not set
# @raise [ErrorBadLink] if +link+ matches no supported Imgur URL shape
# @raise [ErrorNotFound] on HTTP 404 or when an album has no images
# @raise [ErrorAssert] on other HTTP errors or unexpected API data
def self.imgur(link, timeout = 1000)
  raise ErrorMissingEnvVar.new "define IMGUR_CLIENT_ID env var" unless ENV["IMGUR_CLIENT_ID"]

  supported_types = %w{ image/jpeg image/png image/gif video/mp4 }

  # Authorized GET with retries on the transient HTTP errors listed above.
  request_data = lambda do |url|
    delay = 1
    begin
      NetHTTPUtils.request_data url, header: { Authorization: "Client-ID #{ENV["IMGUR_CLIENT_ID"]}" }
    rescue NetHTTPUtils::Error => e
      raise ErrorNotFound.new url.inspect if 404 == e.code
      if delay < timeout && [400, 500, 502, 503].include?(e.code)
        logger.error "retrying in #{delay} seconds because of Imgur HTTP ERROR #{e.code}"
        sleep delay
        delay *= 2
        retry
      end
      raise ErrorAssert.new "unexpected http error #{e.code} for #{url}"
    end
  end

  images = case link
  when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/(a|gallery)\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
       /\Ahttps?:\/\/imgur\.com\/(gallery)\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\/new\z/
    # "/a/..." links are served by the album endpoint, "/gallery/..." by the gallery one
    section = Regexp.last_match(1) == "gallery" ? "gallery" : "album"
    json = request_data["https://api.imgur.com/3/#{section}/#{Regexp.last_match(2)}/0.json"]
    data = JSON.load(json)["data"]
    if data["error"]
      raise ErrorAssert.new "unexpected error #{data.inspect} for #{link}"
    elsif data["images"]
      raise ErrorNotFound.new link.inspect if data["images"].empty?
      data["images"]
    elsif data["type"] && supported_types.include?(data["type"])
      # TODO check if this branch is possible at all
      [ data ]
    # elsif data["comment"]
    #   fi["https://imgur.com/" + data["image_id"]]
    else
      # one day single-video item should hit this but somehow it didn't yet
      raise ErrorAssert.new "unknown data format #{json} for #{link}"
    end
  when /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{7,8})(?:\.(?:gifv|jpe?g(?:\?fb)?|png))?\z/,
       /\Ahttps?:\/\/(?:(?:i|m|www)\.)?imgur\.com\/([a-zA-Z0-9]{5})\.mp4\z/,
       /\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{5}(?:[a-zA-Z0-9]{2})?)\z/,
       /\Ahttps?:\/\/imgur\.com\/([a-zA-Z0-9]{7})(?:\?\S+)?\z/,
       /\Ahttps?:\/\/imgur\.com\/r\/[0-9_a-z]+\/([a-zA-Z0-9]{7})\z/,
       /\Ahttps?:\/\/api\.imgur\.com\/3\/image\/([a-zA-Z0-9]{7})\/0\.json\z/
    json = request_data["https://api.imgur.com/3/image/#{Regexp.last_match(1)}/0.json"]
    [ JSON.load(json)["data"] ]
  else
    raise ErrorBadLink.new link
  end

  images.map do |image|
    raise ErrorAssert.new "unknown type of #{link}: #{image}" unless supported_types.include?(image["type"])
    image.values_at "link", "width", "height", "type"
  end
end
new(msg)
click to toggle source
Calls superclass method
# File lib/directlink.rb, line 20
# Logs the error at creation time and then builds the exception as usual.
#
# The message is written, prefixed with the exception class name, to the
# logger of the first entry of Module.nesting, i.e. the innermost module or
# class that lexically encloses this definition.
#
# @param msg [String] error message, also passed to the superclass
def initialize(msg)
  owner = Module.nesting.first
  owner.logger.error("#{self.class}: #{msg}")
  super(msg)
end
reddit(link, timeout = 1000)
click to toggle source
# File lib/directlink.rb, line 213
# Resolves a Reddit link to whatever the post points at.
#
# Direct i.redd.it gif/jpg links are returned as is. Otherwise the post id is
# taken from the link and the post is fetched, through RedditBot when
# REDDIT_SECRETS is set or from the public JSON endpoint when it is not.
# Responses that fail to parse as JSON are retried with a delay that starts
# at one second and doubles, until the delay exceeds +timeout+.
#
# @param link [String] Reddit or i.redd.it URL
# @param timeout [Integer] upper bound for the retry delay, in seconds
# @return [Array] [true, url] for link posts, video and embedded media;
#   [true, [[mime, x, y, url], ...]] for posts with media_metadata;
#   [false, selftext] for self posts
# @raise [ErrorBadLink] if no post id can be extracted or retries run out
# @raise [ErrorAssert] if the API data has an unexpected shape
def self.reddit(link, timeout = 1000)
  uri = URI(link)

  direct_image = uri.host &&
                 uri.host.split(?.) == %w{ i redd it } &&
                 uri.path[/\A\/[a-z0-9]{12,13}\.(gif|jpg)\z/]
  return [true, link] if direct_image

  id = link[/\Ahttps:\/\/www\.reddit\.com\/gallery\/([0-9a-z]{5,6})\z/, 1] ||
       uri.path[/\A(?:\/r\/[0-9a-zA-Z_]+)?(?:\/comments|\/duplicates)?\/([0-9a-z]{5,6})(?:\/|\z)/, 1]
  raise DirectLink::ErrorBadLink.new(link) unless id

  # Runs the given block, retrying with exponential backoff while it raises JSON::ParserError.
  retry_on_json_parseerror = lambda do |&request|
    delay = 1
    begin
      request.call
    rescue JSON::ParserError => e
      raise ErrorBadLink.new(link) if delay > timeout
      logger.error "#{e.message[0, 500].gsub(/\s+/, " ")}, retrying in #{delay} seconds"
      sleep delay
      delay *= 2
      retry
    end
  end

  json = if ENV["REDDIT_SECRETS"]
    require "reddit_bot"
    RedditBot.logger.level = Logger::ERROR
    require "yaml"
    self.reddit_bot ||= RedditBot::Bot.new(YAML.load_file(ENV["REDDIT_SECRETS"]))
    retry_on_json_parseerror.call { self.reddit_bot.json(:get, "/by_id/t3_#{id}") }
  else
    # The error is built and immediately swallowed on purpose; presumably only
    # the logging done while constructing it is wanted here (verify against
    # the error class constructor).
    raise ErrorMissingEnvVar.new "defining REDDIT_SECRETS env var is highly recommended" rescue nil
    listings = retry_on_json_parseerror.call do
      JSON.load(NetHTTPUtils.request_data("https://www.reddit.com/#{id}.json", header: {"User-Agent" => "Mozilla"}))
    end
    raise ErrorAssert.new("our knowledge about Reddit API seems to be outdated") unless listings.size == 2
    listings.find { |listing| listing["data"]["children"].first["kind"] == "t3" }
  end

  # TODO: do we handle linking Imgur albums?
  data = json["data"]["children"].first["data"]

  embedded = data["media"]
  if embedded
    return [true, embedded["reddit_video"]["fallback_url"]] if embedded["reddit_video"]
    recognised = embedded.keys.sort == %w{ oembed type } &&
                 %w{ youtube.com gfycat.com imgur.com }.include?(embedded["type"])
    raise ErrorAssert.new("our knowledge about Reddit API seems to be outdated") unless recognised
    return [true, embedded["oembed"]["thumbnail_url"]]
  end

  gallery = data["media_metadata"]
  if gallery
    items = gallery.values.map do |media|
      # entries that failed or are still being processed are skipped
      next if media == {"status"=>"failed"} || media == {"status"=>"unprocessed"}
      raise ErrorAssert.new("our knowledge about Reddit API seems to be outdated (media == #{media.inspect})") unless media["status"] == "valid"
      source = media["s"]
      [media["m"], *source.values_at("x", "y"), CGI.unescapeHTML(source[media["m"]=="image/gif" ? "gif" : "u"])]
    end
    return [true, items.compact]
  end

  if data["crosspost_parent"]
    # a url that is a site-relative post path is turned into an absolute one
    prefix = "https://www.reddit.com" if /\A\/r\/[0-9a-zA-Z_]+\/comments\/[0-9a-z]{5,6}\// =~ data["url"]
    return [true, "#{prefix}#{data["url"]}"]
  end

  return [true, data["url"]] unless data["is_self"]

  raise ErrorAssert.new("our knowledge about Reddit API seems to be outdated") if data["url"] != "https://www.reddit.com" + data["permalink"]
  [false, data["selftext"]]
end
vk(link)
click to toggle source
# File lib/directlink.rb, line 264
# Resolves a VK photo or wall post link to the largest rendition of each
# photo it refers to.
#
# Photo links are looked up with photos.getById and must yield exactly one
# item; wall links are looked up with wall.getById and yield the photo
# attachments of the first returned post (audio attachments are skipped).
# When the API reports a zero width or height the real dimensions are
# obtained with FastImage.
#
# @param link [String] https://vk.com/... photo or wall URL
# @return [Array<Array>] one [width, height, url] entry per photo
# @raise [ErrorBadLink] if +link+ matches no supported VK URL shape
# @raise [ErrorMissingEnvVar] if VK_ACCESS_TOKEN or VK_CLIENT_SECRET is unset
# @raise [ErrorAssert] if the API data has an unexpected shape
def self.vk(link)
  outdated = "our knowledge about VK API seems to be outdated"

  # Every pattern names the wanted group "id", so it is read by name; the
  # empty (?<_>) groups are kept only so the patterns stay unchanged.
  id, mtd, field, extract_photos = case link
  when %r{\Ahttps://vk\.com/id(?<user_id>\d+)\?z=photo(?<id>\k<user_id>_\d+)(%2F(album\k<user_id>_0|photos\k<user_id>))?\z},
       %r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>\d+)_\d+)%2Fphotos\k<user_id>\z},
       %r{\Ahttps://vk\.com/photo(?<_>)(?<id>-?\d+_\d+)(\?(?:all|rev)=1)?\z},
       %r{\Ahttps://vk\.com/feed\?section=likes&z=photo(?<_>)(?<id>-(?<user_id>\d+)_\d+)%2F(liked\d+|album\k<user_id>_0)\z},
       %r{\Ahttps://vk\.com/[a-z_]+\?z=photo(?<_>)(?<id>(?<user_id>-\d+)_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_0)\z},
       %r{\Ahttps://vk\.com/wall(?<user_id>-\d+)_\d+\?z=photo(?<id>\k<user_id>_\d+)%2F(wall\k<user_id>_\d+|album\k<user_id>_00%2Frev|\d+)\z}
    photo_id = Regexp.last_match(:id)
    single_photo = lambda do |items|
      raise ErrorAssert.new(outdated) unless 1 == items.size
      items
    end
    [photo_id, :photos, :photos, single_photo]
  when %r{\Ahttps://vk\.com/wall(?<id>-?\d+_\d+)\z},
       %r{\Ahttps://vk\.com/[a-z\.]+\?w=wall(?<id>-?\d+_\d+)\z}
    post_id = Regexp.last_match(:id)
    attached_photos = lambda do |posts|
      photo_items = posts.first.fetch("attachments").select do |item|
        case item.keys
        when %w{ type photo }
          raise ErrorAssert.new(outdated) unless item["type"] == "photo"
          true
        when %w{ type audio }
          raise ErrorAssert.new(outdated) unless item["type"] == "audio"
          false
        else
          raise ErrorAssert.new(outdated)
        end
      end
      photo_items.map { |item| item.fetch("photo") }
    end
    [post_id, :wall, :posts, attached_photos]
  else
    raise ErrorBadLink.new(link)
  end

  raise ErrorMissingEnvVar.new("define VK_ACCESS_TOKEN and VK_CLIENT_SECRET env vars") unless ENV["VK_ACCESS_TOKEN"] && ENV["VK_CLIENT_SECRET"]
  sleep 0.25 unless ENV["CI"]   # "error_msg"=>"Too many requests per second"

  body = NetHTTPUtils.request_data("https://api.vk.com/method/#{mtd}.getById", :POST, form: {
    field => id,
    :access_token => ENV["VK_ACCESS_TOKEN"],
    :client_secret => ENV["VK_CLIENT_SECRET"],
    :v => "5.101"
  })
  photos = extract_photos.call(JSON.load(body).fetch("response"))

  photos.map do |photo|
    sizes = photo.fetch("sizes").map do |size|
      width, height, url = size.values_at("width", "height", "url")
      # wtf? a zero dimension comes back from the API sometimes, so measure the image itself
      width, height = FastImage.new(url, raise_on_failure: true).size if [width, height].include?(0)
      [width, height, url]
    end
    sizes.max_by { |width, height, _url| width * height }
  end
end
wiki(link)
click to toggle source
# File lib/directlink.rb, line 196
# Resolves a Wikipedia / Wikimedia Commons "File:" page link to the URL of
# the file itself, using the imageinfo query of the wiki's api.php.
#
# The host taken from the link is used to build the API request, so it is
# validated strictly: only <two letters>.wikipedia.org and
# commons.wikimedia.org are accepted.
#
# @param link [String] wiki page URL that ends with a File:... title
# @return [String] the "url" of the first imageinfo entry
# @raise [ErrorBadLink] if +link+ is not a supported wiki file link
# @raise [ErrorAssert] if the API response carries no imageinfo
def self.wiki(link)
  matched = %r{\Ahttps?://(?<hostname>([a-z]{2}\.wikipedia|commons\.wikimedia)\.org)/wiki(/[^/]+)*/(?<id>File:.+)}.match(link)
  raise ErrorBadLink.new(link) unless matched

  json = NetHTTPUtils.request_data(
    "https://#{matched[:hostname]}/w/api.php",
    form: {
      format: "json",
      action: "query",
      prop: "imageinfo",
      iiprop: "url",
      # the title arrives percent-encoded inside the link
      titles: CGI.unescape(matched[:id]),
    }
  )

  imageinfo = JSON.load(json)["query"]["pages"].values.first["imageinfo"]
  raise ErrorAssert.new("unexpected format of API response about #{link}: #{json}") unless imageinfo
  imageinfo.first["url"]
end