class JekyllImageData::Crawler
Public Class Methods
new()
click to toggle source
# File lib/jekyll-image-data/crawler.rb, line 3
#
# Compiles the patterns used by #crawl.
#
# @image is one alternation over four image syntaxes. Its capture groups are
# numbered across the whole alternation (1-based):
#
#   1, 2     Markdown inline image      ![alt](src)      1 = alt, 2 = src
#   3, 4     Markdown reference image   ![alt][ref]      3 = alt, 4 = ref
#   5..10    HTML <img> tag             5 = attribute span, 6/9/10 = src, 7/8 = alt
#   11..16   Liquid image include       11 = extension, 12 = attribute span,
#                                       13/16 = src, 14/15 = alt
#
# @image_ref matches a reference definition: [label]: url
def initialize
  # A src needs a "/" followed by non-space characters; a leading
  # https/http/mailto and ":/" are optional.
  src = %r{(?:https|http|mailto)?(?:\:/)?/\S+}
  alt = %r{[[[:alnum:]][[:space:]]`~¡!@\#\$%^&*\(\)+=\[\]\{\}\\\|;\:',\.¿\?/_-]+}

  # Markdown alt text and URL are bounded instead of a greedy ".*", so that
  # several images on one line are matched separately and "![a][ref]" is not
  # swallowed by a later "[text](url)" on the same line. One level of nested
  # brackets / parentheses is still accepted (e.g. "/wiki/Foo_(bar)").
  # Only non-capturing groups are used here to keep the group numbering.
  md_alt = %r{(?:[^\[\]\n]|\[[^\[\]\n]*\])*}
  md_url = %r{(?:[^()\n]|\([^()\n]*\))*}

  md_image = %r{!\[(#{md_alt})\]\((#{md_url})\)}
  md_image_ref = %r{!\[(#{md_alt})\]\[([^\[\]\n]*)\]}

  # NOTE: the HTML and include patterns still use greedy ".*"; several tags on
  # one line may be merged into a single match.
  html_image = %r{<img.*(src="(#{src})".*alt="(#{alt})"|alt="(#{alt})".*src="(#{src})"|src="(#{src})")}

  # The dot before the extension is escaped so only "image.liquid" and
  # "image.html" are accepted, not "image" + any character + extension.
  include_image = %r{\{\%\s*include\s*image\.(liquid|html)\s*(src="(#{src})".*alt="(#{alt})"|alt="(#{alt})".*src="(#{src})")}

  @image = %r{#{md_image}|#{md_image_ref}|#{html_image}|#{include_image}}
  @image_ref = %r{\[(.*)\]:\s*(\S*)}
end
Public Instance Methods
crawl(content, config)
click to toggle source
# File lib/jekyll-image-data/crawler.rb, line 16
#
# Collects the images referenced in +content+.
#
# @param content [String] text scanned with @image and @image_ref
# @param config [Hash] configuration (anything responding to #dig); the
#   optional string at config["image_data"]["exclude"] is compiled as a
#   regular expression and images whose URL matches it are dropped
# @return [Array<Hash>] one { "url" => ..., "alt" => ... } hash per image, in
#   the order String#scan finds them
def crawl(content, config)
  exclude = config.dig("image_data", "exclude")

  # String#scan yields the capture groups of @image as a 0-based array. Only
  # the groups of the alternative that matched are non-nil, so the first
  # non-nil entry among the candidates below is the value we want.
  src_groups = [1, 5, 8, 9, 12, 15]
  alt_groups = [0, 2, 6, 7, 13, 14]
  ref_group = 3

  images = content.scan(@image).map do |groups|
    {
      "url" => groups.values_at(*src_groups).compact.first || "",
      "alt" => groups.values_at(*alt_groups).compact.first || "",
      "ref" => groups[ref_group] || ""
    }
  end

  # Reference definitions found by @image_ref; a later definition of the same
  # label overrides an earlier one.
  references = content.scan(@image_ref).each_with_object({}) do |(label, url), found|
    found[label] = url
  end

  # "ref" is only bookkeeping for resolving reference-style images; it is
  # removed from every image before the result is returned.
  images.each do |image|
    label = image.delete("ref")
    image["url"] = references[label] if references.key?(label)
  end

  return images if images.empty? || !exclude

  # Filter with reject instead of deleting from the array while iterating it:
  # deleting inside #each skips the element that follows each removed one.
  pattern = Regexp.new(exclude)
  images.reject { |image| pattern.match(image["url"]) }
end