class ChupaText::Decomposers::WebKit

Constants

AVAILABLE_ATTRIBUTE_NAME
IN_PROCESS
TARGET_EXTENSIONS
TARGET_MIME_TYPES

Public Instance Methods

decompose(data) { |data| ... }
# File lib/chupa-text/decomposers/webkit.rb, line 66
# Renders a screenshot of +data+'s source into a temporary PNG file and, when
# the renderer produced any output, attaches it to +data+ as a
# base64-encoded "image/png" Screenshot.
#
# The renderer is chosen by IN_PROCESS: the in-process
# ChupaTextDecomposerWebKit::Screenshoter receives the source body, while
# ExternalScreenshoter receives the source path.
#
# data[AVAILABLE_ATTRIBUTE_NAME] is always set afterwards to whether
# +data+ has a screenshot, and +data+ is then yielded to the caller's block.
#
# The temporary file is closed and unlinked before the block is invoked
# (even if rendering raises), so it does not linger until garbage collection.
def decompose(data)
  body = data.source.body
  uri = data.source.uri.to_s
  width, height = data.expected_screenshot_size

  output = Tempfile.new(["chupa-text-decomposer-webkit", ".png"])
  begin
    if IN_PROCESS
      screenshoter = ChupaTextDecomposerWebKit::Screenshoter.new(logger)
      screenshoter.run(body, uri, output.path, width, height)
    else
      screenshoter = ExternalScreenshoter.new
      screenshoter.run(data.source.path, uri, output.path, width, height)
    end
    # Read by path in binary mode: the renderer writes to the path rather
    # than through our handle, and PNG bytes must not go through text-mode
    # newline/encoding conversion.
    png = File.binread(output.path)
  ensure
    # Close the descriptor and remove the file right away.
    output.close!
  end

  # An empty file means the renderer produced no screenshot.
  unless png.empty?
    data.screenshot = Screenshot.new("image/png",
                                     [png].pack("m*"),
                                     "base64")
  end
  data[AVAILABLE_ATTRIBUTE_NAME] = !data.screenshot.nil?
  yield(data)
end
target?(data)
# File lib/chupa-text/decomposers/webkit.rb, line 41
# Tells whether +data+ should be handled by this decomposer.
#
# Returns false when +data+ does not need a screenshot, already has one, or
# already carries a non-nil AVAILABLE_ATTRIBUTE_NAME attribute (which
# #decompose sets, so the same data is not processed twice). Also returns
# false when +data+ has no source.
#
# Otherwise returns true when the source's extension is in
# TARGET_EXTENSIONS or its MIME type is in TARGET_MIME_TYPES. Failing that,
# the start of the body is inspected: it is a target when it begins with an
# HTML doctype (the legacy "<!DOCTYPE html ..." form or the HTML5
# "<!DOCTYPE html>" form) or with an "<html" tag.
def target?(data)
  return false unless data.need_screenshot?
  return false if data.screenshot
  return false unless data[AVAILABLE_ATTRIBUTE_NAME].nil?

  source = data.source
  return false if source.nil?

  return true if TARGET_EXTENSIONS.include?(source.extension)
  return true if TARGET_MIME_TYPES.include?(source.mime_type)

  source_body = source.body
  return false if source_body.nil?

  # "<!DOCTYPE html>" is the HTML5 doctype; the trailing-space form only
  # matches legacy doctypes that carry a public identifier.
  source_body.start_with?("<!DOCTYPE html ", "<!DOCTYPE html>", "<html")
end