class EasyCaptchaSolver

Attributes

captcha[R]

Public Class Methods

new( options = {} ) click to toggle source
# File lib/easy_captcha_solver.rb, line 7
def initialize ( options = {} )
  image = options[:image_path] if options[:image_path]
  image = get_captcha_image(options[:image_url]) if options[:image_url]

  throw Exception.new "Image path or image URL must be provided.
  Example:  easy_c = EasyCaptcha.new( image_url: 'http://www.example.com/captcha' )
    or easy_c = EasyCaptcha.new( image_path: './captcha.jpg' )" unless image

  # Try to solve the captcha and delete temp img if necessary
  solve_captcha(image) ensure File.delete(image) if options[:image_url] && File.exist?(image)
end

Private Instance Methods

get_captcha_image( image_url ) click to toggle source
# File lib/easy_captcha_solver.rb, line 20
def get_captcha_image( image_url )
  agent = Mechanize.new { |agent|
    agent.user_agent_alias = 'Mac Safari'
  }

  # Save a file instead of trying to solve the captcha from memory
  # because of tesseract limitations with .png images
  image = agent.get(image_url).save!  "./tmp_image"
  image_extension  = "./tmp_image.#{get_image_extension(image)}"
  File.rename( image,  image_extension)

  image_extension
end
get_image_extension(local_file_path) click to toggle source
# File lib/easy_captcha_solver.rb, line 42
def get_image_extension(local_file_path)
  png = Regexp.new("\x89PNG".force_encoding("binary"))
  jpg = Regexp.new("\xff\xd8\xff\xe0\x00\x10JFIF".force_encoding("binary"))
  jpg2 = Regexp.new("\xff\xd8\xff\xe1(.*){2}Exif".force_encoding("binary"))
  case IO.read(local_file_path, 10)
  when /^GIF8/
    'gif'
  when /^#{png}/
    'png'
  when /^#{jpg}/
    'jpg'
  when /^#{jpg2}/
    'jpg'
  else
    mime_type = `file #{local_file_path} --mime-type`.gsub("\n", '') # Works on linux and mac
    raise UnprocessableEntity, "unknown file type" if !mime_type
    mime_type.split(':')[1].split('/')[1].gsub('x-', '').gsub(/jpeg/, 'jpg').gsub(/text/, 'txt').gsub(/x-/, '')
  end
end
solve_captcha(image) click to toggle source
# File lib/easy_captcha_solver.rb, line 34
def solve_captcha(image)
  e = Tesseract::Engine.new {|e|
    e.language  = :eng
    e.blacklist = '|'
  }
  @captcha = e.text_for(image).strip # => 'ABC'
end