class SpiderHtml

Constants

VERSION

Public Class Methods

phantom_file(url, file_name, opt={}) click to toggle source

SpiderHtml.phantom_file(“www.baidu.com”, “baidu.html”) SpiderHtml.phantom_file(“www.baidu.com”, “baidu.html”, image_dir: “#{Dir.pwd}/image”, html_dir: “#{Dir.pwd}/html”) 可以默认在项目里面constants/spider_html.yml 可以传入image_dir,html_dir,logger

# File lib/spider_html.rb, line 29
def self.phantom_file(url, file_name, opt={})
  spider_html_path = "#{Dir.pwd}/config/constants/spider_html.yml"
  if File.exist?(spider_html_path)
    spider = YAML.load_file(spider_html_path)
  else
    spider = YAML.load_file(File.join(File.dirname(__FILE__), "spider_html.yml"))
  end
  image_dir = opt[:image_dir].nil?? spider["image_dir"] : opt[:image_dir]
  html_dir = opt[:html_dir].nil?? spider["html_dir"] : opt[:html_dir]
  js_path = File.join(File.dirname(__FILE__), "phantom.js")
  logger = opt[:logger]

  if file_name.include?(".png")
    path = "#{image_dir}/#{file_name}"
  else
    path = "#{html_dir}/#{file_name}"
  end

  dir_path = File.dirname(path)
  FileUtils.mkdir_p(dir_path)

  order = "phantomjs #{js_path} #{url} #{path}"
  self.log_info(logger, "system:#{order}")
  result = system order
  if !result
    self.log_error(logger, "phantomjs error:#{order}")
  end
end
request_http(url, opt={}) click to toggle source

SpiderHtml.request_http(“www.baidu.com”) SpiderHtml.request_http(“www.baidu.com”,{method: post}) opt传入method,默认是get方法 return {body: body, code: code}

# File lib/spider_html.rb, line 12
def self.request_http(url, opt={})
  uri = URI(url)
  if opt[:method] == "post"
    req = Net::HTTP::Post.new(uri)
  else
    req = Net::HTTP::Get.new(uri)
  end
  res = Net::HTTP.start(uri.hostname, uri.port, :use_ssl => uri.scheme == 'https', :ssl_verify_mode => OpenSSL::SSL::VERIFY_NONE) {|http|
    http.request(req)
  }
  return {body: res.body, code: res.code}
end

Private Class Methods

log_error(logger, msg) click to toggle source
# File lib/spider_html.rb, line 67
def self.log_error(logger, msg)
  if logger
    logger.error msg
  else
    p msg
  end
end
log_info(logger, msg) click to toggle source
# File lib/spider_html.rb, line 59
def self.log_info(logger, msg)
  if logger
    logger.info msg
  else
    p msg
  end
end