class TaskStruct

爬取任务类

Attributes

callback[RW]
connection_options[RW]
convert_to_utf8[RW]
custom_data[RW]
errback[RW]
href[RW]
http_method[RW]
local_path[RW]
overwrite_exist[RW]
parse_method[RW]
request_object[RW]
request_options[RW]
stream_callback[RW]

Public Class Methods

new(href, local_path = :nil, http_method: :get, custom_data: nil, parse_method: nil, callback: nil, errback: nil, stream_callback: nil, convert_to_utf8: false, overwrite_exist: false, redirects: 3, keepalive: nil, file: nil, path: nil, query: nil, body: nil, head: nil, connect_timeout: 60, inactivity_timeout: nil, ssl: nil, bind: nil, proxy: nil)
  • href 请求链接

  • local_path 保存数据的本地路径(保存文件的情况下此路径作为去重标准)

  • http_method http方法,取值::get, :head, :delete, :put, :post, :patch, :options

  • custom_data 自定义数据

  • parse_method 解析保存文件的回调,参数是TaskStruct对象本身

# File lib/list_spider.rb, line 18
# Sets up a crawl task. The per-task fields are stored directly on the
# instance; the request settings and the connection settings are collected
# into @request_options and @connection_options, each without the entries
# whose value is nil.
def initialize(
  # Request URL.
  href,
  # Local path where the data is saved (when a file is saved, this path is
  # used as the de-duplication key).
  local_path = :nil,
  # HTTP method; one of :get, :head, :delete, :put, :post, :patch, :options.
  http_method: :get,
  # Custom user data.
  custom_data: nil,
  # Callback that parses the saved file; it receives the TaskStruct object
  # itself.
  parse_method: nil,
  # Callback invoked after a successful request. The file may not have been
  # saved at this point (e.g. on 301 or 404). It receives the TaskStruct
  # object itself and the matching EventMachine::HttpRequest object:
  #   http_req.response_header.status  status code
  #   http_req.response_header         response headers
  #   http_req.response                response body
  callback: nil,
  # Callback invoked after a failed request. It receives the TaskStruct
  # object itself and the matching EventMachine::HttpRequest object.
  errback: nil,
  # Callback that handles streamed data.
  stream_callback: nil,
  # Whether to convert the data to UTF-8.
  convert_to_utf8: false,
  # Whether to overwrite an existing file.
  overwrite_exist: false,
  #
  # --- Request settings ---
  #
  # Number of redirects.
  redirects: 3,
  # (Connection reuse is not supported yet.)
  keepalive: nil,
  # Path of the file to upload.
  file: nil,
  # Request path; useful for pipelined requests (not supported yet).
  path: nil,
  # Query string; either a String or a Hash.
  query: nil,
  # Request body; either a String or a Hash.
  body: nil,
  # Request headers.
  head: nil,
  #
  # --- Connection settings ---
  #
  # Connection timeout.
  connect_timeout: 60,
  # Timeout that applies once the connection is established.
  inactivity_timeout: nil,
  # SSL settings, e.g.
  #   ssl: {
  #     :private_key_file => '/tmp/server.key',
  #     :cert_chain_file => '/tmp/server.crt',
  #     :verify_peer => false
  #   }
  ssl: nil,
  # Bind settings, e.g.
  #   bind: {
  #     :host => '123.123.123.123',   # use a specific interface for outbound request
  #     :port => '123'
  #   }
  bind: nil,
  # Proxy settings, e.g.
  #   proxy: {
  #     :host => '127.0.0.1',    # proxy address
  #     :port => 9000,           # proxy port
  #     :type => :socks5,        # default proxy mode is HTTP proxy, change to :socks5 if required
  #     :authorization => ['user', 'pass']  # proxy authorization header
  #   }
  proxy: nil
)
  request_settings = {
    redirects: redirects,
    keepalive: keepalive,
    file: file,
    path: path,
    query: query,
    body: body,
    head: head
  }
  connection_settings = {
    connect_timeout: connect_timeout,
    inactivity_timeout: inactivity_timeout,
    ssl: ssl,
    bind: bind,
    proxy: proxy
  }

  @href = href
  @local_path = local_path
  @http_method = http_method
  @custom_data = custom_data

  @parse_method = parse_method
  @callback = callback
  @errback = errback
  @stream_callback = stream_callback

  @convert_to_utf8 = convert_to_utf8
  @overwrite_exist = overwrite_exist

  # Only nil entries are dropped; an explicit false is kept.
  @request_options = request_settings.reject { |_name, value| value.nil? }
  @connection_options = connection_settings.reject { |_name, value| value.nil? }
end