class TaskStruct
爬取任务类
Attributes
callback[RW]
connection_options[RW]
convert_to_utf8[RW]
custom_data[RW]
errback[RW]
href[RW]
http_method[RW]
local_path[RW]
overwrite_exist[RW]
parse_method[RW]
request_object[RW]
request_options[RW]
stream_callback[RW]
Public Class Methods
new(href, local_path = :nil, http_method: :get, custom_data: nil, parse_method: nil, callback: nil, errback: nil, stream_callback: nil, convert_to_utf8: false, overwrite_exist: false, redirects: 3, keepalive: nil, file: nil, path: nil, query: nil, body: nil, head: nil, connect_timeout: 60, inactivity_timeout: nil, ssl: nil, bind: nil, proxy: nil)
click to toggle source
- href
  请求链接
- local_path
  保存数据的本地路径(保存文件的情况下此路径作为去重标准)
- http_method
  http方法,取值: :get, :head, :delete, :put, :post, :patch, :options
- custom_data
  自定义数据
- parse_method
  解析保存文件的回调,参数是TaskStruct对象本身
# File lib/list_spider.rb, line 18
#
# Builds a crawl task. Per-task settings are stored in individual instance
# variables; HTTP request settings and connection settings are gathered into
# the @request_options and @connection_options hashes respectively.
#
# Any request/connection setting left as nil is removed from its hash by
# Hash#compact, so only explicitly set values (plus the non-nil defaults
# redirects: 3 and connect_timeout: 60) are stored.
def initialize(href, # request URL
               # Local path the data is saved to (when a file is saved, this
               # path is used as the de-duplication key).
               # NOTE(review): the default is the Symbol :nil, not nil --
               # presumably a "not given" marker; confirm against callers.
               local_path = :nil,
               # HTTP method, one of:
               # :get, :head, :delete, :put, :post, :patch, :options
               http_method: :get,
               custom_data: nil, # arbitrary user data carried with the task
               # Callback for parsing the saved file; receives this TaskStruct.
               parse_method: nil,
               # Callback run after the request succeeds. A file may not have
               # been saved at that point (e.g. on 301 or 404).
               # Receives this TaskStruct and the matching
               # EventMachine::HttpRequest object:
               #   http_req.response_header.status  status code
               #   http_req.response_header         response headers
               #   http_req.response                response body
               callback: nil,
               # Callback run after the request fails.
               # Receives this TaskStruct and the matching
               # EventMachine::HttpRequest object.
               errback: nil,
               stream_callback: nil, # callback for handling streamed data
               convert_to_utf8: false, # whether to convert to UTF-8
               overwrite_exist: false, # whether to overwrite an existing file
               # --- request settings ---
               redirects: 3, # number of redirects to follow
               keepalive: nil, # (connection reuse is not supported yet)
               file: nil, # path of a file to upload
               # Request path; useful for pipelined requests (not supported yet).
               path: nil,
               query: nil, # query string, either a String or a Hash
               body: nil, # request body, either a String or a Hash
               head: nil, # request headers
               # --- connection settings ---
               connect_timeout: 60, # connection timeout
               inactivity_timeout: nil, # timeout once connected
               # SSL settings, e.g.
               # ssl: {
               #   :private_key_file => '/tmp/server.key',
               #   :cert_chain_file => '/tmp/server.crt',
               #   :verify_peer => false
               # }
               ssl: nil,
               # bind: {
               #   :host => '123.123.123.123', # use a specific interface for outbound request
               #   :port => '123'
               # }
               bind: nil,
               # Proxy settings, e.g.
               # proxy: {
               #   :host => '127.0.0.1', # proxy address
               #   :port => 9000, # proxy port
               #   :type => :socks5 # default proxy mode is HTTP proxy, change to :socks5 if required
               #   :authorization => ['user', 'pass'] # proxy authorization header
               # }
               proxy: nil)
  @href = href
  @local_path = local_path
  @http_method = http_method
  @custom_data = custom_data
  @parse_method = parse_method
  @callback = callback
  @errback = errback
  @stream_callback = stream_callback
  @convert_to_utf8 = convert_to_utf8
  @overwrite_exist = overwrite_exist

  # compact drops every nil entry so unset options are not stored.
  @request_options = {
    redirects: redirects,
    keepalive: keepalive,
    file: file,
    path: path,
    query: query,
    body: body,
    head: head
  }.compact

  @connection_options = {
    connect_timeout: connect_timeout,
    inactivity_timeout: inactivity_timeout,
    ssl: ssl,
    bind: bind,
    proxy: proxy
  }.compact
end