class WebScrapingAI::SelectedHTMLApi
Attributes
Public Class Methods
# File lib/webscraping_ai/api/selected_html_api.rb, line 19
# Remembers the API client that the request methods of this class send
# their calls through.
#
# @param api_client [ApiClient] client to use; ApiClient.default is used
#   when the argument is omitted
def initialize(api_client = ApiClient.default)
  @api_client = api_client
end
Public Instance Methods
HTML of a selected page area by URL and CSS selector. Returns just HTML on success, JSON on error. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [String] :selector CSS selector (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true) @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter') @return [String]
# File lib/webscraping_ai/api/selected_html_api.rb, line 32
# Convenience form of get_selected_with_http_info: forwards both arguments
# unchanged and hands back only the first element of its result (the data),
# dropping the status code and response headers.
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters, passed through as-is
# @return [String] the data element returned by get_selected_with_http_info
def get_selected(url, opts = {})
  payload, = get_selected_with_http_info(url, opts)
  payload
end
HTML of multiple page areas by URL and CSS selectors. Always returns JSON. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true) @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter') @return [Array<String>]
# File lib/webscraping_ai/api/selected_html_api.rb, line 122
# Convenience form of get_selected_multiple_with_http_info: forwards both
# arguments unchanged and hands back only the first element of its result
# (the data), dropping the status code and response headers.
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters, passed through as-is
# @return [Array<String>] the data element returned by
#   get_selected_multiple_with_http_info
def get_selected_multiple(url, opts = {})
  payload, = get_selected_multiple_with_http_info(url, opts)
  payload
end
HTML of multiple page areas by URL and CSS selectors. Always returns JSON. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) @return [Array<(Array<String>, Integer, Hash)>] Array<String> data, response status code and response headers
# File lib/webscraping_ai/api/selected_html_api.rb, line 137
# Requests the HTML of multiple page areas (GET /selected-multiple) and
# returns the payload together with the response status code and headers.
#
# Caller-supplied opts[:query_params] and opts[:header_params] are
# shallow-copied before this method adds its own entries, so the hashes the
# caller passed in are never modified (and may be frozen).
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters
# @option opts [Array<String>] :selectors Multiple CSS selectors
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page
# @option opts [Integer] :timeout Maximum processing time in ms (validated against 1..30000)
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser
# @option opts [String] :proxy Type of proxy, 'datacenter' or 'residential'
# @return [Array<(Array<String>, Integer, Hash)>] data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and url is nil,
#   :timeout is outside 1..30000, or :proxy is not an allowed value
def get_selected_multiple_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: SelectedHTMLApi.get_selected_multiple ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    if url.nil?
      raise ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.get_selected_multiple"
    end

    timeout = opts[:timeout]
    unless timeout.nil?
      if timeout > 30000
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected_multiple, must be smaller than or equal to 30000.'
      end
      if timeout < 1
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected_multiple, must be greater than or equal to 1.'
      end
    end

    allowable_values = ["datacenter", "residential"]
    if opts[:proxy] && !allowable_values.include?(opts[:proxy])
      raise ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
    end
  end

  # query parameters -- copied so the caller's hash is left untouched
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  unless opts[:selectors].nil?
    query_params[:selectors] = @api_client.build_collection_param(opts[:selectors], :multi)
  end
  %i[headers timeout js proxy].each do |key|
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters -- copied for the same reason
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])

  new_options = opts.merge(
    header_params: header_params,
    query_params: query_params,
    form_params: opts[:form_params] || {},
    body: opts[:body],
    auth_names: opts[:auth_names] || ['api_key'],
    return_type: opts[:return_type] || 'Array<String>'
  )

  data, status_code, headers = @api_client.call_api(:GET, '/selected-multiple', new_options)
  if config.debugging
    config.logger.debug "API called: SelectedHTMLApi#get_selected_multiple\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
HTML of a selected page area by URL and CSS selector. Returns just HTML on success, JSON on error. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [String] :selector CSS selector (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
# File lib/webscraping_ai/api/selected_html_api.rb, line 47
# Requests the HTML of a selected page area (GET /selected) and returns the
# payload together with the response status code and headers.
#
# Caller-supplied opts[:query_params] and opts[:header_params] are
# shallow-copied before this method adds its own entries, so the hashes the
# caller passed in are never modified (and may be frozen).
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters
# @option opts [String] :selector CSS selector
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page
# @option opts [Integer] :timeout Maximum processing time in ms (validated against 1..30000)
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser
# @option opts [String] :proxy Type of proxy, 'datacenter' or 'residential'
# @return [Array<(String, Integer, Hash)>] data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and url is nil,
#   :timeout is outside 1..30000, or :proxy is not an allowed value
def get_selected_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: SelectedHTMLApi.get_selected ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    if url.nil?
      raise ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.get_selected"
    end

    timeout = opts[:timeout]
    unless timeout.nil?
      if timeout > 30000
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected, must be smaller than or equal to 30000.'
      end
      if timeout < 1
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.get_selected, must be greater than or equal to 1.'
      end
    end

    allowable_values = ["datacenter", "residential"]
    if opts[:proxy] && !allowable_values.include?(opts[:proxy])
      raise ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
    end
  end

  # query parameters -- copied so the caller's hash is left untouched
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  %i[selector headers timeout js proxy].each do |key|
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters -- copied for the same reason
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])

  new_options = opts.merge(
    header_params: header_params,
    query_params: query_params,
    form_params: opts[:form_params] || {},
    body: opts[:body],
    auth_names: opts[:auth_names] || ['api_key'],
    return_type: opts[:return_type] || 'String'
  )

  data, status_code, headers = @api_client.call_api(:GET, '/selected', new_options)
  if config.debugging
    config.logger.debug "API called: SelectedHTMLApi#get_selected\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
HTML of a selected page area by URL and CSS selector, with a POST request to the target page. Returns just HTML on success, JSON on error. Request body will be passed to the target page. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [String] :selector CSS selector (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true) @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter') @option opts [Hash<String, Object>] :request_body Request body to pass to the target page @return [String]
# File lib/webscraping_ai/api/selected_html_api.rb, line 213
# Convenience form of post_selected_with_http_info: forwards both arguments
# unchanged and hands back only the first element of its result (the data),
# dropping the status code and response headers.
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters, passed through as-is
# @return [String] the data element returned by post_selected_with_http_info
def post_selected(url, opts = {})
  payload, = post_selected_with_http_info(url, opts)
  payload
end
HTML of multiple page areas by URL and CSS selectors, with a POST request to the target page. Always returns JSON. Request body will be passed to the target page. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) (default to 5000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests (default to true) @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) (default to 'datacenter') @option opts [Hash<String, Object>] :request_body Request body to pass to the target page @return [Array<String>]
# File lib/webscraping_ai/api/selected_html_api.rb, line 307
# Convenience form of post_selected_multiple_with_http_info: forwards both
# arguments unchanged and hands back only the first element of its result
# (the data), dropping the status code and response headers.
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters, passed through as-is
# @return [Array<String>] the data element returned by
#   post_selected_multiple_with_http_info
def post_selected_multiple(url, opts = {})
  payload, = post_selected_multiple_with_http_info(url, opts)
  payload
end
HTML of multiple page areas by URL and CSS selectors, with a POST request to the target page. Always returns JSON. Request body will be passed to the target page. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [Array<String>] :selectors Multiple CSS selectors (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) @option opts [Hash<String, Object>] :request_body Request body to pass to the target page @return [Array<(Array<String>, Integer, Hash)>] Array<String> data, response status code and response headers
# File lib/webscraping_ai/api/selected_html_api.rb, line 323
# Requests the HTML of multiple page areas with a POST to /selected-multiple
# and returns the payload together with the response status code and headers.
#
# Caller-supplied opts[:query_params] and opts[:header_params] are
# shallow-copied before this method adds its own entries, so the hashes the
# caller passed in are never modified (and may be frozen).
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters
# @option opts [Array<String>] :selectors Multiple CSS selectors
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page
# @option opts [Integer] :timeout Maximum processing time in ms (validated against 1..30000)
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser
# @option opts [String] :proxy Type of proxy, 'datacenter' or 'residential'
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page;
#   only used when opts[:body] is not given
# @return [Array<(Array<String>, Integer, Hash)>] data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and url is nil,
#   :timeout is outside 1..30000, or :proxy is not an allowed value
def post_selected_multiple_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: SelectedHTMLApi.post_selected_multiple ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    if url.nil?
      raise ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.post_selected_multiple"
    end

    timeout = opts[:timeout]
    unless timeout.nil?
      if timeout > 30000
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected_multiple, must be smaller than or equal to 30000.'
      end
      if timeout < 1
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected_multiple, must be greater than or equal to 1.'
      end
    end

    allowable_values = ["datacenter", "residential"]
    if opts[:proxy] && !allowable_values.include?(opts[:proxy])
      raise ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
    end
  end

  # query parameters -- copied so the caller's hash is left untouched
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  unless opts[:selectors].nil?
    query_params[:selectors] = @api_client.build_collection_param(opts[:selectors], :multi)
  end
  %i[headers timeout js proxy].each do |key|
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters -- copied for the same reason
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json'])
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json', 'application/x-www-form-urlencoded', 'application/xml', 'text/plain'])

  # an explicit opts[:body] wins; otherwise the request_body option is serialized
  post_body = opts[:body] || @api_client.object_to_http_body(opts[:request_body])

  new_options = opts.merge(
    header_params: header_params,
    query_params: query_params,
    form_params: opts[:form_params] || {},
    body: post_body,
    auth_names: opts[:auth_names] || ['api_key'],
    return_type: opts[:return_type] || 'Array<String>'
  )

  data, status_code, headers = @api_client.call_api(:POST, '/selected-multiple', new_options)
  if config.debugging
    config.logger.debug "API called: SelectedHTMLApi#post_selected_multiple\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end
HTML of a selected page area by URL and CSS selector, with a POST request to the target page. Returns just HTML on success, JSON on error. Request body will be passed to the target page. @param url [String] URL of the target page @param [Hash] opts the optional parameters @option opts [String] :selector CSS selector (null by default, returns whole page HTML) @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page. Can be specified either via a nested query parameter (…&headers[One]=value1&headers[Another]=value2) or as a JSON encoded object (…&headers={"One": "value1", "Another": "value2"}) @option opts [Integer] :timeout Maximum processing time in ms. Increase it in case of timeout errors (5000 by default, maximum is 30000) @option opts [Boolean] :js Execute on-page JavaScript using a headless browser (true by default), costs 2 requests @option opts [String] :proxy Type of proxy, use residential proxies if your site restricts traffic from datacenters (datacenter by default) @option opts [Hash<String, Object>] :request_body Request body to pass to the target page @return [Array<(String, Integer, Hash)>] String data, response status code and response headers
# File lib/webscraping_ai/api/selected_html_api.rb, line 229
# Requests the HTML of a selected page area with a POST to /selected and
# returns the payload together with the response status code and headers.
#
# Caller-supplied opts[:query_params] and opts[:header_params] are
# shallow-copied before this method adds its own entries, so the hashes the
# caller passed in are never modified (and may be frozen).
#
# @param url [String] URL of the target page
# @param opts [Hash] the optional parameters
# @option opts [String] :selector CSS selector
# @option opts [Hash<String, String>] :headers HTTP headers to pass to the target page
# @option opts [Integer] :timeout Maximum processing time in ms (validated against 1..30000)
# @option opts [Boolean] :js Execute on-page JavaScript using a headless browser
# @option opts [String] :proxy Type of proxy, 'datacenter' or 'residential'
# @option opts [Hash<String, Object>] :request_body Request body to pass to the target page;
#   only used when opts[:body] is not given
# @return [Array<(String, Integer, Hash)>] data, response status code and response headers
# @raise [ArgumentError] when client-side validation is enabled and url is nil,
#   :timeout is outside 1..30000, or :proxy is not an allowed value
def post_selected_with_http_info(url, opts = {})
  config = @api_client.config
  config.logger.debug 'Calling API: SelectedHTMLApi.post_selected ...' if config.debugging

  if config.client_side_validation
    # verify the required parameter 'url' is set
    if url.nil?
      raise ArgumentError, "Missing the required parameter 'url' when calling SelectedHTMLApi.post_selected"
    end

    timeout = opts[:timeout]
    unless timeout.nil?
      if timeout > 30000
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected, must be smaller than or equal to 30000.'
      end
      if timeout < 1
        raise ArgumentError, 'invalid value for "opts[:"timeout"]" when calling SelectedHTMLApi.post_selected, must be greater than or equal to 1.'
      end
    end

    allowable_values = ["datacenter", "residential"]
    if opts[:proxy] && !allowable_values.include?(opts[:proxy])
      raise ArgumentError, "invalid value for \"proxy\", must be one of #{allowable_values}"
    end
  end

  # query parameters -- copied so the caller's hash is left untouched
  query_params = (opts[:query_params] || {}).dup
  query_params[:url] = url
  %i[selector headers timeout js proxy].each do |key|
    query_params[key] = opts[key] unless opts[key].nil?
  end

  # header parameters -- copied for the same reason
  header_params = (opts[:header_params] || {}).dup
  header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/html'])
  header_params['Content-Type'] = @api_client.select_header_content_type(['application/json', 'application/x-www-form-urlencoded', 'application/xml', 'text/plain'])

  # an explicit opts[:body] wins; otherwise the request_body option is serialized
  post_body = opts[:body] || @api_client.object_to_http_body(opts[:request_body])

  new_options = opts.merge(
    header_params: header_params,
    query_params: query_params,
    form_params: opts[:form_params] || {},
    body: post_body,
    auth_names: opts[:auth_names] || ['api_key'],
    return_type: opts[:return_type] || 'String'
  )

  data, status_code, headers = @api_client.call_api(:POST, '/selected', new_options)
  if config.debugging
    config.logger.debug "API called: SelectedHTMLApi#post_selected\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
  end
  [data, status_code, headers]
end