class MediaWiki::Gateway

Constants

USER_AGENT

Attributes

default_user_agent[RW]
cookies[R]
headers[R]
log[R]
wiki_url[R]

Public Class Methods

new(url, options = {}, http_options = {}) click to toggle source

Set up a MediaWiki::Gateway for a given MediaWiki installation

url

Path to API of target MediaWiki (eg. 'en.wikipedia.org/w/api.php')

options

Hash of options

http_options

Hash of options for RestClient::Request (via http_send)

Options:

:bot

When set to true, executes API queries with the bot parameter (see www.mediawiki.org/wiki/API:Edit#Parameters). Defaults to false.

:ignorewarnings

Log API warnings and invalid page titles, instead of throwing MediaWiki::APIError

:limit

Maximum number of results returned per search (see www.mediawiki.org/wiki/API:Query_-_Lists#Limits), defaults to the MediaWiki default of 500.

:logdevice

Log device to use. Defaults to STDERR

:loglevel

Log level to use, defaults to Logger::WARN. Set to Logger::DEBUG to dump every request and response to the log.

:maxlag

Maximum allowed server lag (see www.mediawiki.org/wiki/Manual:Maxlag_parameter), defaults to 5 seconds.

:retry_count

Number of times to try before giving up if MediaWiki returns 503 Service Unavailable, defaults to 3 (original request plus two retries).

:retry_delay

Seconds to wait before retry if MediaWiki returns 503 Service Unavailable, defaults to 10 seconds.

:user_agent

User-Agent header to send with requests, defaults to ::default_user_agent or nil.

   # File lib/media_wiki/gateway.rb
# Set up a MediaWiki::Gateway for a given MediaWiki installation.
#
# url::          Path to API of target MediaWiki
# options::      Hash of options; caller-supplied entries override the
#                defaults listed in the method body
# http_options:: Hash of extra options kept for the HTTP layer (merged into
#                every request by #http_send)
def initialize(url, options = {}, http_options = {})
  defaults = {
    bot:         false,
    limit:       500,
    logdevice:   STDERR,
    loglevel:    Logger::WARN,
    max_results: 500,
    maxlag:      5,
    retry_count: 3,
    retry_delay: 10,
    user_agent:  self.class.default_user_agent
  }
  @options = defaults.merge(options)

  @log = Logger.new(@options[:logdevice])
  @log.level = @options[:loglevel]

  # A custom user agent (if any) is prepended to the library's own identifier.
  user_agent = [@options[:user_agent], USER_AGENT].compact.join(' ')

  @http_options = http_options
  @wiki_url     = url
  @cookies      = {}
  @headers      = {
    'User-Agent'      => user_agent,
    'Accept-Encoding' => 'gzip'
  }
end

Public Instance Methods

send_request(form_data, continue_xpath = nil) click to toggle source

Make generic request to API

form_data

hash of attributes to post

continue_xpath

XPath selector for query continue parameter

Returns XML document

   # File lib/media_wiki/gateway.rb
# Make generic request to API, discarding the query-continue value.
#
# form_data::      hash of attributes to post
# continue_xpath:: XPath selector for query continue parameter
#
# Returns the XML document, i.e. the first element of the array produced
# by #make_api_request.
def send_request(form_data, continue_xpath = nil)
  result = make_api_request(form_data, continue_xpath)
  result.first
end

Private Instance Methods

get_response(res) click to toggle source

Get API XML response. If there are errors or warnings, raise APIError. Otherwise, return XML root.

    # File lib/media_wiki/gateway.rb
# Get API XML response.
#
# res:: raw response body; re-tagged as UTF-8 when it responds to
#       +force_encoding+
#
# Raises MediaWiki::Exception if the body cannot be parsed as XML, if the
# parsed document has no root element, or if the root element is not named
# "api" or "mediawiki".
# Raises APIError if the document contains an +error+ element. A +warnings+
# element is handed to #warning, which raises or logs depending on the
# :ignorewarnings option.
#
# Otherwise returns the XML root element.
def get_response(res)
  not_xml = 'Response is not XML.  Are you sure you are pointing to api.php?'

  begin
    res = res.force_encoding('UTF-8') if res.respond_to?(:force_encoding)
    doc = REXML::Document.new(res).root
  rescue REXML::ParseException
    raise MediaWiki::Exception.new(not_xml)
  end

  # A document without a root element (e.g. an empty body) may parse
  # without raising; report it like any other non-XML response instead of
  # failing later with NoMethodError on nil.
  raise MediaWiki::Exception.new(not_xml) unless doc

  log.debug("RES: #{doc}")

  unless %w[api mediawiki].include?(doc.name)
    raise MediaWiki::Exception.new("Response does not contain Mediawiki API XML: #{res}")
  end

  if error = doc.elements['error']
    raise APIError.new(*error.attributes.values_at(*%w[code info]))
  end

  if warnings = doc.elements['warnings']
    warning("API warning: #{warnings.children.map(&:text).join(', ')}")
  end

  doc
end
get_token(type, page_titles) click to toggle source

Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect')

   # File lib/media_wiki/gateway.rb
# Fetch token (type 'delete', 'edit', 'email', 'import', 'move', 'protect').
#
# type::        token type; also used to build the "<type>token" attribute name
# page_titles:: value sent as the 'titles' query parameter
#
# Returns the token read from the first query/pages/page element.
# Raises Unauthorized when that element carries no such token attribute.
def get_token(type, page_titles)
  query = {
    'action'  => 'query',
    'prop'    => 'info',
    'intoken' => type,
    'titles'  => page_titles
  }

  page  = send_request(query).elements['query/pages/page']
  token = page.attributes[type + 'token']
  return token if token

  raise Unauthorized.new("User is not permitted to perform this operation: #{type}")
end
http_send(url, form_data, headers, &block) click to toggle source

Execute the HTTP request using either GET or POST as appropriate. @yieldparam response

    # File lib/media_wiki/gateway.rb
# Execute the HTTP request using either GET or POST as appropriate.
#
# url::       request URL
# form_data:: request parameters; sent as query params for 'query' actions
#             (GET) and as the payload for everything else (POST)
# headers::   request headers; for GET requests the :params entry is set
#             on this hash
#
# Yields the response (after following a redirect, where applicable) and
# returns whatever RestClient::Request.execute returns.
def http_send(url, form_data, headers, &block)
  opts = @http_options.merge(url: url, headers: headers)

  if form_data['action'] == 'query'
    opts[:method] = :get
    headers[:params] = form_data
  else
    opts[:method] = :post
    opts[:payload] = form_data
  end

  log.debug("#{opts[:method].upcase}: #{form_data.inspect}, #{@cookies.inspect}")

  RestClient::Request.execute(opts) do |response, request, result|
    # Passing a block to RestClient::Request.execute disables its automatic
    # handling of response codes: no exceptions for HTTP errors, but also
    # no automatic redirects. HTTP 503 is checked by the caller
    # (see: #make_api_request); redirects of GET requests are followed here.
    redirected = [301, 302, 307].include?(response.code) && request.method == :get
    response = response.follow_redirection(request, result) if redirected

    block.call(response)
  end
end
iterate_query(list, res_xpath, attr, param, options, &block) click to toggle source

Iterate over query results

list

list name to query

res_xpath

XPath selector for results

attr

attribute name to extract, if any

param

parameter name to continue query

options

additional query options

Yields each attribute value, or, if attr is nil, each REXML::Element.

    # File lib/media_wiki/gateway.rb
 95 def iterate_query(list, res_xpath, attr, param, options, &block)
 96   items, block = [], lambda { |item| items << item } unless block
 97 
 98   attribute_names = %w[from continue].map { |name|
 99     "name()='#{param[0, 2]}#{name}'"
100   }
101 
102   req_xpath = "//query-continue/#{list}/@*[#{attribute_names.join(' or ')}]"
103   res_xpath = "//query/#{list}/#{res_xpath}" unless res_xpath.start_with?('/')
104 
105   options, continue = options.merge('action' => 'query', 'list' => list), nil
106 
107   loop {
108     res, continue = make_api_request(options, req_xpath)
109 
110     REXML::XPath.match(res, res_xpath).each { |element|
111       block[attr ? element.attributes[attr] : element]
112     }
113 
114     continue ? options[param] = continue : break
115   }
116 
117   items
118 end
make_api_request(form_data, continue_xpath = nil, retry_count = 1) click to toggle source

Make generic request to API

form_data

hash of attributes to post

continue_xpath

XPath selector for query continue parameter

retry_count

Counter for retries

Returns array of XML document and query continue parameter.

    # File lib/media_wiki/gateway.rb
# Make generic request to API
#
# form_data::      hash of attributes to post; updated in place with the
#                  'format' and 'maxlag' parameters
# continue_xpath:: XPath selector for query continue parameter
# retry_count::    Counter for retries (1 for the original request)
#
# A 503 response is retried after :retry_delay seconds until :retry_count
# attempts have been made. The login and createaccount actions are
# repeated with the token from the first response when the API answers
# "needtoken".
#
# Raises MediaWiki::Exception for any non-2xx response that is not retried
# and Unauthorized when login or account creation fails.
#
# Returns array of XML document and query continue parameter.
def make_api_request(form_data, continue_xpath = nil, retry_count = 1)
  form_data.update('format' => 'xml', 'maxlag' => @options[:maxlag])

  http_send(@wiki_url, form_data, @headers.merge(cookies: @cookies)) do |response|
    if response.code == 503 && retry_count < @options[:retry_count]
      log.warn("503 Service Unavailable: #{response.body}.  Retry in #{@options[:retry_delay]} seconds.")
      sleep(@options[:retry_delay])

      # Hand back the retry's result. Without the explicit return the
      # result was dropped and execution fell through to the status check
      # below, which then raised for the original 503 response.
      return make_api_request(form_data, continue_xpath, retry_count + 1)
    end

    # Check response for errors and return XML
    unless response.code >= 200 && response.code < 300
      raise MediaWiki::Exception.new("Bad response: #{response}")
    end

    doc = get_response(response.dup)

    # login and createaccount actions require a second request with a token received on the first request
    if %w[login createaccount].include?(action = form_data['action'])
      action_result = doc.elements[action].attributes['result']
      @cookies.update(response.cookies)

      case action_result.downcase
        when 'success'
          return [doc, false]
        when 'needtoken'
          token = doc.elements[action].attributes['token']

          if action == 'login'
            return make_api_request(form_data.merge('lgtoken' => token))
          elsif action == 'createaccount'
            return make_api_request(form_data.merge('token' => token))
          end
        else
          if action == 'login'
            raise Unauthorized.new("Login failed: #{action_result}")
          elsif action == 'createaccount'
            raise Unauthorized.new("Account creation failed: #{action_result}")
          end
      end
    end

    continue = if continue_xpath && doc.elements['query-continue']
      REXML::XPath.first(doc, continue_xpath)
    end

    return [doc, continue]
  end
end
valid_page?(page) click to toggle source
    # File lib/media_wiki/gateway.rb
# Check whether a page element refers to an existing, validly titled page.
#
# page:: page element (anything exposing +attributes+), or nil
#
# Returns +page+ itself when it is nil/false, false when the 'missing'
# attribute is set, and true when neither 'missing' nor 'invalid' is set.
# For an 'invalid' page the result of #warning is returned, which either
# raises or logs and yields false.
def valid_page?(page)
  return page unless page

  attrs = page.attributes
  return false if attrs['missing']
  return true unless attrs['invalid']

  warning("Invalid title '#{attrs['title']}'")
end
validate_options(options, valid_options) click to toggle source
    # File lib/media_wiki/gateway.rb
# Ensure every key of +options+ is a known option name.
#
# options::       hash whose keys are checked (compared via +to_s+)
# valid_options:: collection of permitted option names as strings
#
# Raises ArgumentError, with the backtrace trimmed by one frame, on the
# first unknown key. Returns +options+ when all keys are valid.
def validate_options(options, valid_options)
  options.each_key do |key|
    next if valid_options.include?(key.to_s)

    raise ArgumentError, "Unknown option '#{key}'", caller(1)
  end
end
warning(msg) click to toggle source
    # File lib/media_wiki/gateway.rb
# Report an API warning.
#
# msg:: warning text
#
# With the :ignorewarnings option set, logs the message at WARN level and
# returns false; otherwise raises APIError with code 'warning'.
def warning(msg)
  if @options[:ignorewarnings]
    log.warn(msg)
    false
  else
    raise APIError.new('warning', msg)
  end
end