module Kudzu::Model::Page
Public Instance Methods
body()
click to toggle source
# File lib/kudzu/model/page.rb, line 63
# Reader for the page body.
#
# Returns whatever is currently stored in @body (nil when nothing has been
# assigned yet).
def body
  @body
end
body=(body)
click to toggle source
# File lib/kudzu/model/page.rb, line 67
# Writer for the page body.
#
# decoded_body and parsed_doc memoize values derived from the body in
# @decoded_body and @parsed_doc. Those caches are cleared here so that a
# later read is computed from the newly assigned body instead of the old one.
#
# body - the new body value (stored as-is in @body).
def body=(body)
  @decoded_body = nil
  @parsed_doc = nil
  @body = body
end
css?()
click to toggle source
# File lib/kudzu/model/page.rb, line 23
# Tells whether the page's MIME type denotes a stylesheet.
#
# mime_type is stringified first, so a nil MIME type simply yields false.
# The pattern is unanchored: any MIME string containing "text/css" matches.
#
# Returns true or false.
def css?
  mime_type.to_s.match?(%r{text/css})
end
decoded_body()
click to toggle source
# File lib/kudzu/model/page.rb, line 79
# Body after it has been passed through decode_body.
#
# A truthy result is cached in @decoded_body and reused on later calls; a
# nil/false result is not cached, so decode_body runs again next time.
def decoded_body
  return @decoded_body if @decoded_body

  @decoded_body = decode_body(body)
end
etag()
click to toggle source
# File lib/kudzu/model/page.rb, line 11
# Looks up the 'etag' entry of response_header.
#
# Returns whatever response_header yields for that key (nil for a plain Hash
# without the entry).
def etag
  headers = response_header
  headers['etag']
end
filtered()
click to toggle source
# File lib/kudzu/model/page.rb, line 71
# Reader for the filtered flag/value.
#
# Returns whatever is currently stored in @filtered (nil when never assigned).
def filtered
  @filtered
end
filtered=(filtered)
click to toggle source
# File lib/kudzu/model/page.rb, line 75
# Writer for the filtered flag/value.
#
# filtered - the value to store in @filtered; it is kept as given.
def filtered=(filtered)
  @filtered = filtered
end
html?()
click to toggle source
# File lib/kudzu/model/page.rb, line 15
# Tells whether the page's MIME type denotes HTML or XHTML.
#
# mime_type is stringified first, so a nil MIME type simply yields false.
# The pattern is unanchored: it matches anywhere inside the MIME string.
#
# Returns true or false.
def html?
  mime_type.to_s.match?(%r{text/html|application/xhtml\+xml})
end
js?()
click to toggle source
# File lib/kudzu/model/page.rb, line 27
# Tells whether the page's MIME type denotes JavaScript.
#
# Recognises text/javascript, application/javascript and
# application/x-javascript. mime_type is stringified first, so a nil MIME
# type simply yields false. The pattern is unanchored.
#
# Returns true or false.
def js?
  mime_type.to_s.match?(%r{text/javascript|application/javascript|application/x-javascript})
end
last_modified()
click to toggle source
# File lib/kudzu/model/page.rb, line 4
# Reads the 'last-modified' entry of response_header and parses it with
# Time.parse, converting the result to local time.
#
# Returns a Time, or nil when the entry is missing/falsy. Any StandardError
# raised while reading or parsing the header is swallowed and also yields nil.
def last_modified
  header_value = response_header['last-modified']
  return nil unless header_value

  Time.parse(header_value).localtime
rescue StandardError
  nil
end
parsed_doc()
click to toggle source
# File lib/kudzu/model/page.rb, line 83
# Parsed document built from decoded_body.
#
# Uses Nokogiri::HTML when html? is true, otherwise Nokogiri::XML when xml?
# is true, otherwise yields nil. A truthy result is cached in @parsed_doc;
# a nil result is not cached, so the type checks run again on the next call.
def parsed_doc
  return @parsed_doc if @parsed_doc

  @parsed_doc =
    if html?
      Nokogiri::HTML(decoded_body)
    elsif xml?
      Nokogiri::XML(decoded_body)
    end
end
status_client_error?()
click to toggle source
# File lib/kudzu/model/page.rb, line 43
# Tells whether status lies in the range 400..499 (inclusive).
#
# Range#cover? is used so that a status that cannot be compared with an
# Integer (for example nil) yields false instead of raising.
#
# Returns true or false.
def status_client_error?
  (400..499).cover?(status)
end
status_gone?()
click to toggle source
# File lib/kudzu/model/page.rb, line 59
# Tells whether status equals 410.
#
# Returns the result of comparing status with 410 via ==.
def status_gone?
  status == 410
end
status_not_found?()
click to toggle source
# File lib/kudzu/model/page.rb, line 55
# Tells whether status equals 404.
#
# Returns the result of comparing status with 404 via ==.
def status_not_found?
  status == 404
end
status_not_modified?()
click to toggle source
# File lib/kudzu/model/page.rb, line 51
# Tells whether status equals 304.
#
# Returns the result of comparing status with 304 via ==.
def status_not_modified?
  status == 304
end
status_redirection?()
click to toggle source
# File lib/kudzu/model/page.rb, line 39
# Tells whether status lies in the range 300..399 (inclusive).
#
# Range#cover? is used so that a status that cannot be compared with an
# Integer (for example nil) yields false instead of raising.
#
# Returns true or false.
def status_redirection?
  (300..399).cover?(status)
end
status_server_error?()
click to toggle source
# File lib/kudzu/model/page.rb, line 47
# Tells whether status lies in the range 500..599 (inclusive).
#
# Range#cover? is used so that a status that cannot be compared with an
# Integer (for example nil) yields false instead of raising.
#
# Returns true or false.
def status_server_error?
  (500..599).cover?(status)
end
status_success?()
click to toggle source
# File lib/kudzu/model/page.rb, line 35
# Tells whether status lies in the range 200..299 (inclusive).
#
# Range#cover? is used so that a status that cannot be compared with an
# Integer (for example nil) yields false instead of raising.
#
# Returns true or false.
def status_success?
  (200..299).cover?(status)
end
text?()
click to toggle source
# File lib/kudzu/model/page.rb, line 31
# Tells whether the page should be treated as text.
#
# True when html? or xml? is true, or when the stringified mime_type
# contains "text/" anywhere. A nil MIME type yields false unless html? or
# xml? says otherwise.
#
# Returns true or false.
def text?
  html? || xml? || mime_type.to_s.match?(%r{text/})
end
xml?()
click to toggle source
# File lib/kudzu/model/page.rb, line 19
# Tells whether the page's MIME type denotes XML.
#
# Recognises text/xml, application/xml, application/rss+xml and
# application/atom+xml. mime_type is stringified first, so a nil MIME type
# simply yields false. The pattern is unanchored.
#
# Returns true or false.
def xml?
  mime_type.to_s.match?(%r{text/xml|application/xml|application/rss\+xml|application/atom\+xml})
end
Private Instance Methods
decode_body(body)
click to toggle source
# File lib/kudzu/model/page.rb, line 93
# Converts a textual body to UTF-8.
#
# body - the body to decode; it is never mutated (work happens on a dup).
#
# When body is nil/false or text? is false, body is handed back untouched.
# Otherwise the copy is tagged with the encoding reported by find_encoding
# (when there is one) and then transcoded to UTF-8, replacing invalid and
# undefined byte sequences.
#
# Returns the UTF-8 String, or body itself in the pass-through case.
def decode_body(body)
  return body unless body && text?

  source_encoding = find_encoding(body)
  copy = body.dup
  copy.force_encoding(source_encoding) if source_encoding
  copy.encode('utf-8', invalid: :replace, undef: :replace)
end
find_encoding(body)
click to toggle source
# File lib/kudzu/model/page.rb, line 105
# Resolves charset to the Encoding that should be used to decode body.
#
# body - the raw body; only consulted (via NKF.guess) when charset resolves
#        to EUC-JP.
#
# Returns an Encoding, or nil when charset does not name a known encoding.
# A nil (or otherwise non-string) charset also yields nil: Encoding.find
# raises TypeError for it rather than ArgumentError, so both are rescued.
def find_encoding(body)
  enc =
    begin
      Encoding.find(charset)
    rescue ArgumentError, TypeError
      return nil
    end

  if enc == Encoding::Shift_JIS
    # Shift_JIS is always swapped for CP932 — presumably because content
    # labelled Shift_JIS often uses the Windows extensions; confirm upstream.
    Encoding::CP932
  elsif enc == Encoding::EUC_JP
    # nkf is loaded lazily: it is only needed on this branch.
    require 'nkf'
    guessed = NKF.guess(body)
    # Trust the guess only when it is one of the EUC-JP variants.
    [Encoding::EUCJP_MS, Encoding::CP51932].include?(guessed) ? guessed : enc
  else
    enc
  end
end