module Kudzu::Model::Page

Public Instance Methods

body() click to toggle source
# File lib/kudzu/model/page.rb, line 63
# Reader for the page body.
#
# @return [Object, nil] whatever was last stored in +@body+ (presumably the
#   raw response body string; confirm against the fetcher)
def body
  @body
end
body=(body) click to toggle source
# File lib/kudzu/model/page.rb, line 67
# Stores a new page body.
#
# The memoized results of +decoded_body+ and +parsed_doc+ are derived from
# the body, so they are cleared here; otherwise they would keep returning
# data computed from the previous body.
#
# @param body [Object, nil] the new body (presumably the raw response string)
# @return [Object, nil] the assigned body
def body=(body)
  # Drop caches that were computed from the old body.
  @decoded_body = nil
  @parsed_doc = nil
  # Keep the assignment last so the method still evaluates to the new body.
  @body = body
end
css?() click to toggle source
# File lib/kudzu/model/page.rb, line 23
# Tells whether the page's MIME type denotes a CSS stylesheet.
#
# @return [Boolean] true when the stringified +mime_type+ contains "text/css"
def css?
  mime_type.to_s.match?(%r{text/css})
end
decoded_body() click to toggle source
# File lib/kudzu/model/page.rb, line 79
# Returns the body after it has been passed through +decode_body+.
#
# A truthy result is cached in +@decoded_body+ and reused on later calls;
# a nil/false result is not cached and is recomputed each time.
#
# @return [Object, nil] the decoded body
def decoded_body
  return @decoded_body if @decoded_body

  @decoded_body = decode_body(body)
end
etag() click to toggle source
# File lib/kudzu/model/page.rb, line 11
# Looks up the 'etag' entry of the response header.
#
# @return [Object, nil] the value stored under 'etag' in +response_header+
def etag
  headers = response_header
  headers['etag']
end
filtered() click to toggle source
# File lib/kudzu/model/page.rb, line 71
# Reader for the +filtered+ attribute.
#
# @return [Object, nil] whatever was last stored in +@filtered+
def filtered
  @filtered
end
filtered=(filtered) click to toggle source
# File lib/kudzu/model/page.rb, line 75
# Writer for the +filtered+ attribute.
#
# @param filtered [Object] value to store in +@filtered+
# @return [Object] the assigned value
def filtered=(filtered)
  @filtered = filtered
end
html?() click to toggle source
# File lib/kudzu/model/page.rb, line 15
# Tells whether the page's MIME type denotes HTML or XHTML.
#
# @return [Boolean] true when the stringified +mime_type+ contains
#   "text/html" or "application/xhtml+xml"
def html?
  content_type = mime_type.to_s
  content_type.match?(%r{text/html|application/xhtml\+xml})
end
js?() click to toggle source
# File lib/kudzu/model/page.rb, line 27
# Tells whether the page's MIME type denotes JavaScript.
#
# @return [Boolean] true when the stringified +mime_type+ contains
#   "text/javascript", "application/javascript" or "application/x-javascript"
def js?
  content_type = mime_type.to_s
  content_type.match?(%r{text/javascript|application/javascript|application/x-javascript})
end
last_modified() click to toggle source
# File lib/kudzu/model/page.rb, line 4
# Parses the 'last-modified' response header into a local Time.
#
# Any StandardError raised along the way (for example an unparsable header
# value) is swallowed and reported as nil.
#
# @return [Time, nil] the parsed time converted with +localtime+, or nil when
#   the header is absent or cannot be processed
def last_modified
  header_value = response_header['last-modified']
  return nil unless header_value

  Time.parse(header_value).localtime
rescue StandardError
  nil
end
parsed_doc() click to toggle source
# File lib/kudzu/model/page.rb, line 83
# Returns the decoded body parsed with Nokogiri.
#
# HTML pages go through Nokogiri::HTML and XML pages through Nokogiri::XML;
# any other page yields nil. A truthy result is cached in +@parsed_doc+.
#
# @return [Object, nil] the Nokogiri document, or nil when the page is
#   neither HTML nor XML
def parsed_doc
  return @parsed_doc if @parsed_doc

  @parsed_doc =
    if html?
      Nokogiri::HTML(decoded_body)
    elsif xml?
      Nokogiri::XML(decoded_body)
    end
end
status_client_error?() click to toggle source
# File lib/kudzu/model/page.rb, line 43
# Tells whether +status+ is a 4xx (client error) code.
#
# Uses Range#cover? so that a nil (or otherwise non-comparable) status
# answers false instead of raising, matching the equality-based status
# predicates in this module.
#
# @return [Boolean] true when +status+ lies within 400..499
def status_client_error?
  (400..499).cover?(status)
end
status_gone?() click to toggle source
# File lib/kudzu/model/page.rb, line 59
# Tells whether +status+ is 410 (Gone).
#
# @return [Boolean] true when +status+ equals 410
def status_gone?
  status == 410
end
status_not_found?() click to toggle source
# File lib/kudzu/model/page.rb, line 55
# Tells whether +status+ is 404 (Not Found).
#
# @return [Boolean] true when +status+ equals 404
def status_not_found?
  status == 404
end
status_not_modified?() click to toggle source
# File lib/kudzu/model/page.rb, line 51
# Tells whether +status+ is 304 (Not Modified).
#
# @return [Boolean] true when +status+ equals 304
def status_not_modified?
  status == 304
end
status_redirection?() click to toggle source
# File lib/kudzu/model/page.rb, line 39
# Tells whether +status+ is a 3xx (redirection) code.
#
# Uses Range#cover? so that a nil (or otherwise non-comparable) status
# answers false instead of raising, matching the equality-based status
# predicates in this module.
#
# @return [Boolean] true when +status+ lies within 300..399
def status_redirection?
  (300..399).cover?(status)
end
status_server_error?() click to toggle source
# File lib/kudzu/model/page.rb, line 47
# Tells whether +status+ is a 5xx (server error) code.
#
# Uses Range#cover? so that a nil (or otherwise non-comparable) status
# answers false instead of raising, matching the equality-based status
# predicates in this module.
#
# @return [Boolean] true when +status+ lies within 500..599
def status_server_error?
  (500..599).cover?(status)
end
status_success?() click to toggle source
# File lib/kudzu/model/page.rb, line 35
# Tells whether +status+ is a 2xx (success) code.
#
# Uses Range#cover? so that a nil (or otherwise non-comparable) status
# answers false instead of raising, matching the equality-based status
# predicates in this module.
#
# @return [Boolean] true when +status+ lies within 200..299
def status_success?
  (200..299).cover?(status)
end
text?() click to toggle source
# File lib/kudzu/model/page.rb, line 31
# Tells whether the page should be treated as text.
#
# A page is textual when it is HTML, when it is XML, or when its stringified
# +mime_type+ contains "text/".
#
# @return [Boolean]
def text?
  return true if html? || xml?

  mime_type.to_s.match?(%r{text/})
end
xml?() click to toggle source
# File lib/kudzu/model/page.rb, line 19
# Tells whether the page's MIME type denotes XML (including RSS/Atom feeds).
#
# @return [Boolean] true when the stringified +mime_type+ contains
#   "text/xml", "application/xml", "application/rss+xml" or
#   "application/atom+xml"
def xml?
  content_type = mime_type.to_s
  content_type.match?(%r{text/xml|application/xml|application/rss\+xml|application/atom\+xml})
end

Private Instance Methods

decode_body(body) click to toggle source
# File lib/kudzu/model/page.rb, line 93
# Converts a textual body to UTF-8.
#
# A falsy body, or a body belonging to a page for which +text?+ is false, is
# returned untouched. Otherwise a copy of the body is transcoded to UTF-8
# with invalid and undefined sequences replaced. When +find_encoding+ yields
# an encoding, the copy is first tagged with it; otherwise the body's own
# encoding is used as the source.
#
# @param body [String, nil] the body to decode
# @return [String, nil] a new UTF-8 string, or +body+ itself when no
#   decoding applies
def decode_body(body)
  return body unless body && text?

  source_encoding = find_encoding(body)
  # Work on a copy so the caller's string keeps its original encoding tag.
  text = body.dup
  text.force_encoding(source_encoding) if source_encoding
  text.encode('utf-8', invalid: :replace, undef: :replace)
end
find_encoding(body) click to toggle source
# File lib/kudzu/model/page.rb, line 105
# Resolves the page's +charset+ to a Ruby Encoding, with adjustments for
# Japanese encodings.
#
# * Shift_JIS is mapped to CP932.
# * For EUC-JP, NKF guesses the encoding from +body+; the guess is used only
#   if it is eucJP-ms or CP51932, otherwise EUC-JP is kept.
#
# @param body [String] the raw body, only inspected in the EUC-JP case
# @return [Encoding, nil] the encoding to decode with, or nil when +charset+
#   is missing or does not name a known encoding
def find_encoding(body)
  enc =
    begin
      Encoding.find(charset)
    rescue ArgumentError, TypeError
      # ArgumentError: unknown encoding name.
      # TypeError: charset is nil (or not string-like), e.g. no charset was
      # detected for the page. Both mean "no usable encoding".
      return nil
    end

  if enc == Encoding::Shift_JIS
    Encoding::CP932
  elsif enc == Encoding::EUC_JP
    # Loaded lazily: only EUC-JP pages need NKF.
    require 'nkf'
    guessed = NKF.guess(body)
    [Encoding::EUCJP_MS, Encoding::CP51932].include?(guessed) ? guessed : enc
  else
    enc
  end
end