Table of Contents - kudzu-1.2.0 Documentation
Classes and Modules
- Kudzu
- Kudzu::Adapter
- Kudzu::Adapter::Memory
- Kudzu::Adapter::Memory::Frontier
- Kudzu::Adapter::Memory::Link
- Kudzu::Adapter::Memory::Page
- Kudzu::Adapter::Memory::Repository
- Kudzu::Agent
- Kudzu::Agent::Fetcher
- Kudzu::Agent::Http
- Kudzu::Agent::Http::Connection
- Kudzu::Agent::Http::ConnectionPool
- Kudzu::Agent::PageFilterer
- Kudzu::Agent::Reference
- Kudzu::Agent::Response
- Kudzu::Agent::Robots
- Kudzu::Agent::Robots::Parser
- Kudzu::Agent::Robots::Rule
- Kudzu::Agent::Robots::RuleSet
- Kudzu::Agent::Robots::Txt
- Kudzu::Agent::Sleeper
- Kudzu::Agent::UrlExtractor
- Kudzu::Agent::UrlExtractor::ForHTML
- Kudzu::Agent::UrlExtractor::ForXML
- Kudzu::Agent::UrlFilterer
- Kudzu::Agent::Util
- Kudzu::Agent::Util::CharsetDetector
- Kudzu::Agent::Util::ContentTypeParser
- Kudzu::Agent::Util::Matcher
- Kudzu::Agent::Util::MimeTypeDetector
- Kudzu::Agent::Util::TitleParser
- Kudzu::Callback
- Kudzu::Common
- Kudzu::Config
- Kudzu::Config::Delegator
- Kudzu::Config::Filter
- Kudzu::Config::Filter::Delegator
- Kudzu::Crawler
- Kudzu::Model
- Kudzu::Model::Base
- Kudzu::Model::Link
- Kudzu::Model::Page
- Kudzu::ThreadPool
Methods
- ::correct — Kudzu::Agent::Util::CharsetDetector
- ::detect — Kudzu::Agent::Util::CharsetDetector
- ::detect — Kudzu::Agent::Util::MimeTypeDetector
- ::from_body — Kudzu::Agent::Util::MimeTypeDetector
- ::from_header — Kudzu::Agent::Util::MimeTypeDetector
- ::from_html — Kudzu::Agent::Util::CharsetDetector
- ::from_html — Kudzu::Agent::Util::TitleParser
- ::from_text — Kudzu::Agent::Util::CharsetDetector
- ::from_url — Kudzu::Agent::Util::MimeTypeDetector
- ::from_xml — Kudzu::Agent::Util::CharsetDetector
- ::log — Kudzu
- ::match? — Kudzu::Agent::Util::Matcher
- ::match? — Kudzu::Common
- ::match_to_allows? — Kudzu::Agent::Util::Matcher
- ::match_to_denies? — Kudzu::Agent::Util::Matcher
- ::new — Kudzu::Adapter::Memory::Frontier
- ::new — Kudzu::Adapter::Memory::Repository
- ::new — Kudzu::Agent
- ::new — Kudzu::Agent::Fetcher
- ::new — Kudzu::Agent::Http::ConnectionPool
- ::new — Kudzu::Agent::PageFilterer
- ::new — Kudzu::Agent::Robots
- ::new — Kudzu::Agent::Robots::Txt
- ::new — Kudzu::Agent::Robots::RuleSet
- ::new — Kudzu::Agent::Sleeper
- ::new — Kudzu::Agent::UrlExtractor
- ::new — Kudzu::Agent::UrlExtractor::ForHTML
- ::new — Kudzu::Agent::UrlExtractor::ForXML
- ::new — Kudzu::Agent::UrlFilterer
- ::new — Kudzu::Callback
- ::new — Kudzu::Config
- ::new — Kudzu::Config::Delegator
- ::new — Kudzu::Config::Filter
- ::new — Kudzu::Config::Filter::Delegator
- ::new — Kudzu::Crawler
- ::new — Kudzu::Model::Base
- ::new — Kudzu::ThreadPool
- ::parse — Kudzu::Agent::Robots::Parser
- ::parse — Kudzu::Agent::Util::ContentTypeParser
- ::parse — Kudzu::Agent::Util::TitleParser
- ::parse_body — Kudzu::Agent::Robots::Parser
- ::parse_line — Kudzu::Agent::Robots::Parser
- ::path_regexp — Kudzu::Agent::Robots::Parser
- ::path_to_dir — Kudzu::Common
- ::sort — Kudzu::Agent::Robots::Parser
- ::split_line — Kudzu::Agent::Robots::Parser
- ::ua_regexp — Kudzu::Agent::Robots::Parser
- ::unquote — Kudzu::Agent::Util::ContentTypeParser
- #add_filter — Kudzu::Config
- #add_filter — Kudzu::Config::Delegator
- #allowed? — Kudzu::Agent::PageFilterer
- #allowed? — Kudzu::Agent::Robots
- #allowed? — Kudzu::Agent::UrlFilterer
- #allowed_by_robots? — Kudzu::Agent::UrlFilterer
- #allowed_ext? — Kudzu::Agent::UrlFilterer
- #allowed_host? — Kudzu::Agent::UrlFilterer
- #allowed_index? — Kudzu::Agent::PageFilterer
- #allowed_mime_type? — Kudzu::Agent::PageFilterer
- #allowed_path? — Kudzu::Agent::Robots::RuleSet
- #allowed_path? — Kudzu::Agent::UrlFilterer
- #allowed_response_header? — Kudzu::Agent::PageFilterer
- #allowed_size? — Kudzu::Agent::PageFilterer
- #allowed_url? — Kudzu::Agent::UrlFilterer
- #append_cookie — Kudzu::Agent::Fetcher
- #around — Kudzu::Callback
- #body — Kudzu::Model::Page
- #body= — Kudzu::Model::Page
- #build_http — Kudzu::Agent::Fetcher
- #build_request — Kudzu::Agent::Fetcher
- #build_response — Kudzu::Agent::Fetcher
- #checkout — Kudzu::Agent::Http::ConnectionPool
- #clear — Kudzu::Adapter::Memory::Frontier
- #close — Kudzu::Agent::Http::ConnectionPool
- #crawl_delay — Kudzu::Agent::Robots
- #create_thread — Kudzu::ThreadPool
- #css? — Kudzu::Model::Page
- #decode_body — Kudzu::Model::Page
- #decoded_body — Kudzu::Model::Page
- #delay_second — Kudzu::Agent::Sleeper
- #delete — Kudzu::Adapter::Memory::Repository
- #delete_page — Kudzu::Crawler
- #dequeue — Kudzu::Adapter::Memory::Frontier
- #enqueue — Kudzu::Adapter::Memory::Frontier
- #enqueue_links — Kudzu::Crawler
- #etag — Kudzu::Model::Page
- #extract — Kudzu::Agent::UrlExtractor
- #extract — Kudzu::Agent::UrlExtractor::ForHTML
- #extract — Kudzu::Agent::UrlExtractor::ForXML
- #extract_refs — Kudzu::Agent
- #fetch — Kudzu::Agent
- #fetch — Kudzu::Agent::Fetcher
- #fetch — Kudzu::Agent::Robots
- #fetch — Kudzu::Crawler
- #fetch_and_parse — Kudzu::Agent::Robots
- #fetched? — Kudzu::Agent::Response
- #filter — Kudzu::Agent::UrlFilterer
- #filter_response? — Kudzu::Agent
- #filtered — Kudzu::Model::Page
- #filtered= — Kudzu::Model::Page
- #find_by_url — Kudzu::Adapter::Memory::Repository
- #find_encoding — Kudzu::Model::Page
- #find_filter — Kudzu::Config
- #find_set — Kudzu::Agent::Robots
- #find_txt — Kudzu::Agent::Robots
- #finish_http — Kudzu::Agent::Http::ConnectionPool
- #focused_descendants? — Kudzu::Agent::UrlFilterer
- #focused_host? — Kudzu::Agent::UrlFilterer
- #force_header_encoding — Kudzu::Agent::Fetcher
- #from_atom — Kudzu::Agent::UrlExtractor::ForXML
- #from_html — Kudzu::Agent::UrlExtractor::ForHTML
- #from_meta — Kudzu::Agent::UrlExtractor::ForHTML
- #from_rss — Kudzu::Agent::UrlExtractor::ForXML
- #handle_success — Kudzu::Crawler
- #html? — Kudzu::Model::Page
- #js? — Kudzu::Model::Page
- #last_modified — Kudzu::Model::Page
- #multi_thread — Kudzu::Crawler
- #node_to_title — Kudzu::Agent::UrlExtractor::ForHTML
- #nofollow? — Kudzu::Agent::UrlExtractor::ForHTML
- #normalize — Kudzu::Agent::UrlExtractor
- #on — Kudzu::Callback
- #parse_cookie — Kudzu::Agent::Fetcher
- #parsed_doc — Kudzu::Model::Page
- #politeness_delay — Kudzu::Agent::Sleeper
- #pool — Kudzu::Agent::Http::ConnectionPool
- #pool_name — Kudzu::Agent::Fetcher
- #redirection? — Kudzu::Agent::Fetcher
- #reduce — Kudzu::Agent::Http::ConnectionPool
- #refs_to_links — Kudzu::Crawler
- #register — Kudzu::Adapter::Memory::Repository
- #register_page — Kudzu::Crawler
- #run — Kudzu::Crawler
- #run_callback — Kudzu::Crawler
- #sanitize — Kudzu::Agent::UrlExtractor
- #send_request — Kudzu::Agent::Fetcher
- #shutdown — Kudzu::ThreadPool
- #single_thread — Kudzu::Crawler
- #sitemaps — Kudzu::Agent::Robots
- #sleep_second — Kudzu::Agent::Sleeper
- #start — Kudzu::Agent
- #start — Kudzu::ThreadPool
- #start_http — Kudzu::Agent::Fetcher
- #status_client_error? — Kudzu::Model::Page
- #status_gone? — Kudzu::Model::Page
- #status_not_found? — Kudzu::Model::Page
- #status_not_modified? — Kudzu::Model::Page
- #status_redirection? — Kudzu::Model::Link
- #status_redirection? — Kudzu::Model::Page
- #status_server_error? — Kudzu::Model::Page
- #status_success? — Kudzu::Model::Link
- #status_success? — Kudzu::Model::Page
- #text? — Kudzu::Model::Page
- #uri — Kudzu::Model::Link
- #visit_link — Kudzu::Crawler
- #wait — Kudzu::ThreadPool
- #xml? — Kudzu::Model::Page