# Generated from medusa-crawler-1.0.0.gem by gem2rpm -*- rpm-spec -*-
%global gem_name medusa-crawler

# Package identity and upstream location.
Name:           rubygem-%{gem_name}
Version:        1.0.0
Release:        1%{?dist}
Summary:        Medusa is a ruby crawler framework
License:        MIT
URL:            https://github.com/brutuscat/medusa-crawler
Source0:        https://rubygems.org/gems/%{gem_name}-%{version}.gem

# Architecture-independent package.
BuildArch:      noarch

# Build-time dependencies.
BuildRequires:  ruby(release)
BuildRequires:  rubygems-devel
BuildRequires:  ruby >= 2.3.0

%description
Medusa: a ruby crawler framework
Medusa is a framework for the ruby language to crawl and collect useful
information about the pages
it visits. It is versatile, allowing you to write your own specialized tasks
quickly and easily.
=== Features
* Choose the links to follow on each page with +focus_crawl+
* Multi-threaded design for high performance
* Tracks +301+ HTTP redirects
* Allows exclusion of URLs based on regular expressions
* Records response time for each page
* Obey _robots.txt_ directives (optional, but recommended)
* In-memory or persistent storage of pages during crawl, provided by
Moneta[https://github.com/moneta-rb/moneta]
* Inherits OpenURI behavior (redirects, automatic charset and encoding
detection, proxy configuration options).
Do you have an idea or a suggestion? Open an issue and talk about it:
https://github.com/brutuscat/medusa-crawler/issues/new
=== Examples
Medusa is versatile and meant to be used programmatically. You can start with
one or multiple URIs:
require 'medusa'
Medusa.crawl('https://www.example.com', depth_limit: 2)
Or you can pass a block and it will yield the crawler back, to manage
configuration or drive its crawling focus:
require 'medusa'
Medusa.crawl('https://www.example.com', depth_limit: 2) do |crawler|
crawler.discard_page_bodies = some_flag
# Persist all the pages state across crawl-runs.
crawler.clear_on_startup = false
crawler.storage = Medusa::Storage.Moneta(:Redis,
'redis://redis.host.name:6379/0')
crawler.skip_links_like(/private/)
crawler.on_pages_like(/public/) do |page|
logger.debug "[public page]  #{page.url} took #{page.response_time} found
#{page.links.count}"
end
# Use an arbitrary logic, page by page, to continue customize the crawling.
crawler.focus_crawl(/public/) do |page|
page.links.first
end
end


%package doc
# Documentation subpackage; it requires the main package at this exact version-release.
Summary:        Documentation for %{name}
BuildArch:      noarch
Requires:       %{name} = %{version}-%{release}

%description doc
Documentation for %{name}.

%prep
# Unpack Source0 quietly (-q) and enter the directory named by -n.
%setup -q -n %{gem_name}-%{version}

%build
# Create the gem as gem install only works on a gem file.
# The gemspec is read from the parent of the current build directory.
gem build ../%{gem_name}-%{version}.gemspec

# %%gem_install compiles any C extensions and installs the gem into ./%%gem_dir
# by default, so that we can move it into the buildroot in %%install
%gem_install

%install
# Copy the locally installed gem tree from the build directory into the buildroot,
# preserving attributes.
mkdir -p %{buildroot}%{gem_dir}
cp -a .%{gem_dir}/* %{buildroot}%{gem_dir}/



%check
# Enter the gem's install directory inside the build tree.
pushd .%{gem_instdir}
# Run the test suite.
# NOTE(review): no test command is invoked between pushd and popd, so this
# section currently exercises nothing beyond entering the directory.
# TODO: run the gem's spec suite here once its build-time dependencies are declared.
popd

%files
# Runtime payload: the gem's install directory, library code and gemspec.
%dir %{gem_instdir}
%{gem_libdir}
%{gem_instdir}/VERSION
%{gem_spec}
# License text, flagged as such for the package metadata.
%license %{gem_instdir}/LICENSE.txt
# Leave the gem cache entry out of the package.
%exclude %{gem_cache}

%files doc
# Entries flagged as documentation.
%doc %{gem_docdir}
%doc %{gem_instdir}/README.rdoc
%doc %{gem_instdir}/CHANGELOG.md
%doc %{gem_instdir}/CONTRIBUTORS.md
# Development files, shipped in this subpackage rather than the main one.
%{gem_instdir}/spec
%{gem_instdir}/Rakefile

%changelog
* Tue Aug 24 2021 mockbuilder - 1.0.0-1
- Initial package