125 strm << BESIndent::LMarg << prolog <<
"(this: " << (
void *)
this <<
")" << endl;
127 strm << BESIndent::LMarg <<
"d_skip_regex: " << (d_skip_regex?d_skip_regex->pattern():
"WAS NOT SET") << endl;
128 if (!d_effective_urls.empty()) {
129 strm << BESIndent::LMarg <<
"effective url list:" << endl;
131 auto it = d_effective_urls.begin();
132 while( it!= d_effective_urls.end()){
133 strm << BESIndent::LMarg << (*it).first <<
" --> " << (*it).second->str();
136 BESIndent::UnIndent();
139 strm << BESIndent::LMarg <<
"effective url list: EMPTY" << endl;
141 BESIndent::UnIndent();
189 std::lock_guard<std::mutex> lock_me(d_cache_lock_mutex);
191 BESDEBUG(MODULE, prolog <<
"BEGIN url: " << source_url->str() << endl);
192 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
195 BESDEBUG(MODULE, prolog <<
"CACHE IS DISABLED." << endl);
201 if (source_url->str().find(HTTP_PROTOCOL) != 0 && source_url->str().find(HTTPS_PROTOCOL) != 0) {
202 BESDEBUG(MODULE, prolog <<
"END Not an HTTP request, SKIPPING." << endl);
206 BESRegex *skip_regex = get_skip_regex();
208 size_t match_length = 0;
209 match_length = skip_regex->
match(source_url->str().c_str(), source_url->str().length());
210 if (match_length == source_url->str().length()) {
211 BESDEBUG(MODULE, prolog <<
"END Candidate url matches the "
212 "no_redirects_regex_pattern [" << skip_regex->pattern() <<
213 "][match_length=" << match_length <<
"] SKIPPING." << endl);
216 BESDEBUG(MODULE, prolog <<
"Candidate url: '" << source_url->str() <<
"' does NOT match the "
217 "skip_regex pattern [" << skip_regex->pattern() <<
"]" << endl);
220 BESDEBUG(MODULE, prolog <<
"The cache_effective_urls_skip_regex() was NOT SET "<< endl);
223 shared_ptr<http::EffectiveUrl> effective_url = get_cached_eurl(source_url->str());
227 bool retrieve_and_cache = !effective_url;
233 BESDEBUG(MODULE, prolog <<
"Cache hit for: " << source_url->str() << endl);
234 retrieve_and_cache = effective_url->is_expired();
235 BESDEBUG(MODULE, prolog <<
"Cached target URL is " << (retrieve_and_cache?
"":
"not ") <<
"expired." << endl);
239 if(retrieve_and_cache){
240 BESDEBUG(MODULE, prolog <<
"Acquiring effective URL for " << source_url->str() << endl);
244 sw.
start(prolog +
"Retrieve and cache effective url for source url: " + source_url->str());
245 effective_url = curl::retrieve_effective_url(source_url);
247 BESDEBUG(MODULE, prolog <<
" source_url: " << source_url->str() <<
" (" << (source_url->is_trusted()?
"":
"NOT ") <<
"trusted)" << endl);
248 BESDEBUG(MODULE, prolog <<
"effective_url: " << effective_url->dump() <<
" (" << (source_url->is_trusted()?
"":
"NOT ") <<
"trusted)" << endl);
250 d_effective_urls[source_url->str()] = effective_url;
252 BESDEBUG(MODULE, prolog <<
"Updated record for "<< source_url->str() <<
" cache size: " << d_effective_urls.size() << endl);
260 effective_url = shared_ptr<EffectiveUrl>(
new EffectiveUrl(effective_url));
266 effective_url = shared_ptr<EffectiveUrl>(
new EffectiveUrl(effective_url,source_url->is_trusted()));
269 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
271 BESDEBUG(MODULE, prolog <<
"END" << endl);
273 return effective_url;