bes  Updated for version 3.20.8
HttpCache.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of the BES http package, part of the Hyrax data server.
5 
6 // Copyright (c) 2020 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 // Authors:
26 // ndp Nathan Potter <ndp@opendap.org>
27 
28 #include <config.h>
29 
30 #include <sys/stat.h>
31 
32 #include <string>
33 #include <fstream>
34 #include <sstream>
35 #include <vector>
36 
37 #include <cstdlib>
38 
39 #include "PicoSHA2/picosha2.h"
40 
41 #include <BESInternalError.h>
42 #include <BESDebug.h>
43 #include <BESUtil.h>
44 #include <TheBESKeys.h>
45 
46 #include "HttpCache.h"
47 #include "HttpUtils.h"
48 #include "HttpNames.h"
49 #include "url_impl.h"
50 
51 #ifdef HAVE_ATEXIT
52 #define AT_EXIT(x) atexit((x))
53 #else
54 #define AT_EXIT(x)
55 #endif
56 #define prolog string("HttpCache::").append(__func__).append("() - ")
57 
58 
59 using std::endl;
60 using std::string;
61 using std::vector;
62 using std::stringstream;
63 
64 namespace http {
65 
66  HttpCache *HttpCache::d_instance = 0;
67  bool HttpCache::d_enabled = true;
68 
69  unsigned long HttpCache::getCacheSizeFromConfig() {
70  bool found = false;
71  string size;
72  unsigned long size_in_megabytes = 0;
73  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_SIZE_KEY, size, found);
74 
75  if (found) {
76  std::istringstream iss(size);
77  iss >> size_in_megabytes;
78  } else {
79  stringstream msg;
80  msg << prolog << "The BES Key " << HTTP_CACHE_SIZE_KEY << " is not set.";
81  BESDEBUG(HTTP_MODULE, msg.str() << endl);
82  throw BESInternalError(msg.str(), __FILE__, __LINE__);
83  }
84 
85  return size_in_megabytes;
86  }
87 
88  string HttpCache::getCacheDirFromConfig() {
89  bool found;
90  string subdir = "";
91  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_DIR_KEY, subdir, found);
92 
93  if (!found) {
94  stringstream msg;
95  msg << prolog << "The BES Key " << HTTP_CACHE_DIR_KEY << " is not set.";
96  BESDEBUG(HTTP_MODULE, msg.str() << endl);
97  throw BESInternalError(msg.str(), __FILE__, __LINE__);
98  }
99 
100  return subdir;
101  }
102 
103  string HttpCache::getCachePrefixFromConfig() {
104  bool found;
105  string prefix = "";
106  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_PREFIX_KEY, prefix, found);
107 
108  if (found) {
109  prefix = BESUtil::lowercase(prefix);
110  } else {
111  stringstream msg;
112  msg << prolog << "The BES Key " << HTTP_CACHE_PREFIX_KEY << " is not set.";
113  BESDEBUG(HTTP_MODULE, msg.str() << endl);
114  throw BESInternalError(msg.str(), __FILE__, __LINE__);
115  }
116 
117  return prefix;
118  }
119 
120  HttpCache::HttpCache() {
121  BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
122 
123  string cacheDir = getCacheDirFromConfig();
124  string cachePrefix = getCachePrefixFromConfig();
125  unsigned long cacheSizeMbytes = getCacheSizeFromConfig();
126 
127  BESDEBUG(HTTP_MODULE, prolog << "Cache configuration params: " << cacheDir << ", " << cachePrefix << ", "
128  << cacheSizeMbytes << endl);
129  initialize(cacheDir, cachePrefix, cacheSizeMbytes);
130 
131  BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
132  }
133 
134 #if 1
135  HttpCache::HttpCache(const string &cache_dir, const string &prefix, unsigned long long size) {
136 
137  BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
138 
139  initialize(cache_dir, prefix, size);
140 
141  BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
142  }
143 #endif
144 #if 0
145  HttpCache *
146  HttpCache::get_instance(const string &cache_dir, const string &cache_file_prefix,
147  unsigned long long max_cache_size) {
148  if (d_enabled && d_instance == 0) {
149  if (dir_exists(cache_dir)) {
150  d_instance = new HttpCache(cache_dir, cache_file_prefix, max_cache_size);
151  d_enabled = d_instance->cache_enabled();
152  if (!d_enabled) {
153  delete d_instance;
154  d_instance = 0;
155  BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is DISABLED" << endl);
156  } else {
157  AT_EXIT(delete_instance);
158 
159  BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is ENABLED" << endl);
160  }
161  }
162  }
163 
164  return d_instance;
165  }
166 #endif
167 
171  HttpCache *
173  if (d_enabled && d_instance == 0) {
174  try {
175  d_instance = new HttpCache();
176  d_enabled = d_instance->cache_enabled();
177  if (!d_enabled) {
178  delete d_instance;
179  d_instance = 0;
180  BESDEBUG(HTTP_MODULE, prolog << "Cache is DISABLED" << endl);
181  } else {
182  AT_EXIT(delete_instance);
183 
184  BESDEBUG(HTTP_MODULE, prolog << "Cache is ENABLED" << endl);
185  }
186  }
187  catch (BESInternalError &bie) {
188  BESDEBUG(HTTP_MODULE,
189  "[ERROR] HttpCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message()
190  << endl);
191  }
192  }
193 
194  return d_instance;
195  }
196 
197 #if HASH_CACHE_FILENAME
198 
199  string
200  HttpCache::get_hash(const string &s)
201  {
202  if (s.empty()){
203  string msg = "You cannot hash the empty string.";
204  BESDEBUG(HTTP_MODULE, prolog << msg << endl);
205  throw BESInternalError(msg, __FILE__, __LINE__);
206  }
207  return picosha2::hash256_hex_string(s[0] == '/' ? s : "/" + s);
208  }
209 
210 
211  bool is_url(const string &candidate){
212  size_t index = candidate.find(HTTP_PROTOCOL);
213  if(index){
214  index = candidate.find(HTTPS_PROTOCOL);
215  if(index){
216  return false;
217  }
218  }
219  return true;
220  }
221 
222 
230  string get_real_name_extension(const string &identifier){
231  string real_name_extension;
232 
233  string path_part;
234 
235  if(is_url(identifier)) {
236  // Since it's a URL it might have a massive query string attached, and since wee
237  // have no idea what the query parameters mean, we'll just punt and look at the path part of the URL.
238  // We make an instance of http::url which will carve up the URL for us.
239  http::url target_url(identifier);
240  path_part = target_url.path();
241  }
242  else {
243  path_part = identifier;
244  }
245 
246  vector<string> path_elements;
247  // Now that we a "path" (none of that query string mess) we can tokenize it.
248  BESUtil::tokenize(path_part,path_elements);
249  if(!path_elements.empty()){
250  string last = path_elements.back();
251  if(last != path_part)
252  real_name_extension = "#" + last; // This utilizes a hack in libdap
253  }
254  return real_name_extension;
255  }
256 
257 
266  string HttpCache::get_cache_file_name(const string &uid, const string &src_id, bool mangle){
267  stringstream cache_filename;
268  string hashed_part;
269  string real_name_extension;
270  string uid_part;
271 
272  if(!uid.empty())
273  uid_part = uid + "_";
274 
275  if(mangle){
276  hashed_part = get_hash(src_id);
277  }
278  else {
279  hashed_part = src_id;
280  }
281  real_name_extension = get_real_name_extension(src_id);
282 
283  cache_filename << get_cache_file_prefix() << uid_part << hashed_part << real_name_extension;
284 
285  string cf_name = BESUtil::assemblePath(this->get_cache_directory(), cache_filename.str() );
286 
287  return cf_name;
288  }
289 
290 
291  string HttpCache::get_cache_file_name( const string &src, bool mangle){
292  string uid;
293  return get_cache_file_name(uid,src, mangle);
294  }
295 
296 
297 #endif
298 
299 } // namespace http
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
void initialize(const std::string &cache_dir, const std::string &prefix, unsigned long long size)
Initialize an instance of FileLockingCache.
const std::string get_cache_directory()
static bool dir_exists(const std::string &dir)
const std::string get_cache_file_prefix()
exception thrown if internal error encountered
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:1057
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:339
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
static HttpCache * get_instance()
Definition: HttpCache.cc:172
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:266
utility class for the HTTP catalog module
Definition: EffectiveUrl.cc:58
string get_real_name_extension(const string &identifier)
Definition: HttpCache.cc:230