bes  Updated for version 3.20.8
CmrApi.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of cmr_module, a C++ module that can be loaded into
4 // the OPeNDAP Back-End Server (BES) and is able to handle remote requests.
5 
6 // Copyright (c) 2015 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 /*
26  * CmrApi.cc
27  *
28  * Created on: July, 13 2018
29  * Author: ndp
30  */
31 #include <memory>
32 #include "rapidjson/document.h"
33 #include "rapidjson/writer.h"
34 #include "rapidjson/prettywriter.h"
35 #include "rapidjson/stringbuffer.h"
36 #include "rapidjson/filereadstream.h"
37 #include <cstdio>
38 #include <cstring>
39 #include <iostream>
40 #include <sstream>
41 
42 
43 #include <util.h>
44 #include <debug.h>
45 
46 #include <BESError.h>
47 #include <BESSyntaxUserError.h>
48 #include <BESDebug.h>
49 #include <BESUtil.h>
50 #include <TheBESKeys.h>
51 
52 #include "CmrApi.h"
53 #include "CmrNames.h"
54 #include "CmrError.h"
55 #include "rjson_utils.h"
56 
57 using std::string;
58 
59 #define CMR_HOST_URL_KEY "CMR.host.url"
60 #define DEFAULT_CMR_HOST_URL "https://cmr.earthdata.nasa.gov/"
61 #define CMR_SEARCH_SERVICE "/search"
62 #define prolog string("CmrApi::").append(__func__).append("() - ")
63 
64 namespace cmr {
65 
66  CmrApi::CmrApi() : d_cmr_search_endpoint_url(DEFAULT_CMR_HOST_URL){
67  bool found;
68  string cmr_search_endpoint_url;
69  TheBESKeys::TheKeys()->get_value(CMR_HOST_URL_KEY, cmr_search_endpoint_url,found);
70  if(found){
71  d_cmr_search_endpoint_url = cmr_search_endpoint_url;
72  }
73  string search(CMR_SEARCH_SERVICE);
74  if (d_cmr_search_endpoint_url.length() >= search.length()) {
75  if (0 != d_cmr_search_endpoint_url.compare (d_cmr_search_endpoint_url.length() - search.length(), search.length(), search)){
76  d_cmr_search_endpoint_url = BESUtil::pathConcat(d_cmr_search_endpoint_url,search);
77  }
78  }
79  BESDEBUG(MODULE, prolog << "Using CMR search endpoint: " << d_cmr_search_endpoint_url << endl);
80  }
81 
85 const rapidjson::Value&
86 CmrApi::get_children(const rapidjson::Value& obj) {
88 
89  itr = obj.FindMember("children");
90  bool result = itr != obj.MemberEnd();
91  string msg = prolog + (result?"Located":"FAILED to locate") + " the value 'children' in the object.";
92  BESDEBUG(MODULE, msg << endl);
93  if(!result){
94  throw CmrError(msg,__FILE__,__LINE__);
95  }
96 
97  const rapidjson::Value& children = itr->value;
98  result = children.IsArray();
99  msg = prolog + "The value 'children' is" + (result?"":" NOT") + " an array.";
100  BESDEBUG(MODULE, msg << endl);
101  if(!result){
102  throw CmrError(msg,__FILE__,__LINE__);
103  }
104  return children;
105 }
106 
110 const rapidjson::Value&
111 CmrApi::get_feed(const rapidjson::Document &cmr_doc){
112 
113  bool result = cmr_doc.IsObject();
114  string msg = prolog + "Json document is" + (result?"":" NOT") + " an object.";
115  BESDEBUG(MODULE, msg << endl);
116  if(!result){
117  throw CmrError(msg,__FILE__,__LINE__);
118  }
119 
120  //################### feed
121  rapidjson::Value::ConstMemberIterator itr = cmr_doc.FindMember("feed");
122  result = itr != cmr_doc.MemberEnd();
123  msg = prolog + (result?"Located":"FAILED to locate") + " the value 'feed'.";
124  BESDEBUG(MODULE, msg << endl);
125  if(!result){
126  throw CmrError(msg,__FILE__,__LINE__);
127  }
128 
129  const rapidjson::Value& feed = itr->value;
130  result = feed.IsObject();
131  msg = prolog + "The value 'feed' is" + (result?"":" NOT") + " an object.";
132  BESDEBUG(MODULE, msg << endl);
133  if(!result){
134  throw CmrError(msg,__FILE__,__LINE__);
135  }
136  return feed;
137 }
138 
142 const rapidjson::Value&
143 CmrApi::get_entries(const rapidjson::Document &cmr_doc){
144  bool result;
145  string msg;
146 
147  const rapidjson::Value& feed = get_feed(cmr_doc);
148 
149  rapidjson::Value::ConstMemberIterator itr = feed.FindMember("entry");
150  result = itr != feed.MemberEnd();
151  msg = prolog + (result?"Located":"FAILED to locate") + " the value 'entry'.";
152  BESDEBUG(MODULE, msg << endl);
153  if(!result){
154  throw CmrError(msg,__FILE__,__LINE__);
155  }
156 
157  const rapidjson::Value& entry = itr->value;
158  result = entry.IsArray();
159  msg = prolog + "The value 'entry' is" + (result?"":" NOT") + " an Array.";
160  BESDEBUG(MODULE, msg << endl);
161  if(!result){
162  throw CmrError(msg,__FILE__,__LINE__);
163  }
164  return entry;
165 }
166 
170 const rapidjson::Value&
171 CmrApi::get_temporal_group(const rapidjson::Document &cmr_doc){
172  rjson_utils ru;
173 
174  bool result;
175  string msg;
176  const rapidjson::Value& feed = get_feed(cmr_doc);
177 
178  //################### facets
179  rapidjson::Value::ConstMemberIterator itr = feed.FindMember("facets");
180  result = itr != feed.MemberEnd();
181  msg = prolog + (result?"Located":"FAILED to locate") + " the value 'facets'." ;
182  BESDEBUG(MODULE, msg << endl);
183  if(!result){
184  throw CmrError(msg,__FILE__,__LINE__);
185  }
186 
187  const rapidjson::Value& facets_obj = itr->value;
188  result = facets_obj.IsObject();
189  msg = prolog + "The value 'facets' is" + (result?"":" NOT") + " an object.";
190  BESDEBUG(MODULE, msg << endl);
191  if(!result){
192  throw CmrError(msg,__FILE__,__LINE__);
193  }
194 
195  const rapidjson::Value& facets = get_children(facets_obj);
196  for (rapidjson::SizeType i = 0; i < facets.Size(); i++) { // Uses SizeType instead of size_t
197  const rapidjson::Value& facet = facets[i];
198 
199  string facet_title = ru.getStringValue(facet,"title");
200  string temporal_title("Temporal");
201  if(facet_title == temporal_title){
202  msg = prolog + "Found Temporal object.";
203  BESDEBUG(MODULE, msg << endl);
204  return facet;
205  }
206  else {
207  msg = prolog + "The child of 'facets' with title '"+facet_title+"' does not match 'Temporal'";
208  BESDEBUG(MODULE, msg << endl);
209  }
210  }
211  msg = prolog + "Failed to locate the Temporal facet.";
212  BESDEBUG(MODULE, msg << endl);
213  throw CmrError(msg,__FILE__,__LINE__);
214 
215 } // CmrApi::get_temporal_group()
216 
220 const rapidjson::Value&
221 CmrApi::get_year_group(const rapidjson::Document &cmr_doc){
222  rjson_utils rju;
223  string msg;
224 
225  const rapidjson::Value& temporal_group = get_temporal_group(cmr_doc);
226  const rapidjson::Value& temporal_children = get_children(temporal_group);
227  for (rapidjson::SizeType j = 0; j < temporal_children.Size(); j++) { // Uses SizeType instead of size_t
228  const rapidjson::Value& temporal_child = temporal_children[j];
229 
230  string temporal_child_title = rju.getStringValue(temporal_child,"title");
231  string year_title("Year");
232  if(temporal_child_title == year_title){
233  msg = prolog + "Found Year object.";
234  BESDEBUG(MODULE, msg << endl);
235  return temporal_child;
236  }
237  else {
238  msg = prolog + "The child of 'Temporal' with title '"+temporal_child_title+"' does not match 'Year'";
239  BESDEBUG(MODULE, msg << endl);
240  }
241  }
242  msg = prolog + "Failed to locate the Year group.";
243  BESDEBUG(MODULE, msg << endl);
244  throw CmrError(msg,__FILE__,__LINE__);
245 }
246 
250 const rapidjson::Value&
251 CmrApi::get_month_group(const string r_year, const rapidjson::Document &cmr_doc){
252  rjson_utils rju;
253  string msg;
254 
255  const rapidjson::Value& year_group = get_year_group(cmr_doc);
256  const rapidjson::Value& years = get_children(year_group);
257  for (rapidjson::SizeType i = 0; i < years.Size(); i++) { // Uses SizeType instead of size_t
258  const rapidjson::Value& year_obj = years[i];
259 
260  string year_title = rju.getStringValue(year_obj,"title");
261  if(r_year == year_title){
262  msg = prolog + "Found Year object.";
263  BESDEBUG(MODULE, msg << endl);
264 
265  const rapidjson::Value& year_children = get_children(year_obj);
266  for (rapidjson::SizeType j = 0; j < year_children.Size(); j++) { // Uses SizeType instead of size_t
267  const rapidjson::Value& child = year_children[i];
268  string title = rju.getStringValue(child,"title");
269  string month_title("Month");
270  if(title == month_title){
271  msg = prolog + "Found Month object.";
272  BESDEBUG(MODULE, msg << endl);
273  return child;
274  }
275  else {
276  msg = prolog + "The child of 'Year' with title '"+title+"' does not match 'Month'";
277  BESDEBUG(MODULE, msg << endl);
278  }
279  }
280  }
281  else {
282  msg = prolog + "The child of 'Year' group with title '"+year_title+"' does not match the requested year ("+r_year+")";
283  BESDEBUG(MODULE, msg << endl);
284  }
285  }
286  msg = prolog + "Failed to locate the Year group.";
287  BESDEBUG(MODULE, msg << endl);
288  throw CmrError(msg,__FILE__,__LINE__);
289 }
290 
291 const rapidjson::Value&
292 CmrApi::get_month(const string r_month, const string r_year, const rapidjson::Document &cmr_doc){
293  rjson_utils rju;
294  stringstream msg;
295 
296  const rapidjson::Value& month_group = get_month_group(r_year,cmr_doc);
297  const rapidjson::Value& months = get_children(month_group);
298  for (rapidjson::SizeType i = 0; i < months.Size(); i++) { // Uses SizeType instead of size_t
299  const rapidjson::Value& month = months[i];
300  string month_id = rju.getStringValue(month,"title");
301  if(month_id == r_month){
302  msg.str("");
303  msg << prolog << "Located requested month ("<<r_month << ")";
304  BESDEBUG(MODULE, msg.str() << endl);
305  return month;
306  }
307  else {
308  msg.str("");
309  msg << prolog << "The month titled '"<<month_id << "' does not match the requested month ("<< r_month <<")";
310  BESDEBUG(MODULE, msg.str() << endl);
311  }
312  }
313  msg.str("");
314  msg << prolog << "Failed to locate request Year/Month.";
315  BESDEBUG(MODULE, msg.str() << endl);
316  throw CmrError(msg.str(),__FILE__,__LINE__);
317 }
318 
319 const rapidjson::Value&
320 CmrApi::get_day_group(const string r_month, const string r_year, const rapidjson::Document &cmr_doc){
321  rjson_utils rju;
322  stringstream msg;
323 
324  const rapidjson::Value& month = get_month(r_month, r_year, cmr_doc);
325  const rapidjson::Value& month_children = get_children(month);
326 
327  for (rapidjson::SizeType k = 0; k < month_children.Size(); k++) { // Uses SizeType instead of size_t
328  const rapidjson::Value& object = month_children[k];
329  string title = rju.getStringValue(object,"title");
330  string day_group_title = "Day";
331  if(title == day_group_title){
332  msg.str("");
333  msg << prolog << "Located Day group for year: " << r_year << " month: "<< r_month;
334  BESDEBUG(MODULE, msg.str() << endl);
335  return object;
336  }
337  }
338  msg.str("");
339  msg << prolog << "Failed to locate requested Day year: " << r_year << " month: "<< r_month;
340  BESDEBUG(MODULE, msg.str() << endl);
341  throw CmrError(msg.str(),__FILE__,__LINE__);
342 }
343 
344 
351 void
352 CmrApi::get_years(string collection_name, vector<string> &years_result){
353  rjson_utils rju;
354  // bool result;
355  string msg;
356 
357  string url = BESUtil::assemblePath(d_cmr_search_endpoint_url, "granules.json") +
358  "?concept_id=" + collection_name + "&include_facets=v2";
359 
361  rju.getJsonDoc(url,doc);
362 
363  const rapidjson::Value& year_group = get_year_group(doc);
364  const rapidjson::Value& years = get_children(year_group);
365  for (rapidjson::SizeType k = 0; k < years.Size(); k++) { // Uses SizeType instead of size_t
366  const rapidjson::Value& year_obj = years[k];
367  string year = rju.getStringValue(year_obj,"title");
368  years_result.push_back(year);
369  }
370 } // CmrApi::get_years()
371 
372 
381 void
382 CmrApi::get_months(string collection_name, string r_year, vector<string> &months_result){
383  rjson_utils rju;
384 
385  stringstream msg;
386 
387  string url = BESUtil::assemblePath(d_cmr_search_endpoint_url, "granules.json")
388  + "?concept_id="+collection_name
389  +"&include_facets=v2"
390  +"&temporal_facet[0][year]="+r_year;
391 
393  rju.getJsonDoc(url,doc);
394  BESDEBUG(MODULE, prolog << "Got JSON Document: "<< endl << rju.jsonDocToString(doc) << endl);
395 
396  const rapidjson::Value& year_group = get_year_group(doc);
397  const rapidjson::Value& years = get_children(year_group);
398  if(years.Size() != 1){
399  msg.str("");
400  msg << prolog << "We expected to get back one year (" << r_year << ") but we got back " << years.Size();
401  BESDEBUG(MODULE, msg.str() << endl);
402  throw CmrError(msg.str(),__FILE__,__LINE__);
403  }
404 
405  const rapidjson::Value& year = years[0];
406  string year_title = rju.getStringValue(year,"title");
407  if(year_title != r_year){
408  msg.str("");
409  msg << prolog << "The returned year (" << year_title << ") does not match the requested year ("<< r_year << ")";
410  BESDEBUG(MODULE, msg.str() << endl);
411  throw CmrError(msg.str(),__FILE__,__LINE__);
412  }
413 
414  const rapidjson::Value& year_children = get_children(year);
415  if(year_children.Size() != 1){
416  msg.str("");
417  msg << prolog << "We expected to get back one child for the year (" << r_year << ") but we got back " << years.Size();
418  BESDEBUG(MODULE, msg.str() << endl);
419  throw CmrError(msg.str(),__FILE__,__LINE__);
420  }
421 
422  const rapidjson::Value& month_group = year_children[0];
423  string title = rju.getStringValue(month_group,"title");
424  if(title != string("Month")){
425  msg.str("");
426  msg << prolog << "We expected to get back a Month object, but we did not.";
427  BESDEBUG(MODULE, msg.str() << endl);
428  throw CmrError(msg.str(),__FILE__,__LINE__);
429  }
430 
431  const rapidjson::Value& months = get_children(month_group);
432  for (rapidjson::SizeType i = 0; i < months.Size(); i++) { // Uses SizeType instead of size_t
433  const rapidjson::Value& month = months[i];
434  string month_id = rju.getStringValue(month,"title");
435  months_result.push_back(month_id);
436  }
437  return;
438 
439 } // CmrApi::get_months()
440 
444 void
445 CmrApi::get_days(string collection_name, string r_year, string r_month, vector<string> &days_result){
446  rjson_utils rju;
447  stringstream msg;
448 
449  string url = BESUtil::assemblePath(d_cmr_search_endpoint_url, "granules.json")
450  + "?concept_id="+collection_name
451  +"&include_facets=v2"
452  +"&temporal_facet[0][year]="+r_year
453  +"&temporal_facet[0][month]="+r_month;
454 
455  rapidjson::Document cmr_doc;
456  rju.getJsonDoc(url,cmr_doc);
457  BESDEBUG(MODULE, prolog << "Got JSON Document: "<< endl << rju.jsonDocToString(cmr_doc) << endl);
458 
459  const rapidjson::Value& day_group = get_day_group(r_month, r_year, cmr_doc);
460  const rapidjson::Value& days = get_children(day_group);
461  for (rapidjson::SizeType i = 0; i < days.Size(); i++) { // Uses SizeType instead of size_t
462  const rapidjson::Value& day = days[i];
463  string day_id = rju.getStringValue(day,"title");
464  days_result.push_back(day_id);
465  }
466 }
467 
468 
469 
473 void
474 CmrApi::get_granule_ids(string collection_name, string r_year, string r_month, string r_day, vector<string> &granules_ids){
475  rjson_utils rju;
476  stringstream msg;
477  rapidjson::Document cmr_doc;
478 
479  granule_search(collection_name, r_year, r_month, r_day, cmr_doc);
480 
481  const rapidjson::Value& entries = get_entries(cmr_doc);
482  for (rapidjson::SizeType i = 0; i < entries.Size(); i++) { // Uses SizeType instead of size_t
483  const rapidjson::Value& granule = entries[i];
484  string day_id = rju.getStringValue(granule,"id");
485  granules_ids.push_back(day_id);
486  }
487 
488 }
489 
490 
494 unsigned long
495 CmrApi::granule_count(string collection_name, string r_year, string r_month, string r_day){
496  stringstream msg;
497  rapidjson::Document cmr_doc;
498  granule_search(collection_name, r_year, r_month, r_day, cmr_doc);
499  const rapidjson::Value& entries = get_entries(cmr_doc);
500  return entries.Size();
501 }
502 
507 void
508 CmrApi::granule_search(string collection_name, string r_year, string r_month, string r_day, rapidjson::Document &result_doc){
509  rjson_utils rju;
510 
511  string url = BESUtil::assemblePath(d_cmr_search_endpoint_url, "granules.json")
512  + "?concept_id="+collection_name
513  + "&include_facets=v2"
514  + "&page_size=2000";
515 
516  if(!r_year.empty())
517  url += "&temporal_facet[0][year]="+r_year;
518 
519  if(!r_month.empty())
520  url += "&temporal_facet[0][month]="+r_month;
521 
522  if(!r_day.empty())
523  url += "&temporal_facet[0][day]="+r_day;
524 
525  BESDEBUG(MODULE, prolog << "CMR Granule Search Request Url: : " << url << endl);
526  rju.getJsonDoc(url,result_doc);
527  BESDEBUG(MODULE, prolog << "Got JSON Document: "<< endl << rju.jsonDocToString(result_doc) << endl);
528 }
529 
530 
531 
535 void
536 CmrApi::get_granules(string collection_name, string r_year, string r_month, string r_day, vector<Granule *> &granules){
537  stringstream msg;
538  rapidjson::Document cmr_doc;
539 
540  granule_search(collection_name, r_year, r_month, r_day, cmr_doc);
541 
542  const rapidjson::Value& entries = get_entries(cmr_doc);
543  for (rapidjson::SizeType i = 0; i < entries.Size(); i++) { // Uses SizeType instead of size_t
544  const rapidjson::Value& granule_obj = entries[i];
545  // rapidjson::Value grnl(granule_obj, cmr_doc.GetAllocator());
546  Granule *g = new Granule(granule_obj);
547  granules.push_back(g);
548  }
549 
550 }
551 
552 
553 
554 void
555 CmrApi::get_collection_ids(std::vector<std::string> &collection_ids){
556  bool found = false;
557  string key = CMR_COLLECTIONS;
558  TheBESKeys::TheKeys()->get_values(CMR_COLLECTIONS, collection_ids, found);
559  if(!found){
560  throw BESInternalError(string("The '") +CMR_COLLECTIONS
561  + "' field has not been configured.", __FILE__, __LINE__);
562  }
563 }
564 
565 
569 cmr::Granule* CmrApi::get_granule(string collection_name, string r_year, string r_month, string r_day, string granule_id)
570 {
571  vector<Granule *> granules;
572  Granule *result = 0;
573 
574  get_granules(collection_name, r_year, r_month, r_day, granules);
575  for(size_t i=0; i<granules.size() ;i++){
576  string id = granules[i]->getName();
577  BESDEBUG(MODULE, prolog << "Comparing granule id: " << granule_id << " to collection member id: " << id << endl);
578  if( id == granule_id){
579  result = granules[i];
580  }
581  else {
582  delete granules[i];
583  granules[i] = 0;
584  }
585  }
586  return result;
587 }
588 
589 
590 
591 
592 
593 
594 } // namespace cmr
595 
exception thrown if internal error encountered
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:772
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
(Constant) member iterator for a JSON object value
Definition: document.h:177
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:339
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:370
std::string jsonDocToString(rapidjson::Document &d)
Definition: rjson_utils.cc:110
void getJsonDoc(const std::string &url, rapidjson::Document &d)
Definition: rjson_utils.cc:57
std::string getStringValue(const rapidjson::Value &object, const std::string &name)
Definition: rjson_utils.cc:84
GenericValue< UTF8<> > Value
GenericValue with UTF8 encoding.
Definition: document.h:2189
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:2585
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384