bes  Updated for version 3.20.8
SuperChunky.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 // Copyright (c) 2020 OPeNDAP, Inc.
5 // Author: Nathan Potter<ndp@opendap.org>
6 //
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public
18 // License along with this library; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 //
21 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22 //
23 // Created by ndp on 12/2/20.
24 //
25 #include "config.h"
26 
27 #include <vector>
28 #include <string>
29 #include <sstream>
30 #include <iostream>
31 #include <fstream>
32 
33 #include "BaseType.h"
34 #include "D4Group.h"
35 
36 #include "BESInternalError.h"
37 #include "BESInternalFatalError.h"
38 #include "BESDebug.h"
39 #include "BESUtil.h"
40 #include "TheBESKeys.h"
41 #include "BESLog.h"
42 #include "BESStopWatch.h"
43 #include "BESIndent.h"
44 
45 #include "DmrppNames.h"
46 #include "DMRpp.h"
47 #include "DmrppD4Group.h"
48 #include "DmrppArray.h"
49 #include "DmrppParserSax2.h"
50 #include "DmrppTypeFactory.h"
51 
52 #include "SuperChunk.h"
53 
54 #define prolog std::string("superchunky::").append(__func__).append("() - ")
55 
56 namespace dmrpp {
57 
58 bool debug = true; // When true, the functions in this file write their diagnostic trace to cout/cerr.
59 
60 
61 void compute_super_chunks(dmrpp::DmrppArray *array, bool only_constrained, vector<SuperChunk *> &super_chunks){
62 
63  // Now we get the chunkyness
64  auto chunk_dim_sizes = array->get_chunk_dimension_sizes();
65  //unsigned int chunk_size_in_elements = array->get_chunk_size_in_elements();
66  auto chunks = array->get_immutable_chunks();
67 
68  //unsigned long long super_chunk_index = 0;
69  auto currentSuperChunk = new SuperChunk();
70  super_chunks.push_back(currentSuperChunk); // first super chunk...
71  if(debug) cout << "SuperChunking array: "<< array->name() << endl;
72 
73  for(const auto &chunk:chunks){
74  bool was_added = currentSuperChunk->add_chunk(chunk);
75  if(!was_added){
76  if(debug) {
77  unsigned long long next_contiguous_chunk_offset = currentSuperChunk->get_offset() + currentSuperChunk->get_size();
78  unsigned long long gap_size;
79  bool is_behind = false;
80  if(chunk->get_offset() > next_contiguous_chunk_offset){
81  gap_size = chunk->get_offset() - next_contiguous_chunk_offset;
82  }
83  else {
84  is_behind = true;
85  gap_size = next_contiguous_chunk_offset - chunk->get_offset();
86  }
87  stringstream msg;
88  msg << "FOUND GAP chunk(offset: " << chunk->get_offset();
89  msg << " size: " << chunk->get_size() << ")";
90  msg << " SuperChunk(ptr: " << (void *) currentSuperChunk;
91  msg << " offset: " << currentSuperChunk->get_offset();
92  msg << " size: " << currentSuperChunk->get_size();
93  msg << " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset << ") ";
94  msg << " gap_size: " << gap_size;
95  msg << " bytes" << (is_behind?" behind":" beyond") << " target offset";
96  msg << endl;
97  cerr << msg.str();
98  }
99  // If we were working on a SuperChunk (i.e. the current SuperChunk contains chunks)
100  // then we need to start a new one.
101  if(!currentSuperChunk->empty()){
102  currentSuperChunk = new SuperChunk();
103  super_chunks.push_back(currentSuperChunk); // next super chunk...
104  }
105  bool add_first_successful = currentSuperChunk->add_chunk(chunk);
106  if(!add_first_successful)
107  throw BESInternalError("ERROR: Failed to add first Chunk to a new SuperChunk."+
108  chunk->to_string() ,__FILE__,__LINE__);
109 
110  }
111  }
112  // Dump the currentSuperChunk if it doesn't have anything in it.
113  if(currentSuperChunk->empty()) {
114  super_chunks.pop_back();
115  delete currentSuperChunk;
116  }
117  if(false){
118  cout << "SuperChunk Inventory For Array: " << array->name() << endl;
119  for(auto super_chunk: super_chunks) {
120  cout << super_chunk->to_string(true) << endl;
121  }
122  }
123 }
124 void compute_super_chunks(libdap::BaseType *var, bool only_constrained, vector<SuperChunk *> &super_chunks) {
125  if (var->is_simple_type())
126  return;
127  if (var->is_constructor_type())
128  return;
129  if (var->is_vector_type()) {
130  auto array = dynamic_cast<dmrpp::DmrppArray *>(var);
131  if (array) {
132  if(debug) cout << "Found DmrppArray: "<< array->name() << endl;
133  compute_super_chunks(array, only_constrained, super_chunks);
134  }
135  else {
136  BESDEBUG(MODULE, prolog << "The variable: "<< var->name()
137  << " is not an instance of DmrppArray. SKIPPING"<< endl);
138  }
139  }
140 }
141 
142 #if 0 // Compiled out: earlier inventory implementation kept for reference only. The live code path is compute_super_chunks().
143 void inventory_super_chunks(libdap::BaseType *var, bool only_constrained, vector<SuperChunk *> &super_chunks){
144  if(var->is_simple_type())
145  return;
146  if(var->is_constructor_type())
147  return;
148  if(var->is_vector_type()){
149  auto array = dynamic_cast<DmrppArray*>(var);
150  if(array){
151  // Now we get the chunkyness
152  auto chunk_dim_sizes = array->get_chunk_dimension_sizes();
153  //unsigned int chunk_size_in_elements = array->get_chunk_size_in_elements();
154  auto chunks = array->get_immutable_chunks();
155  unsigned long long next_contiguous_chunk_offset = 0;
156 
157  //unsigned long long super_chunk_index = 0;
158  vector<vector<const Chunk *> *> super_chunks; // NOTE(review): shadows the super_chunks parameter, so the caller's vector is never filled; these inner vectors are also never deleted.
159  auto currentSuperChunk = new vector<const Chunk *>();
160  super_chunks.push_back(currentSuperChunk); // first super chunk...
161 
162  if(debug) cout << "SuperChunking array: "<< array->name() << endl;
163 
164  bool first = true;
165  for(auto chunk:chunks){ // NOTE(review): iterates by value; the &chunk stored further down is the address of this loop-local copy.
166  auto current_offset = chunk.get_offset(); // NOTE(review): the live code calls chunk->get_offset(); this member-access form presumably predates the current chunk container - confirm before re-enabling.
167  auto current_size = chunk.get_size();
168  // auto c_pia = chunk.get_position_in_array();
169 
170  if(!first){
171  if(current_offset!=next_contiguous_chunk_offset){
172  // The current chunk is not contiguous with the previous
173  unsigned long long gap_size = current_offset - next_contiguous_chunk_offset; // NOTE(review): unsigned subtraction; wraps when current_offset is smaller than next_contiguous_chunk_offset.
174  if(debug) {
175  cout << "FOUND GAP current_offset: " << current_offset <<
176  " nbytes: " << current_offset << // NOTE(review): prints current_offset under the "nbytes" label; presumably current_size was intended.
177  " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset <<
178  " gap_size: " << gap_size <<
179  " currentSuperChunk.size(): " << currentSuperChunk->size() << endl;
180  }
181  // If we were working on a SuperChunk (i.e. the current SuperChunk contains chunks)
182  // then we need to start a new one.
183  if(!currentSuperChunk->empty()){
184  currentSuperChunk = new vector<const Chunk *>();
185  super_chunks.push_back(currentSuperChunk); // next super chunk...
186  }
187  }
188  }
189  currentSuperChunk->push_back(&chunk);
190  next_contiguous_chunk_offset = current_offset + current_size;
191  first = false;
192  }
193  // Dump the currentSuperChunk if it doesn't have anything in it.
194  if(currentSuperChunk->empty()) {
195  super_chunks.pop_back();
196  delete currentSuperChunk;
197  }
198  cout << "SuperChunk Inventory For Array: " << array->name() << endl;
199  unsigned long long sc_count=0;
200  for(auto super_chunk: super_chunks) {
201  cout << " SuperChunk[" << sc_count++ << "] contains : " << super_chunk->size() << " chunks."
202  << endl;
203  if (debug) {
204  for (auto chunk:*super_chunk) {
205  cout << " " << chunk->to_string() << endl;
206  }
207  }
208  }
209 
210  }
211  else {
212  cerr << prolog << " ERROR! The variable: "<< var->name()
213  << " is not an instance of DmrppArray. SKIPPING"<< endl;
214  }
215 
216  }
217  }
218 #endif // end of the disabled legacy inventory_super_chunks(libdap::BaseType *, ...)
219 
220  void inventory_super_chunks(libdap::D4Group *group, bool only_constrained, vector<SuperChunk *> &super_chunks){
221 
222  // Process Groups - RECURSION HAPPENS HERE.
223  auto gtr = group->grp_begin();
224  while(gtr!=group->grp_end()){
225  if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
226  inventory_super_chunks(*gtr++, only_constrained, super_chunks);
227  }
228 
229  // Process Vars
230  auto vtr = group->var_begin();
231  while(vtr!=group->var_end()){
232  if(debug) cout << "Found Variable: "<< (*vtr)->type_name() << " " << (*vtr)->name() << endl;
233  compute_super_chunks(*vtr++, only_constrained, super_chunks);
234  //inventory_super_chunks(*vtr++, only_constrained);
235  }
236 
237  }
238 
239  void inventory_super_chunks(DMRpp &dmr, bool only_constrained, vector<SuperChunk *> &super_chunks){
240  inventory_super_chunks(dmr.root(), only_constrained, super_chunks);
241  }
242 
243 
244  dmrpp::DMRpp *get_dmrpp(const string dmrpp_filename){
245  ifstream dmrpp_ifs (dmrpp_filename);
246  if (dmrpp_ifs.is_open())
247  {
248  dmrpp::DmrppParserSax2 parser;
249  dmrpp::DmrppTypeFactory factory;
250  auto dmr = new DMRpp(&factory,dmrpp_filename);
251  parser.intern(dmrpp_ifs, dmr);
252  return dmr;
253  }
254  else {
255  throw BESInternalFatalError("The provided file could not be opened. filename: '"+dmrpp_filename+"'",__FILE__,__LINE__);
256  }
257  }
258 
259 
260  void inventory_super_chunks(const string dmrpp_filename){
261  cout << "DMR++ file: " << dmrpp_filename << endl;
262  dmrpp::DMRpp *dmr = get_dmrpp(dmrpp_filename);
263 
264  vector<SuperChunk *> super_chunks;
265 
266  {
267  BESStopWatch sw;
268  sw.start(prolog);
269  dmrpp::inventory_super_chunks(*dmr, false, super_chunks);
270  }
271 
272  cout << "DMR++ file: " << dmrpp_filename << endl;
273  cout << "Produced " << super_chunks.size() << " SuperChunks." << endl;
274  for(auto super_chunk: super_chunks) {
275  cout << super_chunk->to_string(true) << endl;
276  }
277 
278  delete dmr;
279  }
280 
281 
282 
283  void dump_vars(libdap::D4Group *group){
284  // Process Groups - RECURSION HAPPENS HERE.
285  auto gtr = group->grp_begin();
286  while(gtr!=group->grp_end()){
287  if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
288  dump_vars(*gtr++);
289  }
290 
291  // Process Vars
292  auto vtr = group->var_begin();
293  while(vtr!=group->var_end()){
294  libdap::BaseType *bt = *vtr++;
295  bt->dump(cout);
296  cout << endl;
297  }
298 
299  }
300  void dump_vars(DMRpp &dmr){
301  dump_vars(dmr.root());
302  }
303 
304 
305 
306 
307 
308 } // namespace dmrpp
309 
310 int main(int argc, char *argv[]) {
311  string bes_log_file("superchunky_bes.log");
312  //string bes_debug_log_file("cerr");
313  //string bes_debug_keys( "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr");
314  //string http_cache_dir;
315  string prefix;
316  //string http_netrc_file;
317  string cache_effective_urls("false");
318  char *prefixCstr = getenv("prefix");
319  if (prefixCstr) {
320  prefix = prefixCstr;
321  } else {
322  prefix = "/";
323  }
324 
325  cout << "bes_log_file: " << bes_log_file << endl;
326 
327  auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
328  TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
329  TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
330  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
331  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
332  TheBESKeys::TheKeys()->set_key("Http.cache.effective.urls", cache_effective_urls, false); // Set AllowedHosts to allow any file
333 
334 
335  // if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
336 
337 
338  BESIndent::SetIndent("");
339 
340  for(auto i=1; i<argc; i++){
341  string dmrpp_filename(argv[i]);
342  //dmrpp::inventory_super_chunks(dmrpp_filename);
343 
344  dmrpp::DMRpp *dmrpp = dmrpp::get_dmrpp( dmrpp_filename);
345  dump_vars(*dmrpp);
346  }
347  return 0;
348 }
349 
350 
351 
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:821
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:205
static std::string ConfigFile
Definition: TheBESKeys.h:184
Provide a way to print the DMR++ response.
Definition: DMRpp.h:42
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:64
virtual unsigned long long get_size(bool constrained=false)
Return the total number of elements in this Array.
Definition: DmrppArray.cc:305
virtual std::vector< std::shared_ptr< Chunk > > get_immutable_chunks() const
A const reference to the vector of chunks.
Definition: DmrppCommon.h:153
virtual unsigned long add_chunk(const std::string &data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array="")
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:199
void intern(std::istream &f, libdap::DMR *dest_dmr)