bes  Updated for version 3.20.8
check_dmrpp.cc
1 #include <iostream>
2 #include<fstream>
3 #include <string>
4 #include <vector>
5 using namespace std;
6 
7 bool find_var(const string &str, const vector<string>var_type_list,
8  vector<string>&var_type,vector<string>&var_name);
9 bool find_endvar(const string &str,const string vtype);
10 bool find_chunk(const string &str);
11 
12 int main (int argc, char** argv)
13 {
14  // Provide the dmrpp file name and the file name to store the variables that miss values
15  if(argc !=3) {
16  cout<<"Please provide the dmrpp file name to be checked and the output name."<<endl;
17  return -1;
18  }
19 
20  string fname(argv[1]);
21  ifstream dmrpp_fstream;
22  dmrpp_fstream.open(fname.c_str(),ifstream::in);
23  string dmrpp_line;
24 
25  // DAP4 supported atomic datatype
26  vector<string> var_type_list;
27  var_type_list.push_back("Float32");
28  var_type_list.push_back("Int32");
29  var_type_list.push_back("Float64");
30  var_type_list.push_back("Byte");
31  var_type_list.push_back("Int16");
32  var_type_list.push_back("UInt16");
33  var_type_list.push_back("String");
34  var_type_list.push_back("UInt32");
35  var_type_list.push_back("Int8");
36  var_type_list.push_back("Int64");
37  var_type_list.push_back("UInt64");
38  var_type_list.push_back("UInt8");
39  var_type_list.push_back("Char");
40 
41  // var_type and var_name should be var data type and var name in the dmrpp file
42  vector<string>var_type;
43  vector<string>var_name;
44 
45  //The vector to check if chunk block inside this var block(<var ..> </var>)
46  vector<bool>chunk_exist;
47 
48  // The following flags are used to check the variables that miss the values.
49  // In a dmrpp file, an example of variable block may start from
50  // <Float32 name="temperature"> and end with </Float32>
51  // fin_vb_start: flag to find the start of the var block
52  // fin_vb_end: flag to find the end of the var block
53  // chunk_found: flag to find is chunking information is inside the var block
54  bool fin_vb_start = false;
55  bool fin_vb_end = false;
56  bool chunk_found = false;
57 
58  // Check every line of the dmrpp file. This will use less memory.
59  while(getline(dmrpp_fstream,dmrpp_line)) {
60 
61  // If we find the start of the var block(<var..>)
62  if(true == fin_vb_start) {
63 
64  // var data type must exist.
65  if(var_type.empty()) {
66  cout<<"Doesn't have the variable datatype, abort for dmrpp file "<<fname << endl;
67  return -1;
68  }
69  // Not find the end of var block. try to find it.
70  if(false == fin_vb_end)
71  fin_vb_end = find_endvar(dmrpp_line, var_type[var_type.size()-1]);
72 
73  // If find the end of var block, check if the chunk is already found in the var block.
74  if(true == fin_vb_end) {
75  if(false == chunk_found)
76  chunk_exist.push_back(false);
77 
78  // If we find the end of this var block,
79  // reset all bools for the next variable.
80  fin_vb_start = false;
81  fin_vb_end = false;
82  chunk_found = false;
83  }
84  else {// Check if having chunks within this var block.
85  if(false == chunk_found) {
86  chunk_found = find_chunk(dmrpp_line);
87  // When finding the chunk info, update the chunk_exist vector.
88  if(true == chunk_found)
89  chunk_exist.push_back(true);
90  }
91  }
92  }
93  else // Continue finding the var block
94  fin_vb_start = find_var(dmrpp_line,var_type_list,var_type,var_name);
95 
96  }
97 
98  //Sanity check to make sure the chunk_exist vector is the same as var_type vector.
99  //If not, something is wrong with this dmrpp file.
100  if(chunk_exist.size()!=var_type.size()) {
101  cout<<"Number of chunk check is not consistent with the number of var check."<<endl;
102  cout<< "The dmrpp file is "<<fname<<endl;
103  return -1;
104  }
105 
106 #if 0
107 for(size_t i = 0; i<var_type.size(); i++)
108 cout<<"var_type["<<i<<"]= "<<var_type[i]<<endl;
109 for(size_t i = 0; i<var_name.size(); i++) {
110 cout<<"var_name["<<i<<"]= "<<var_name[i]<<endl;
111 cout<<"chunk_exist["<<i<<"]= "<<chunk_exist[i]<<endl;
112 }
113 #endif
114 
115  bool has_missing_info = false;
116  size_t last_missing_chunk_index = -1;
117 
118  // Check if there are any missing variable information.
119  // FIXME: 'i' is unsigned and thus will always be >= 0. jhrg 9/18/20
120  for (size_t i =var_type.size()-1;i>=0;i--) {
121  if(false == chunk_exist[i]){
122  has_missing_info = true;
123  last_missing_chunk_index = i;
124  break;
125  }
126  }
127 
128 #if 0
129  size_t j = 0;
130  for (size_t i =0;i<var_type.size();i++) {
131  if(false == chunk_exist[i]){
132  j++;
133  if(j == 1)
134  cout<<"The following variables don't have data value information(datatype + data name): "<<endl;
135  cout<< var_type[i] <<" "<<var_name[i] <<endl;
136  }
137  }
138 #endif
139 
140  // Report the final output.
141  if(true == has_missing_info) {
142 
143  ofstream dmrpp_ofstream;
144  string fname2(argv[2]);
145  dmrpp_ofstream.open(fname2.c_str(),ofstream::out);
146 
147  for (size_t i =0;i<var_type.size();i++) {
148  if(false == chunk_exist[i]) {
149  if (i!=last_missing_chunk_index)
150  dmrpp_ofstream<<var_name[i] <<",";
151  else
152  dmrpp_ofstream<<var_name[i];
153  }
154  }
155 
156  dmrpp_ofstream.close();
157  }
158 
159 
160  return 0;
161 
162 }
163 
// Recognize the opening line of a variable block such as
//   <Int16 name="foo">
// and record its data type and name.
//
// str:           one line of the dmrpp file.
// var_type_list: the data type names that count as variables.
// var_type:      receives the matched data type (appended) on success.
// var_name:      receives the variable name (appended) on success.
//
// Returns true, after appending exactly one entry to both var_type and
// var_name, when the line
//   - starts with at least one space, followed (after any further spaces) by '<',
//   - ends with '>' (maybe this is too strict),
//   - has one of var_type_list right after '<', directly followed by ' name="',
//   - and carries a non-empty, quote-terminated name.
// Returns false and leaves both vectors untouched otherwise.
bool find_var(const std::string &str, const std::vector<std::string>var_type_list,
              std::vector<std::string>&var_type, std::vector<std::string>&var_name) {

    // Every var block will have spaces before <
    if (str.empty() || str[0] != ' ') {
        return false;
    }

    // Ignore the line with all spaces; the first non-space character should be '<'.
    const std::string::size_type tag_pos = str.find_first_not_of(' ');
    if (tag_pos == std::string::npos || str[tag_pos] != '<') {
        return false;
    }

    // The last character must be '>'. This also guarantees that at least one
    // more character follows the '<'.
    if (str[str.size() - 1] != '>') {
        return false;
    }

    // Find ' name="' after the first character of the type, like <Int16 name="d16_1">
    const std::string sep = " name=\"";
    const std::string::size_type sep_pos = str.find(sep, tag_pos + 2);

    // Cannot find "name=..", ignore this line.
    if (sep_pos == std::string::npos) {
        return false;
    }

    // The text between '<' and ' name="' must be exactly one of the listed
    // types. No first-letter prefilter is applied, so any list works.
    const std::string::size_type type_pos = tag_pos + 1;
    const std::string::size_type type_len = sep_pos - type_pos;
    std::string::size_type var_index = 0;
    while (var_index < var_type_list.size()
           && str.compare(type_pos, type_len, var_type_list[var_index]) != 0) {
        var_index++;
    }

    // If cannot find the supported type, ignore this line.
    if (var_index == var_type_list.size()) {
        return false;
    }

    // Find the end quote of the variable name. The search starts at the first
    // character of the name: starting one character later would step over the
    // closing quote of an empty name and pick up a quote that belongs to a
    // following attribute.
    const std::string::size_type name_pos = sep_pos + sep.size();
    const std::string::size_type end_name_pos = str.find('"', name_pos);
    if (end_name_pos == std::string::npos || end_name_pos == name_pos) {
        // Unterminated or empty name: not a usable variable line.
        return false;
    }

    // Find both var type and var name. Store them in the vectors.
    var_type.push_back(var_type_list[var_index]);
    var_name.push_back(str.substr(name_pos, end_name_pos - name_pos));
    return true;
}
248 
// Tell whether a line carries chunk information.
// Any chunk info (chunk or contiguous) is expected to contain
// "<dmrpp:chunk " and, somewhere after it on the same line, "offset".
// Returns true only when both are present in that order.
bool find_chunk(const std::string &str) {
    const std::string chunk_tag("<dmrpp:chunk ");

    const std::string::size_type tag_at = str.find(chunk_tag);
    if (tag_at == std::string::npos) {
        return false;
    }

    // Only text that follows the chunk tag counts.
    const std::string::size_type offset_at = str.find("offset", tag_at + chunk_tag.size());
    return offset_at != std::string::npos;
}
263 
// Tell whether a line closes the var block of the given type, e.g. </Int32>.
// The closing tag may be preceded by anything (typically spaces), but its
// first occurrence in the line must also be the very end of the line.
bool find_endvar(const std::string &str, const std::string vtype) {
    const std::string closing_tag = "</" + vtype + '>';

    const std::string::size_type tag_at = str.find(closing_tag);
    return tag_at != std::string::npos
        && tag_at + closing_tag.size() == str.size();
}
276 
277 
278 
279 
280 
281 
282 
283 
284 
285 
286 
287