Orcus
Loading...
Searching...
No Matches
yaml_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9#define INCLUDED_ORCUS_YAML_PARSER_HPP
10
11#include "orcus/yaml_parser_base.hpp"
12#include "orcus/parser_global.hpp"
13
14namespace orcus {
15
/**
 * Do-nothing handler for yaml_parser.  Every callback is an empty stub, so
 * the class documents the set of callbacks that yaml_parser invokes on its
 * handler.
 *
 * NOTE(review): the class head and the begin_document(), begin_sequence() and
 * boolean_false() callbacks were missing from this listing and have been
 * restored here; confirm the class name against the upstream header.
 */
class yaml_handler
{
public:
    /**
     * Called first, when the parser starts parsing the content.
     */
    void begin_parse() {}

    /**
     * Called last, when the parser has finished parsing the content.
     */
    void end_parse() {}

    /**
     * Called when a document begins, either implicitly at the first node or
     * explicitly at a '---' marker.
     */
    void begin_document() {}

    /**
     * Called at the end of parsing when a document has been started.
     */
    void end_document() {}

    /**
     * Called when a sequence begins.
     */
    void begin_sequence() {}

    /**
     * Called when a sequence ends.
     */
    void end_sequence() {}

    /**
     * Called when a map begins.
     */
    void begin_map() {}

    /**
     * Called right before the key of a map entry is reported.
     */
    void begin_map_key() {}

    /**
     * Called right after the key of a map entry has been reported.
     */
    void end_map_key() {}

    /**
     * Called when a map ends.
     */
    void end_map() {}

    /**
     * Called when a string value is encountered.
     *
     * @param value the string value.
     */
    void string(std::string_view value)
    {
        (void)value;
    }

    /**
     * Called when a numeric value is encountered.
     *
     * @param val the numeric value.
     */
    void number(double val)
    {
        (void)val;
    }

    /**
     * Called when a boolean 'true' keyword is encountered.
     */
    void boolean_true() {}

    /**
     * Called when a boolean 'false' keyword is encountered.
     */
    void boolean_false() {}

    /**
     * Called when a null value is encountered, or when a map key or a
     * sequence element has no value.
     */
    void null() {}
};
108
117template<typename HandlerT>
119{
120public:
121 typedef HandlerT handler_type;
122
123 yaml_parser(std::string_view content, handler_type& hdl);
124
125 void parse();
126
127private:
128 size_t end_scope();
129 void check_or_begin_document();
130 void check_or_begin_map();
131 void check_or_begin_sequence();
132 void parse_value(const char* p, size_t len);
133 void push_value(const char* p, size_t len);
134 void parse_line(const char* p, size_t len);
135 void parse_map_key(const char* p, size_t len);
136
137 void handler_begin_parse();
138 void handler_end_parse();
139 void handler_begin_document();
140 void handler_end_document();
141 void handler_begin_sequence();
142 void handler_end_sequence();
143 void handler_begin_map();
144 void handler_end_map();
145 void handler_begin_map_key();
146 void handler_end_map_key();
147 void handler_string(const char* p, size_t n);
148 void handler_number(double val);
149 void handler_boolean_true();
150 void handler_boolean_false();
151 void handler_null();
152
153private:
154 handler_type& m_handler;
155};
156
157template<typename _Handler>
159{
160 push_parse_token(yaml::detail::parse_token_t::begin_parse);
161 m_handler.begin_parse();
162}
163
164template<typename _Handler>
165void yaml_parser<_Handler>::handler_end_parse()
166{
167 push_parse_token(yaml::detail::parse_token_t::end_parse);
168 m_handler.end_parse();
169}
170
171template<typename _Handler>
172void yaml_parser<_Handler>::handler_begin_document()
173{
174 push_parse_token(yaml::detail::parse_token_t::begin_document);
175 m_handler.begin_document();
176}
177
178template<typename _Handler>
179void yaml_parser<_Handler>::handler_end_document()
180{
181 push_parse_token(yaml::detail::parse_token_t::end_document);
182 m_handler.end_document();
183}
184
185template<typename _Handler>
186void yaml_parser<_Handler>::handler_begin_sequence()
187{
188 push_parse_token(yaml::detail::parse_token_t::begin_sequence);
189 m_handler.begin_sequence();
190}
191
192template<typename _Handler>
193void yaml_parser<_Handler>::handler_end_sequence()
194{
195 push_parse_token(yaml::detail::parse_token_t::end_sequence);
196 m_handler.end_sequence();
197}
198
199template<typename _Handler>
200void yaml_parser<_Handler>::handler_begin_map()
201{
202 push_parse_token(yaml::detail::parse_token_t::begin_map);
203 m_handler.begin_map();
204}
205
206template<typename _Handler>
207void yaml_parser<_Handler>::handler_end_map()
208{
209 push_parse_token(yaml::detail::parse_token_t::end_map);
210 m_handler.end_map();
211}
212
213template<typename _Handler>
214void yaml_parser<_Handler>::handler_begin_map_key()
215{
216 push_parse_token(yaml::detail::parse_token_t::begin_map_key);
217 m_handler.begin_map_key();
218}
219
220template<typename _Handler>
221void yaml_parser<_Handler>::handler_end_map_key()
222{
223 push_parse_token(yaml::detail::parse_token_t::end_map_key);
224 m_handler.end_map_key();
225}
226
227template<typename _Handler>
228void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
229{
230 push_parse_token(yaml::detail::parse_token_t::string);
231 m_handler.string({p, n});
232}
233
234template<typename _Handler>
235void yaml_parser<_Handler>::handler_number(double val)
236{
237 push_parse_token(yaml::detail::parse_token_t::number);
238 m_handler.number(val);
239}
240
241template<typename _Handler>
242void yaml_parser<_Handler>::handler_boolean_true()
243{
244 push_parse_token(yaml::detail::parse_token_t::boolean_true);
245 m_handler.boolean_true();
246}
247
248template<typename _Handler>
249void yaml_parser<_Handler>::handler_boolean_false()
250{
251 push_parse_token(yaml::detail::parse_token_t::boolean_false);
252 m_handler.boolean_false();
253}
254
255template<typename _Handler>
256void yaml_parser<_Handler>::handler_null()
257{
258 push_parse_token(yaml::detail::parse_token_t::null);
259 m_handler.null();
260}
261
262template<typename _Handler>
263yaml_parser<_Handler>::yaml_parser(std::string_view content, handler_type& hdl) :
264 yaml::parser_base(content), m_handler(hdl) {}
265
266template<typename _Handler>
267void yaml_parser<_Handler>::parse()
268{
269 handler_begin_parse();
270
271 while (has_char())
272 {
273 reset_on_new_line();
274
275 size_t indent = parse_indent();
276 if (indent == parse_indent_end_of_stream)
277 break;
278
279 if (indent == parse_indent_blank_line)
280 continue;
281
282 size_t cur_scope = get_scope();
283
284 if (cur_scope <= indent)
285 {
286 if (in_literal_block())
287 {
288 handle_line_in_literal(indent);
289 continue;
290 }
291
292 if (has_line_buffer())
293 {
294 // This line is part of multi-line string. Push the line to the
295 // buffer as-is.
296 handle_line_in_multi_line_string();
297 continue;
298 }
299 }
300
301 if (cur_scope == scope_empty)
302 {
303 if (indent > 0)
304 throw parse_error(
305 "first node of the document should not be indented.", offset());
306
307 push_scope(indent);
308 }
309 else if (indent > cur_scope)
310 {
311 push_scope(indent);
312 }
313 else if (indent < cur_scope)
314 {
315 // Current indent is less than the current scope level.
316 do
317 {
318 cur_scope = end_scope();
319 if (cur_scope < indent)
320 throw parse_error("parse: invalid indent level.", offset());
321 }
322 while (indent < cur_scope);
323 }
324
325 // Parse the rest of the line.
326 std::string_view line = parse_to_end_of_line();
327 line = trim(line);
328
329 assert(!line.empty());
330 parse_line(line.data(), line.size());
331 }
332
333 // End all remaining scopes.
334 size_t cur_scope = get_scope();
335 while (cur_scope != scope_empty)
336 cur_scope = end_scope();
337
338 if (get_doc_hash())
339 handler_end_document();
340
341 handler_end_parse();
342}
343
344template<typename _Handler>
345size_t yaml_parser<_Handler>::end_scope()
346{
347 switch (get_scope_type())
348 {
349 case yaml::detail::scope_t::map:
350 {
351 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
352 handler_null();
353
354 handler_end_map();
355 break;
356 }
357 case yaml::detail::scope_t::sequence:
358 {
359 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
360 handler_null();
361
362 handler_end_sequence();
363 break;
364 }
365 case yaml::detail::scope_t::multi_line_string:
366 {
367 std::string_view merged = merge_line_buffer();
368 handler_string(merged.data(), merged.size());
369 break;
370 }
371 default:
372 {
373 if (has_line_buffer())
374 {
375 assert(get_line_buffer_count() == 1);
376 std::string_view line = pop_line_front();
377 parse_value(line.data(), line.size());
378 }
379 }
380 }
381 return pop_scope();
382}
383
384template<typename _Handler>
385void yaml_parser<_Handler>::check_or_begin_document()
386{
387 if (!get_doc_hash())
388 {
389 set_doc_hash(mp_char);
390 handler_begin_document();
391 }
392}
393
394template<typename _Handler>
395void yaml_parser<_Handler>::check_or_begin_map()
396{
397 switch (get_scope_type())
398 {
399 case yaml::detail::scope_t::unset:
400 {
401 check_or_begin_document();
402 set_scope_type(yaml::detail::scope_t::map);
403 handler_begin_map();
404 break;
405 }
406 case yaml::detail::scope_t::map:
407 {
408 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
409 handler_null();
410 break;
411 }
412 default:
413 ;
414 }
415}
416
417template<typename _Handler>
418void yaml_parser<_Handler>::check_or_begin_sequence()
419{
420 switch (get_scope_type())
421 {
422 case yaml::detail::scope_t::unset:
423 {
424 check_or_begin_document();
425 set_scope_type(yaml::detail::scope_t::sequence);
426 handler_begin_sequence();
427 break;
428 }
429 case yaml::detail::scope_t::sequence:
430 {
431 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
432 handler_null();
433 break;
434 }
435 default:
436 ;
437 }
438
439 push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
440}
441
442template<typename _Handler>
443void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
444{
445 check_or_begin_document();
446
447 const char* p0 = p;
448 const char* p_end = p + len;
449 double val;
450 p = parse_numeric(p, p_end, val);
451 if (p == p_end)
452 {
453 handler_number(val);
454 return;
455 }
456
457 yaml::detail::keyword_t kw = parse_keyword(p0, len);
458
459 if (kw != yaml::detail::keyword_t::unknown)
460 {
461 switch (kw)
462 {
463 case yaml::detail::keyword_t::null:
464 handler_null();
465 break;
466 case yaml::detail::keyword_t::boolean_true:
467 handler_boolean_true();
468 break;
469 case yaml::detail::keyword_t::boolean_false:
470 handler_boolean_false();
471 break;
472 default:
473 ;
474 }
475
476 return;
477 }
478
479 // Failed to parse it as a number or a keyword. It must be a string.
480 handler_string(p0, len);
481}
482
483template<typename _Handler>
484void yaml_parser<_Handler>::push_value(const char* p, size_t len)
485{
486 check_or_begin_document();
487
488 if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
489 set_scope_type(yaml::detail::scope_t::multi_line_string);
490
491 push_line_back(p, len);
492}
493
494template<typename _Handler>
495void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
496{
497 const char* p_end = p + len;
498 const char* p0 = p; // Save the original head position.
499
500 if (*p == '-')
501 {
502 ++p;
503 if (p == p_end)
504 {
505 // List item start.
506 check_or_begin_sequence();
507 return;
508 }
509
510 switch (*p)
511 {
512 case '-':
513 {
514 // start of a document
515 ++p;
516 if (p == p_end)
517 throw parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
518
519 if (*p != '-')
520 parse_error::throw_with(
521 "parse_line: '-' expected but '", *p, "' found.",
522 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
523
524 ++p; // Skip the '-'.
525 set_doc_hash(p);
526 handler_begin_document();
527 clear_scopes();
528
529 if (p != p_end)
530 {
531 skip_blanks(p, p_end-p);
532
533 // Whatever comes after '---' is equivalent of first node.
534 assert(p != p_end);
535 push_scope(0);
536 parse_line(p, p_end-p);
537 }
538 return;
539 }
540 case ' ':
541 {
542 check_or_begin_sequence();
543
544 // list item start with inline first item content.
545 ++p;
546 if (p == p_end)
547 throw parse_error(
548 "parse_line: list item expected, but the line ended prematurely.",
549 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
550
551 skip_blanks(p, p_end-p);
552
553 size_t scope_width = get_scope() + (p-p0);
554 push_scope(scope_width);
555 parse_line(p, p_end-p);
556 return;
557 }
558 default:
559 // It is none of the above.
560 p = p0;
561 }
562
563 }
564
565 if (get_scope_type() == yaml::detail::scope_t::sequence)
566 parse_error::throw_with(
567 "'-' was expected for a sequence element, but '", *p, "' was found.",
568 offset_last_char_of_line()-len+1);
569
570 // If the line doesn't start with a "- ", it must be a dictionary key.
571 parse_map_key(p, len);
572}
573
574template<typename _Handler>
575void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
576{
577 const char* p_end = p + len;
578 const char* p0 = p; // Save the original head position.
579
580 switch (*p)
581 {
582 case '"':
583 {
584 std::string_view quoted_str = parse_double_quoted_string_value(p, len);
585
586 if (p == p_end)
587 {
588 handler_string(quoted_str.data(), quoted_str.size());
589 return;
590 }
591
592 skip_blanks(p, p_end-p);
593
594 if (*p != ':')
595 throw parse_error(
596 "parse_map_key: ':' is expected after the quoted string key.",
597 offset() - std::ptrdiff_t(p_end-p+1));
598
599 check_or_begin_map();
600 handler_begin_map_key();
601 handler_string(quoted_str.data(), quoted_str.size());
602 handler_end_map_key();
603
604 ++p; // skip the ':'.
605 if (p == p_end)
606 return;
607
608 // Skip all white spaces.
609 skip_blanks(p, p_end-p);
610 }
611 break;
612 case '\'':
613 {
614 std::string_view quoted_str = parse_single_quoted_string_value(p, len);
615
616 if (p == p_end)
617 {
618 handler_string(quoted_str.data(), quoted_str.size());
619 return;
620 }
621
622 skip_blanks(p, p_end-p);
623
624 if (*p != ':')
625 throw parse_error(
626 "parse_map_key: ':' is expected after the quoted string key.",
627 offset() - std::ptrdiff_t(p_end-p+1));
628
629 check_or_begin_map();
630 handler_begin_map_key();
631 handler_string(quoted_str.data(), quoted_str.size());
632 handler_end_map_key();
633
634 ++p; // skip the ':'.
635 if (p == p_end)
636 return;
637
638 skip_blanks(p, p_end-p);
639 }
640 break;
641 default:
642 {
643 key_value kv = parse_key_value(p, p_end-p);
644
645 if (kv.key.empty())
646 {
647 // No map key found.
648 if (*p == '|')
649 {
650 start_literal_block();
651 return;
652 }
653
654 push_value(p, len);
655 return;
656 }
657
658 check_or_begin_map();
659 handler_begin_map_key();
660 parse_value(kv.key.data(), kv.key.size());
661 handler_end_map_key();
662
663 if (kv.value.empty())
664 return;
665
666 p = kv.value.data();
667 }
668 }
669
670 if (*p == '|')
671 {
672 start_literal_block();
673 return;
674 }
675
676 // inline map item.
677 if (*p == '-')
678 throw parse_error(
679 "parse_map_key: sequence entry is not allowed as an inline map item.",
680 offset() - std::ptrdiff_t(p_end-p+1));
681
682 size_t scope_width = get_scope() + (p-p0);
683 push_scope(scope_width);
684 parse_line(p, p_end-p);
685}
686
687}
688
689#endif
690
691/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition yaml_parser_base.hpp:66
Definition yaml_parser.hpp:21
void end_map()
Definition yaml_parser.hpp:71
void begin_parse()
Definition yaml_parser.hpp:26
void end_sequence()
Definition yaml_parser.hpp:51
void end_parse()
Definition yaml_parser.hpp:31
void null()
Definition yaml_parser.hpp:106
void boolean_true()
Definition yaml_parser.hpp:96
void begin_map_key()
Definition yaml_parser.hpp:61
void boolean_false()
Definition yaml_parser.hpp:101
void begin_map()
Definition yaml_parser.hpp:56
void number(double val)
Definition yaml_parser.hpp:88
void end_document()
Definition yaml_parser.hpp:41
void begin_document()
Definition yaml_parser.hpp:36
void string(std::string_view value)
Definition yaml_parser.hpp:78
void begin_sequence()
Definition yaml_parser.hpp:46
void end_map_key()
Definition yaml_parser.hpp:66
Definition yaml_parser.hpp:119