Orcus
Loading...
Searching...
No Matches
csv_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef ORCUS_CSV_PARSER_HPP
9#define ORCUS_CSV_PARSER_HPP
10
11#include "csv_parser_base.hpp"
12
13namespace orcus {
14
/**
 * Handler whose callbacks all do nothing.  It shows the set of member
 * functions that csv_parser invokes on its handler object.
 *
 * NOTE(review): the class-head line is absent from this listing; the name
 * `csv_handler` is assumed here - confirm against the upstream header.
 */
class csv_handler
{
public:
    /**
     * Called once by csv_parser::parse() before any row is processed.
     */
    void begin_parse() {}

    /**
     * Called once by csv_parser::parse() after the last row has been
     * processed.
     */
    void end_parse() {}

    /**
     * Called at the start of every row.
     */
    void begin_row() {}

    /**
     * Called at the end of every row.
     */
    void end_row() {}

    /**
     * Called for every cell value the parser extracts.
     *
     * @param value cell value.
     * @param transient the parser passes true when the value is served from
     *                  its internal cell buffer (a quoted cell containing
     *                  escaped quotes), and false when the value points
     *                  directly into the parsed stream.
     */
    void cell(std::string_view value, bool transient)
    {
        (void)value; (void)transient;
    }
};
54
61template<typename HandlerT>
63{
64public:
65 typedef HandlerT handler_type;
66
67 csv_parser(std::string_view content, handler_type& hdl, const csv::parser_config& config);
68 void parse();
69
70private:
71
72 // handlers
73 void row();
74 void cell();
75 void quoted_cell();
76
77 void parse_cell_with_quote(const char* p0, size_t len0);
78
82 void push_cell_value(const char* p, size_t n);
83
84private:
85 handler_type& m_handler;
86};
87
88template<typename _Handler>
90 std::string_view content, handler_type& hdl, const csv::parser_config& config) :
91 csv::parser_base(content, config), m_handler(hdl) {}
92
93template<typename _Handler>
94void csv_parser<_Handler>::parse()
95{
96#if ORCUS_DEBUG_CSV
97 for (const char* p = mp_begin; p < mp_end; ++p)
98 std::cout << *p;
99 std::cout << std::endl;
100#endif
101
102 m_handler.begin_parse();
103 while (has_char())
104 row();
105 m_handler.end_parse();
106}
107
108template<typename _Handler>
109void csv_parser<_Handler>::row()
110{
111 m_handler.begin_row();
112 while (true)
113 {
114 if (is_text_qualifier(cur_char()))
115 quoted_cell();
116 else
117 cell();
118
119 if (!has_char())
120 {
121 m_handler.end_row();
122 return;
123 }
124
125 char c = cur_char();
126 if (c == '\n')
127 {
128 next();
129#if ORCUS_DEBUG_CSV
130 cout << "(LF)" << endl;
131#endif
132 m_handler.end_row();
133 return;
134 }
135
136 if (!is_delim(c))
137 throw orcus::parse_error("expected a delimiter", offset());
138
139 next();
140
141 if (m_config.trim_cell_value)
142 skip_blanks();
143
144 if (!has_char())
145 {
146 m_handler.end_row();
147 return;
148 }
149 }
150}
151
152template<typename _Handler>
153void csv_parser<_Handler>::cell()
154{
155 const char* p = mp_char;
156 size_t len = 0;
157 char c = cur_char();
158 while (c != '\n' && !is_delim(c))
159 {
160 ++len;
161 next();
162 if (!has_char())
163 break;
164 c = cur_char();
165 }
166
167 if (!len)
168 p = nullptr;
169
170 push_cell_value(p, len);
171}
172
173template<typename _Handler>
174void csv_parser<_Handler>::quoted_cell()
175{
176#if ORCUS_DEBUG_CSV
177 cout << "--- quoted cell" << endl;
178#endif
179 char c = cur_char();
180 assert(is_text_qualifier(c));
181 next(); // Skip the opening quote.
182 if (!has_char())
183 return;
184
185 const char* p0 = mp_char;
186 size_t len = 1;
187 for (; has_char(); next(), ++len)
188 {
189 c = cur_char();
190#if ORCUS_DEBUG_CSV
191 cout << "'" << c << "'" << endl;
192#endif
193 if (!is_text_qualifier(c))
194 continue;
195
196 // current char is a quote. Check if the next char is also a text
197 // qualifier.
198
199 if (has_next() && is_text_qualifier(peek_char()))
200 {
201 next();
202 parse_cell_with_quote(p0, len);
203 return;
204 }
205
206 // Closing quote.
207 m_handler.cell({p0, len-1}, false);
208 next();
209 skip_blanks();
210 return;
211 }
212
213 // Stream ended prematurely. Handle it gracefully.
214 m_handler.cell({p0, len}, false);
215}
216
217template<typename _Handler>
218void csv_parser<_Handler>::parse_cell_with_quote(const char* p0, size_t len0)
219{
220#if ORCUS_DEBUG_CSV
221 using namespace std;
222 cout << "--- parse cell with quote" << endl;
223#endif
224 assert(is_text_qualifier(cur_char()));
225
226 // Push the preceding chars to the temp buffer.
227 m_cell_buf.reset();
228 m_cell_buf.append(p0, len0);
229
230 // Parse the rest, until the closing quote.
231 next();
232 const char* p_cur = mp_char;
233 size_t cur_len = 0;
234 for (; has_char(); next(), ++cur_len)
235 {
236 char c = cur_char();
237#if ORCUS_DEBUG_CSV
238 cout << "'" << c << "'" << endl;
239#endif
240 if (!is_text_qualifier(c))
241 continue;
242
243 if (has_next() && is_text_qualifier(peek_char()))
244 {
245 // double quotation. Copy the current segment to the cell buffer.
246 m_cell_buf.append(p_cur, cur_len);
247
248 next(); // to the 2nd quote.
249 p_cur = mp_char;
250 cur_len = 0;
251 continue;
252 }
253
254 // closing quote. Flush the current segment to the cell
255 // buffer, push the value to the handler, and exit normally.
256 m_cell_buf.append(p_cur, cur_len);
257
258 m_handler.cell(m_cell_buf.str(), true);
259 next();
260 skip_blanks();
261 return;
262 }
263
264 // Stream ended prematurely.
265 throw parse_error("stream ended prematurely while parsing quoted cell.", offset());
266}
267
268template<typename _Handler>
269void csv_parser<_Handler>::push_cell_value(const char* p, size_t n)
270{
271 size_t len = n;
272
273 if (m_config.trim_cell_value)
274 {
275 // Trim any leading blanks.
276 for (size_t i = 0; i < n; ++i, --len, ++p)
277 {
278 if (!is_blank(*p))
279 break;
280 }
281
282 // Trim any trailing blanks.
283 if (len)
284 {
285 const char* p_end = p + (len-1);
286 for (; p != p_end; --p_end, --len)
287 {
288 if (!is_blank(*p_end))
289 break;
290 }
291 }
292 }
293
294 m_handler.cell({p, len}, false);
295#if ORCUS_DEBUG_CSV
296 if (len)
297 cout << "(cell:'" << std::string(p, len) << "')" << endl;
298 else
299 cout << "(cell:'')" << endl;
300#endif
301}
302
303}
304
305#endif
306/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition csv_parser_base.hpp:58
Definition csv_parser.hpp:16
void end_row()
Definition csv_parser.hpp:36
void end_parse()
Definition csv_parser.hpp:26
void begin_row()
Definition csv_parser.hpp:31
void begin_parse()
Definition csv_parser.hpp:21
void cell(std::string_view value, bool transient)
Definition csv_parser.hpp:49
Definition csv_parser.hpp:63
Definition exception.hpp:94
Definition parser_base.hpp:23
Definition config.hpp:20
Definition csv_parser_base.hpp:37