Orcus
Loading...
Searching...
No Matches
csv_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef ORCUS_CSV_PARSER_HPP
9#define ORCUS_CSV_PARSER_HPP
10
11#include "csv_parser_base.hpp"
12
13namespace orcus {
14
16{
    /**
     * Handler whose callbacks all do nothing.  csv_parser invokes these
     * member functions on its handler object while parsing; each one below
     * has an empty body.
     */
17public:
    /**
     * Called by csv_parser::parse() once, before any row is parsed.
     */
21 void begin_parse() {}
22
    /**
     * Called by csv_parser::parse() once, after the last row has been
     * parsed.
     */
26 void end_parse() {}
27
    /**
     * Called at the start of each row, before any of its cells are reported.
     */
31 void begin_row() {}
32
    /**
     * Called at the end of each row, after its cells have been reported.
     */
36 void end_row() {}
37
    /**
     * Called for each cell value the parser reports.
     *
     * @param p pointer to the first character of the cell value.  The parser
     *          passes a null pointer for an unquoted cell that contains no
     *          characters.
     * @param n length of the cell value.
     * @param transient the parser passes true when the value lives in its
     *                  temporary cell buffer (a quoted cell that contained
     *                  escaped quotes), and false when p points into the
     *                  stream being parsed.
     */
50 void cell(const char* p, size_t n, bool transient)
51 {
    // Reference the parameters so that they do not count as unused.
52 (void)p; (void)n; (void)transient;
53 }
54};
55
/**
 * CSV parser that reports rows and cells to a user-supplied handler.
 *
 * The handler type must provide begin_parse(), end_parse(), begin_row(),
 * end_row() and cell(const char*, size_t, bool), since the member functions
 * of this class call all of them.
 */
56template<typename _Handler>
58{
59public:
60 typedef _Handler handler_type;
61
    /**
     * @param p forwarded to csv::parser_base (presumably the start of the
     *          character stream to parse -- confirm against parser_base).
     * @param n forwarded to csv::parser_base (presumably the stream length).
     * @param hdl handler that receives the parsing events.  It is stored by
     *            reference, so it must outlive this parser.
     * @param config parser configuration, forwarded to csv::parser_base.
     */
62 csv_parser(const char* p, size_t n, handler_type& hdl, const csv::parser_config& config);

    /**
     * Parse the stream: calls begin_parse() on the handler, parses rows for
     * as long as characters remain, then calls end_parse().
     */
63 void parse();
64
65private:
66
67 // handlers

    /** Parse one row, emitting begin_row(), the row's cells and end_row(). */
68 void row();

    /** Parse one unquoted cell, up to a delimiter, a line feed or the end. */
69 void cell();

    /** Parse one cell that starts with a text qualifier. */
70 void quoted_cell();
71
    /**
     * Continue parsing a quoted cell after a pair of consecutive text
     * qualifiers was found in it; the value is assembled in the cell buffer.
     *
     * @param p0 start of the part of the value already scanned.
     * @param len0 length of that part; quoted_cell() passes a length that
     *             includes the first qualifier of the pair.
     */
72 void parse_cell_with_quote(const char* p0, size_t len0);
73
    /**
     * Report an unquoted cell value to the handler, trimming leading and
     * trailing blanks first when the trim_cell_value option is set.
     */
77 void push_cell_value(const char* p, size_t n);
78
79private:
    // Receiver of all parsing events; not owned by the parser.
80 handler_type& m_handler;
81};
82
/**
 * Constructor: forwards p, n and config to the csv::parser_base base class
 * and binds the handler reference.  Nothing is parsed until parse() is
 * called.
 */
83template<typename _Handler>
85 const char* p, size_t n, handler_type& hdl, const csv::parser_config& config) :
86 csv::parser_base(p, n, config), m_handler(hdl) {}
87
88template<typename _Handler>
89void csv_parser<_Handler>::parse()
90{
91#if ORCUS_DEBUG_CSV
92 for (const char* p = mp_begin; p < mp_end; ++p)
93 std::cout << *p;
94 std::cout << std::endl;
95#endif
96
97 m_handler.begin_parse();
98 while (has_char())
99 row();
100 m_handler.end_parse();
101}
102
103template<typename _Handler>
104void csv_parser<_Handler>::row()
105{
106 m_handler.begin_row();
107 while (true)
108 {
109 if (is_text_qualifier(cur_char()))
110 quoted_cell();
111 else
112 cell();
113
114 if (!has_char())
115 {
116 m_handler.end_row();
117 return;
118 }
119
120 char c = cur_char();
121 if (c == '\n')
122 {
123 next();
124#if ORCUS_DEBUG_CSV
125 cout << "(LF)" << endl;
126#endif
127 m_handler.end_row();
128 return;
129 }
130
131 if (!is_delim(c))
132 throw orcus::csv::parse_error("expected a delimiter");
133
134 next();
135
136 if (m_config.trim_cell_value)
137 skip_blanks();
138
139 if (!has_char())
140 {
141 m_handler.end_row();
142 return;
143 }
144 }
145}
146
147template<typename _Handler>
148void csv_parser<_Handler>::cell()
149{
150 const char* p = mp_char;
151 size_t len = 0;
152 char c = cur_char();
153 while (c != '\n' && !is_delim(c))
154 {
155 ++len;
156 next();
157 if (!has_char())
158 break;
159 c = cur_char();
160 }
161
162 if (!len)
163 p = nullptr;
164
165 push_cell_value(p, len);
166}
167
168template<typename _Handler>
169void csv_parser<_Handler>::quoted_cell()
170{
171#if ORCUS_DEBUG_CSV
172 cout << "--- quoted cell" << endl;
173#endif
174 char c = cur_char();
175 assert(is_text_qualifier(c));
176 next(); // Skip the opening quote.
177 if (!has_char())
178 return;
179
180 const char* p0 = mp_char;
181 size_t len = 1;
182 for (; has_char(); next(), ++len)
183 {
184 c = cur_char();
185#if ORCUS_DEBUG_CSV
186 cout << "'" << c << "'" << endl;
187#endif
188 if (!is_text_qualifier(c))
189 continue;
190
191 // current char is a quote. Check if the next char is also a text
192 // qualifier.
193
194 if (has_next() && is_text_qualifier(next_char()))
195 {
196 next();
197 parse_cell_with_quote(p0, len);
198 return;
199 }
200
201 // Closing quote.
202 m_handler.cell(p0, len-1, false);
203 next();
204 skip_blanks();
205 return;
206 }
207
208 // Stream ended prematurely. Handle it gracefully.
209 m_handler.cell(p0, len, false);
210}
211
212template<typename _Handler>
213void csv_parser<_Handler>::parse_cell_with_quote(const char* p0, size_t len0)
214{
215#if ORCUS_DEBUG_CSV
216 using namespace std;
217 cout << "--- parse cell with quote" << endl;
218#endif
219 assert(is_text_qualifier(cur_char()));
220
221 // Push the preceding chars to the temp buffer.
222 m_cell_buf.reset();
223 m_cell_buf.append(p0, len0);
224
225 // Parse the rest, until the closing quote.
226 next();
227 const char* p_cur = mp_char;
228 size_t cur_len = 0;
229 for (; has_char(); next(), ++cur_len)
230 {
231 char c = cur_char();
232#if ORCUS_DEBUG_CSV
233 cout << "'" << c << "'" << endl;
234#endif
235 if (!is_text_qualifier(c))
236 continue;
237
238 if (has_next() && is_text_qualifier(next_char()))
239 {
240 // double quotation. Copy the current segment to the cell buffer.
241 m_cell_buf.append(p_cur, cur_len);
242
243 next(); // to the 2nd quote.
244 p_cur = mp_char;
245 cur_len = 0;
246 continue;
247 }
248
249 // closing quote. Flush the current segment to the cell
250 // buffer, push the value to the handler, and exit normally.
251 m_cell_buf.append(p_cur, cur_len);
252
253 m_handler.cell(m_cell_buf.get(), m_cell_buf.size(), true);
254 next();
255 skip_blanks();
256 return;
257 }
258
259 // Stream ended prematurely.
260 throw csv::parse_error("stream ended prematurely while parsing quoted cell.");
261}
262
263template<typename _Handler>
264void csv_parser<_Handler>::push_cell_value(const char* p, size_t n)
265{
266 size_t len = n;
267
268 if (m_config.trim_cell_value)
269 {
270 // Trim any leading blanks.
271 for (size_t i = 0; i < n; ++i, --len, ++p)
272 {
273 if (!is_blank(*p))
274 break;
275 }
276
277 // Trim any trailing blanks.
278 if (len)
279 {
280 const char* p_end = p + (len-1);
281 for (; p != p_end; --p_end, --len)
282 {
283 if (!is_blank(*p_end))
284 break;
285 }
286 }
287 }
288
289 m_handler.cell(p, len, false);
290#if ORCUS_DEBUG_CSV
291 if (len)
292 cout << "(cell:'" << std::string(p, len) << "')" << endl;
293 else
294 cout << "(cell:'')" << endl;
295#endif
296}
297
298}
299
300#endif
301/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition csv_parser_base.hpp:58
Definition csv_parser_base.hpp:67
Definition csv_parser.hpp:16
void end_row()
Definition csv_parser.hpp:36
void end_parse()
Definition csv_parser.hpp:26
void begin_row()
Definition csv_parser.hpp:31
void cell(const char *p, size_t n, bool transient)
Definition csv_parser.hpp:50
void begin_parse()
Definition csv_parser.hpp:21
Definition csv_parser.hpp:58
Definition parser_base.hpp:41
Definition config.hpp:20
Definition csv_parser_base.hpp:37