Argo  1.0
A C++ library for handling JSON.
parser.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Andrew Haisley
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in all
12  * copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
24 
25 #include <math.h>
26 
27 #include <sstream>
28 #include <fstream>
29 
30 #include "common.hpp"
31 #include "parser.hpp"
32 #include "lexer.hpp"
33 #include "utf8.hpp"
35 #include "stream_reader.hpp"
36 #include "file_reader.hpp"
37 #include "json_utf8_exception.hpp"
38 #include "json_io_exception.hpp"
39 
40 #ifndef _ARGO_WINDOWS_
41 #include "fd_reader.hpp"
42 #endif
43 
44 using namespace std;
45 using namespace NAMESPACE;
46 
47 parser::parser(
48  reader &r,
49  bool read_all,
50  size_t p_max_token_length,
51  size_t p_max_nesting_depth,
52  bool p_convert_numbers,
53  bool p_fallback_to_double,
54  bool p_convert_strings) :
55  m_reader(r),
56  m_read_all(read_all),
57  m_max_token_length(p_max_token_length),
58  m_max_nesting_depth(p_max_nesting_depth),
59  m_convert_numbers(p_convert_numbers),
60  m_fallback_to_double(p_fallback_to_double),
61  m_convert_strings(p_convert_strings)
62 {
63 }
64 
/// Convert an integer token into a json value.
///
/// The token's raw text is extracted into an int through an istringstream.
/// On success a json holding that int is returned. If the extraction fails
/// and m_fallback_to_double is set, the token is passed to
/// parse_number_double() and its result is returned instead.
///
/// @param t token whose raw value is converted.
/// @return a newly allocated json value.
///
/// NOTE(review): the embedded listing numbers jump from 83 to 86 below, so
/// the opening of the statement in the innermost else branch is missing from
/// this copy. Presumably it throws an exception built from the raw value and
/// the reader's byte index - confirm against the original parser.cpp.
65 unique_ptr<json> parser::parse_number_int(const token &t)
66 {
67  istringstream is(t.get_raw_value());
68 
69  int i;
70 
71  if (is >> i)
72  {
73  return unique_ptr<json>(new json(i));
74  }
75  else
76  {
77  // We know the int is syntactically correct so this has to be a range error.
78  if (m_fallback_to_double)
79  {
80  return parse_number_double(t);
81  }
82  else
83  {
  // NOTE(review): listing lines 84-85 are absent here; only the trailing
  // arguments of the statement remain.
86  t.get_raw_value(),
87  m_reader.get_byte_index());
88  }
89  }
90 }
91 
/// Convert a numeric token into a json value holding a double.
///
/// The token's raw text is extracted into a double through an istringstream.
/// The value is accepted only if the extraction succeeds and isfinite()
/// reports it as finite; otherwise a json_parser_exception is thrown.
///
/// @param t token whose raw value is converted.
/// @return a newly allocated json holding the double.
/// @throws json_parser_exception when extraction fails or the result is not finite.
///
/// NOTE(review): the embedded listing numbers jump from 104 to 106, so the
/// first argument of the exception constructor is missing from this copy -
/// confirm against the original parser.cpp.
92 unique_ptr<json> parser::parse_number_double(const token &t)
93 {
94  istringstream is(t.get_raw_value());
95  double d;
96 
97  if (is >> d && isfinite(d))
98  {
99  return unique_ptr<json>(new json(d));
100  }
101  else
102  {
103  // We know the float is syntactically correct so this has to be a range error.
104  throw json_parser_exception(
  // NOTE(review): listing line 105 is absent here.
106  t.get_raw_value(),
107  m_reader.get_byte_index());
108  }
109 }
110 
/// Read the next token from the lexer and build the json value it starts.
///
/// Objects and arrays are delegated to parse_object() / parse_array() with
/// nesting_depth + 1. Numbers are converted through parse_number_int() /
/// parse_number_double() when m_convert_numbers is set, otherwise the raw
/// text is stored with the matching json type tag. Strings are converted with
/// utf8::json_string_to_utf8() when m_convert_strings is set, otherwise the
/// raw text is stored. false, true and null map to the corresponding json
/// constructors. Any other token type results in a json_parser_exception.
///
/// @param l lexer supplying tokens.
/// @param nesting_depth depth of the value being parsed.
/// @return a newly allocated json value.
/// @throws json_parser_exception for a token type not handled by the switch.
/// @throws json_utf8_exception from string conversion, after the reader's
///         byte index has been added to it.
///
/// NOTE(review): the embedded listing numbers skip 117, 119, 130 and 164, so
/// three case labels and the first argument of the default-branch exception
/// are missing from this copy - confirm against the original parser.cpp.
111 unique_ptr<json> parser::parse_value(lexer &l, size_t nesting_depth)
112 {
113  const token &t = l.next();
114 
115  switch (t.get_type())
116  {
  // NOTE(review): listing line 117 (presumably the case label for an object start) is absent.
118  return parse_object(l, nesting_depth + 1);
  // NOTE(review): listing line 119 (presumably the case label for an array start) is absent.
120  return parse_array(l, nesting_depth + 1);
121  case token::number_int_e:
122  if (m_convert_numbers)
123  {
124  return parse_number_int(t);
125  }
126  else
127  {
128  return unique_ptr<json>(new json(json::number_int_e, t.get_raw_value()));
129  }
  // NOTE(review): listing line 130 (presumably the case label for a floating point token) is absent.
131  if (m_convert_numbers)
132  {
133  return parse_number_double(t);
134  }
135  else
136  {
137  return unique_ptr<json>(new json(json::number_double_e, t.get_raw_value()));
138  }
139  case token::string_e:
140  if (m_convert_strings)
141  {
142  try
143  {
144  return unique_ptr<json>(new json(utf8::json_string_to_utf8(t.get_raw_value())));
145  }
146  catch (json_utf8_exception &e)
147  {
  // Attach the current input position before propagating the error.
148  e.add_byte_index(m_reader.get_byte_index());
  // NOTE(review): `throw e;` throws a copy of e; a bare `throw;` would
  // rethrow the original exception object - consider changing.
149  throw e;
150  }
151  }
152  else
153  {
154  return unique_ptr<json>(new json(json::string_e, t.get_raw_value()));
155  }
156  case token::false_e:
157  return unique_ptr<json>(new json(false));
158  case token::true_e:
159  return unique_ptr<json>(new json(true));
160  case token::null_e:
161  return unique_ptr<json>(new json);
162  default:
163  throw json_parser_exception(
  // NOTE(review): listing line 164 is absent here.
165  t.get_raw_value(),
166  m_reader.get_byte_index());
167  }
168 }
169 
/// Parse the elements of a JSON array into a json of type array_e.
///
/// Throws if nesting_depth exceeds m_max_nesting_depth. An immediately
/// following end_array_e token yields an empty array. Otherwise the token is
/// put back, the first element is parsed with parse_value(), and the loop
/// keeps reading tokens: one branch parses a further element, end_array_e
/// returns the array, and anything else throws.
///
/// @param l lexer supplying tokens.
/// @param nesting_depth depth of this array, passed on unchanged to parse_value().
/// @return the populated json array.
/// @throws json_parser_exception on excessive nesting or an unexpected token.
///
/// NOTE(review): the embedded listing numbers skip 175, 198 and 209, so the
/// first argument of both exceptions and the first condition inside the loop
/// (presumably a test for the value separator token) are missing from this
/// copy - confirm against the original parser.cpp.
170 unique_ptr<json> parser::parse_array(lexer &l, size_t nesting_depth)
171 {
172  if (nesting_depth > m_max_nesting_depth)
173  {
174  throw json_parser_exception(
  // NOTE(review): listing line 175 is absent here.
176  m_max_nesting_depth,
177  m_reader.get_byte_index());
178  }
179 
180  unique_ptr<json> array = unique_ptr<json>(new json(json::array_e));
181 
182  const token &t1 = l.next();
183 
184  if (t1.get_type() == token::end_array_e)
185  {
186  return array;
187  }
188  else
189  {
  // Not an empty array: return the token to the lexer so parse_value() sees it.
190  l.put_back_last();
191  array->get_array().push_back(parse_value(l, nesting_depth));
192  }
193 
194  while (true)
195  {
196  const token &t2 = l.next();
197 
  // NOTE(review): listing line 198 (the opening `if` of this chain) is absent.
199  {
200  array->get_array().push_back(parse_value(l, nesting_depth));
201  }
202  else if (t2.get_type() == token::end_array_e)
203  {
204  return array;
205  }
206  else
207  {
208  throw json_parser_exception(
  // NOTE(review): listing line 209 is absent here.
210  t2.get_raw_value(),
211  m_reader.get_byte_index());
212  }
213  }
214 
  // Not reached: every path out of the loop above returns or throws.
215  return array;
216 }
217 
/// Parse one `name : value` member and store it in the given object.
///
/// The first token must be a string, which becomes the member name; when
/// m_convert_strings is not set the raw token text is used as the name. The
/// second token must be name_separator_e. The value is then parsed with
/// parse_value() and assigned to object->get_object()[name].
///
/// @param l lexer supplying tokens.
/// @param object json object that receives the member.
/// @param nesting_depth depth passed on unchanged to parse_value().
/// @throws json_parser_exception if the name is not a string token or the
///         separator token is missing.
///
/// NOTE(review): the embedded listing numbers skip 228, 238 and 248, so the
/// statement that sets `name` when m_convert_strings is true and the first
/// argument of both exceptions are missing from this copy - confirm against
/// the original parser.cpp.
218 void parser::parse_name_value_pair(lexer &l, unique_ptr<json> &object, size_t nesting_depth)
219 {
220  const token &t1 = l.next();
221 
222  string name;
223 
224  if (t1.get_type() == token::string_e)
225  {
226  if (m_convert_strings)
227  {
  // NOTE(review): listing line 228 is absent; as shown, `name` stays empty on this path.
229  }
230  else
231  {
232  name = t1.get_raw_value();
233  }
234  }
235  else
236  {
237  throw json_parser_exception(
  // NOTE(review): listing line 238 is absent here.
239  t1.get_raw_value(),
240  m_reader.get_byte_index());
241  }
242 
243  const token &t2 = l.next();
244 
245  if (t2.get_type() != token::name_separator_e)
246  {
247  throw json_parser_exception(
  // NOTE(review): listing line 248 is absent here.
249  t2.get_raw_value(),
250  m_reader.get_byte_index());
251  }
252 
253  object->get_object()[name] = parse_value(l, nesting_depth);
254 }
255 
/// Parse the members of a JSON object into a json of type object_e.
///
/// Throws if nesting_depth exceeds m_max_nesting_depth. An immediately
/// following end_object_e token yields an empty object. Otherwise the token
/// is put back and members are read with parse_name_value_pair() until an
/// end_object_e token is seen; a value_separator_e token continues with the
/// next member and any other token throws.
///
/// @param l lexer supplying tokens.
/// @param nesting_depth depth of this object, passed on unchanged to
///        parse_name_value_pair().
/// @return the populated json object.
/// @throws json_parser_exception on excessive nesting or an unexpected token.
///
/// NOTE(review): the embedded listing numbers skip 261 and 297, so the first
/// argument of both exceptions is missing from this copy - confirm against
/// the original parser.cpp.
256 unique_ptr<json> parser::parse_object(lexer &l, size_t nesting_depth)
257 {
258  if (nesting_depth > m_max_nesting_depth)
259  {
260  throw json_parser_exception(
  // NOTE(review): listing line 261 is absent here.
262  m_max_nesting_depth,
263  m_reader.get_byte_index());
264  }
265 
266  unique_ptr<json> object = unique_ptr<json>(new json(json::object_e));
267 
268  const token &t1 = l.next();
269 
270  // check for empty object
271  if (t1.get_type() == token::end_object_e)
272  {
273  return object;
274  }
275  else
276  {
  // Not empty: return the token to the lexer so the member parser sees it.
277  l.put_back_last();
278  }
279 
280  while (true)
281  {
282  parse_name_value_pair(l, object, nesting_depth);
283 
284  const token &t2 = l.next();
285 
286  if (t2.get_type() == token::end_object_e)
287  {
288  return object;
289  }
290  else if (t2.get_type() == token::value_separator_e)
291  {
292  continue;
293  }
294  else
295  {
296  throw json_parser_exception(
  // NOTE(review): listing line 297 is absent here.
298  t2.get_raw_value(),
299  m_reader.get_byte_index());
300  }
301  }
302 
  // Not reached: every path out of the loop above returns or throws.
303  return object;
304 }
305 
/// Parse one JSON value from the reader this parser was constructed with.
///
/// Resets the reader's byte index, creates a lexer limited to
/// m_max_token_length, and parses a single value starting at depth 0. When
/// m_read_all is set, the rest of the input is then consumed up to EOF and
/// each character is compared against space (0x20), tab (0x09), line feed
/// (0x0A) and carriage return (0x0D).
///
/// @return the parsed json value.
///
/// NOTE(review): the embedded listing numbers jump from 322 to 324, so the
/// statement executed for a non-whitespace trailing character is missing from
/// this copy. Presumably it throws - confirm against the original parser.cpp.
306 unique_ptr<json> parser::parse()
307 {
308  m_reader.reset_byte_index();
309 
310  lexer l(m_reader, m_max_token_length);
311 
312  auto res = parse_value(l, 0);
313 
314  if (m_read_all)
315  {
316  // check that there isn't anything other than whitespace left
317  int c;
318 
319  while ((c = m_reader.next()) != EOF)
320  {
321  if ((c != 0x20) && (c != 0x09) && (c != 0x0A) && (c != 0x0D))
322  {
  // NOTE(review): listing line 323 is absent; as shown, this branch does nothing.
324  }
325  }
326  }
327  return res;
328 }
329 
330 unique_ptr<json> parser::parse(istream &i)
331 {
332  stream_reader r(&i, max_message_length, true);
333  parser p(r);
334  return p.parse();
335 }
336 
337 #ifndef _ARGO_WINDOWS_
338 unique_ptr<json> parser::parse(int fd)
339 {
340  fd_reader r(fd, max_message_length, true);
341  parser p(r);
342  return p.parse();
343 }
344 #endif
345 
346 unique_ptr<json> parser::parse(FILE *s)
347 {
348  file_reader r(s, max_message_length, true);
349  parser p(r);
350  return p.parse();
351 }
352 
353 unique_ptr<json> parser::parse(const char *s)
354 {
355  istringstream i(s);
356  return parse(i);
357 }
358 
359 unique_ptr<json> parser::parse(const string &s)
360 {
361  istringstream i(s);
362  return parse(i);
363 }
364 
/// Parse a JSON message from the named file.
///
/// Opens the file with an ifstream and, if the stream is in a good state,
/// returns the result of parsing it with the istream overload of parse().
///
/// @param file_name path of the file to read.
/// @return the parsed json value.
///
/// NOTE(review): the embedded listing numbers jump from 374 to 376, so the
/// statement executed when the file cannot be opened is missing from this
/// copy. Presumably it throws an IO exception - confirm against the original
/// parser.cpp.
365 unique_ptr<json> parser::load(const string &file_name)
366 {
367  ifstream is(file_name);
368 
369  if (is)
370  {
371  return parse(is);
372  }
373  else
374  {
  // NOTE(review): listing line 375 is absent; as shown, this path returns nothing.
376  }
377 }
378 
379 istream &NAMESPACE::operator>>(istream &stream, json &j)
380 {
381  j = move(*parser::parse(stream));
382  return stream;
383 }
384 
385 void NAMESPACE::operator>>(string &s, json &j)
386 {
387  j = move(*parser::parse(s));
388 }
A derived class of reader that reads from stdio FILEs.
Definition: file_reader.hpp:35
The json_parser_exception class.
static const size_t max_message_length
The default maximum length of a message that can be parsed.
Definition: parser.hpp:60
const std::string & get_raw_value() const
Get the raw untranslated JSON value.
Definition: token.cpp:78
Exception class for parser errors.
A class to read JSON messages from various types of input stream.
Definition: reader.hpp:42
virtual int next()
Get the next character from the reader.
Definition: reader.cpp:54
The lexer class.
The fd_reader class.
#define NAMESPACE
You can change the namespace of the whole library by changing this value.
Definition: common.hpp:29
Specific class of exceptions for IO errors of various types.
A recursive descent parser for JSON messages.
Definition: parser.hpp:55
STL namespace.
A number was too large or small to convert accurately to an int or double.
A derived class of reader that reads from POSIX file descriptors.
Definition: fd_reader.hpp:33
void add_byte_index(size_t byte_index) noexcept
Add the byte index where the error occurred to the exception.
The file_reader class.
string in JSON format
Definition: token.hpp:65
floating point number
Definition: token.hpp:63
Common defs needed everywhere and, as far as is possible, platform specific changes.
const token & next()
Definition: lexer.cpp:340
void put_back_last()
Definition: lexer.cpp:354
The json_utf8_exception class.
static std::unique_ptr< json > load(const std::string &file_name)
Definition: parser.cpp:365
The parser class.
Lexical tokens read from a JSON message.
Definition: token.hpp:41
std::unique_ptr< json > parse()
Definition: parser.cpp:306
size_t get_byte_index() const
Get the current byte index in the input.
Definition: reader.cpp:87
All json things are represented by instances of this class.
Definition: json.hpp:63
A lexical analyser for JSON messages.
Definition: lexer.hpp:41
The stream_reader class.
static std::unique_ptr< std::string > json_string_to_utf8(const std::string &src)
Definition: utf8.cpp:163
A class to read JSON messages from C++ istreams.
An invalid character was found during parsing (e.g. ! outside of a string).
whole number
Definition: token.hpp:61
The utf8 class.
Exception class for errors translating to and from UTF-8 strings.
A valid, but unexpected, lexical token was found (e.g. { {).
token_type get_type() const
Get the token type.
Definition: token.cpp:83
static std::unique_ptr< json > parse(std::istream &i)
Definition: parser.cpp:330
The json_io_exception class.
void reset_byte_index()
Reset the byte index at the start of parsing a message.
Definition: reader.cpp:92