Argo  1.0
A C++ library for handling JSON.
lexer.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Andrew Haisley
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in all
12  * copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22 
24 
25 #include "common.hpp"
26 #include "lexer.hpp"
28 
29 using namespace std;
30 using namespace NAMESPACE;
31 
32 lexer::lexer(reader &r, size_t max_token_length) :
33  m_reader(r),
34  m_last_put_back(false),
35  m_max_token_length(max_token_length),
36  m_buffer(new char[max_token_length])
37 {
38 }
39 
41 {
42  delete [] m_buffer;
43 }
44 
45 void lexer::consume_white_space()
46 {
47  while (true)
48  {
49  int c = m_reader.next();
50 
51  if ((c == 0x20) || (c == 0x09) || (c == 0x0A) || (c == 0x0D))
52  {
53  continue;
54  }
55  else
56  {
57  m_reader.put_back(c);
58  return;
59  }
60  }
61 }
62 
// Reads the next token from the input and stores it in m_token.
//
// Leading white space is skipped first. A digit or '-' is put back and handed
// to read_number(); '"', 'f', 't' and 'n' dispatch to read_string(),
// read_false(), read_true() and read_null() respectively (the introducing
// character has already been consumed in those cases). The structural
// characters '[', ']', '{', '}' and ':' each produce their token directly.
//
// NOTE(review): this listing skips original lines 93, 112 and 114, so the
// statements belonging to the ',' case and to the EOF/default cases are not
// visible here - confirm against the real source file.
63 void lexer::read_token()
64 {
65  consume_white_space();
66  int c = m_reader.next();
67 
68  if (isdigit(c))
69  {
// read_number() expects to read the first digit itself.
70  m_reader.put_back(c);
71  read_number();
72  }
73  else
74  {
75  switch (c)
76  {
77  case '[':
78  m_token = token(token::begin_array_e);
79  break;
80  case ']':
81  m_token = token(token::end_array_e);
82  break;
83  case '{':
84  m_token = token(token::begin_object_e);
85  break;
86  case '}':
87  m_token = token(token::end_object_e);
88  break;
89  case ':':
90  m_token = token(token::name_separator_e);
91  break;
92  case ',':
// NOTE(review): original line 93 is missing from this listing; presumably it
// assigns the value-separator token, by analogy with the cases above - confirm.
94  break;
95  case '-':
// A leading minus sign starts a number; read_number() re-reads it.
96  m_reader.put_back(c);
97  read_number();
98  break;
99  case '"':
100  read_string();
101  break;
102  case 'f':
103  read_false();
104  break;
105  case 't':
106  read_true();
107  break;
108  case 'n':
109  read_null();
110  break;
111  case EOF:
// NOTE(review): original line 112 (the EOF action) is missing from this listing.
113  default:
// NOTE(review): original line 114 (the action for any other character) is
// missing from this listing.
115  }
116  }
117 }
118 
// Stores the character c at s[index] and then increments index.
//
// s      Destination buffer.
// index  Position to write to; advanced by one on success.
// c      Character to store (converted from int on assignment).
//
// Throws json_parser_exception when index has already reached
// m_max_token_length, i.e. the token would not fit.
// NOTE(review): the first argument of the exception (original line 124) is
// missing from this listing.
119 void lexer::append_to_number_buffer(char *s, size_t &index, int c)
120 {
121  if (index >= m_max_token_length)
122  {
123  throw json_parser_exception(
125  m_max_token_length,
126  m_reader.get_byte_index());
127  }
128 
129  s[index++] = c;
130 }
131 
// Reports a malformed number.
//
// c  The character at which the number stopped being valid, or EOF.
//
// For any character other than EOF a json_parser_exception is thrown carrying
// that character and the reader's current byte index.
// NOTE(review): original lines 136 (the body of the EOF branch) and 141 (the
// first argument of the exception below) are missing from this listing.
132 void lexer::throw_number_exception(int c)
133 {
134  if (c == EOF)
135  {
137  }
138  else
139  {
140  throw json_parser_exception(
142  static_cast<char>(c),
143  m_reader.get_byte_index());
144  }
145 }
146 
147 size_t lexer::read_digits(size_t &index)
148 {
149  size_t num_digits = 0;
150 
151  while (true)
152  {
153  int c = m_reader.next();
154  if (isdigit(c))
155  {
156  append_to_number_buffer(m_buffer, index, c);
157  num_digits++;
158  }
159  else
160  {
161  m_reader.put_back(c);
162  return num_digits;
163  }
164  }
165 }
166 
167 void lexer::read_number()
168 {
169  int c = m_reader.next();
170 
171  size_t n = 0;
172  bool is_double = false;
173  size_t num_int_digits = 0;
174 
175  // - or digit
176  if (isdigit(c))
177  {
178  append_to_number_buffer(m_buffer, n, c);
179  num_int_digits++;
180  }
181  else if (c == '-')
182  {
183  append_to_number_buffer(m_buffer, n, c);
184  }
185  else
186  {
187  throw_number_exception(c);
188  }
189 
190  // some digits
191  while (true)
192  {
193  c = m_reader.next();
194 
195  if (isdigit(c))
196  {
197  append_to_number_buffer(m_buffer, n, c);
198  num_int_digits++;
199  }
200  else
201  {
202  m_reader.put_back(c);
203  break;
204  }
205  }
206 
207  if (num_int_digits == 0)
208  {
209  // Must be at least one digit in the buffer
210  throw_number_exception(c);
211  }
212  else if (num_int_digits > 1 && m_buffer[n - num_int_digits] == '0')
213  {
214  // If there's more than one digit, the first one must not be zero. No leading zeroes
215  // are allowed by the standard.
216  throw_number_exception('0');
217  }
218 
219  c = m_reader.next();
220 
221  // maybe followed by a point
222  if (c == '.')
223  {
224  append_to_number_buffer(m_buffer, n, c);
225  is_double = true;
226 
227  // followed by more digits
228  if (read_digits(n) == 0)
229  {
230  throw_number_exception(c);
231  }
232 
233  c = m_reader.next();
234  }
235 
236  // e/E
237  if (c == 'e' || c == 'E')
238  {
239  append_to_number_buffer(m_buffer, n, c);
240  is_double = true;
241 
242  // +/-
243  c = m_reader.next();
244 
245  if (c == '+' || c == '-')
246  {
247  append_to_number_buffer(m_buffer, n, c);
248 
249  // some digits
250  if (read_digits(n) == 0)
251  {
252  throw_number_exception(c);
253  }
254  }
255  else
256  {
257  throw_number_exception(c);
258  }
259  }
260  else
261  {
262  m_reader.put_back(c);
263  }
264 
265  m_token = token(is_double ? token::number_double_e : token::number_int_e, m_buffer, n);
266 }
267 
// Reads the body of a JSON string into m_buffer and stores it in m_token as a
// string_e token of length n.
//
// The opening quote has already been consumed by read_token(). Characters are
// copied verbatim, backslashes included - escape sequences are not decoded
// here. Reading stops at the first '"' that is not immediately preceded by a
// backslash.
//
// Throws json_parser_exception when a character with a value of 0x0F or lower
// is read, or when the string does not fit in m_max_token_length bytes.
// NOTE(review): the first argument of each exception (original lines 281 and
// 295) is missing from this listing.
268 void lexer::read_string()
269 {
270  size_t n = 0;
271  bool in_escape = false;
272 
273  while (true)
274  {
275  int c;
276 
277  c = m_reader.next();
// NOTE(review): this also catches any negative value returned by next()
// (presumably EOF - confirm). JSON requires all control characters up to 0x1F
// to be escaped, so the 0xf bound looks too low - confirm whether 0x1f was
// intended.
278  if (c <= 0xf)
279  {
280  throw json_parser_exception(
282  static_cast<char>(c),
283  m_reader.get_byte_index());
284  }
285  else if (c == '"' && !in_escape)
286  {
287  m_token = token(token::string_e, m_buffer, n);
288  return;
289  }
290  else
291  {
292  if (n >= m_max_token_length)
293  {
294  throw json_parser_exception(
296  m_max_token_length,
297  m_reader.get_byte_index());
298  }
299  else
300  {
301  m_buffer[n++] = c;
302  }
303  }
// NOTE(review): in_escape becomes true after every backslash, including one
// that is itself escaped, so a string ending in an escaped backslash
// (backslash, backslash, quote) is not terminated at that quote.
304  in_escape = (c == '\\');
305  }
306 }
307 
308 void lexer::read_false()
309 {
310  read_matching("alse");
311  m_token = token(token::false_e);
312 }
313 
314 void lexer::read_true()
315 {
316  read_matching("rue");
317  m_token = token(token::true_e);
318 }
319 
320 void lexer::read_null()
321 {
322  read_matching("ull");
323  m_token = token(token::null_e);
324 }
325 
// Consumes one character from the reader for each character of s, comparing
// them in order.
//
// s  NUL-terminated text that the input is expected to contain next.
//
// NOTE(review): original line 335 - the action taken when a character does not
// match - is missing from this listing; presumably it throws - confirm.
326 void lexer::read_matching(const char *s)
327 {
328  int i = 0;
329  while (s[i] != 0)
330  {
// The int returned by next() is narrowed to char before the comparison.
331  char c = m_reader.next();
332 
333  if (c != s[i++])
334  {
336  }
337  }
338 }
339 
341 {
342  if (m_last_put_back)
343  {
344  m_last_put_back = false;
345  }
346  else
347  {
348  read_token();
349  }
350 
351  return m_token;
352 }
353 
355 {
356  m_last_put_back = true;
357 }
The json_parser_exception class.
Exception class for parser errors.
A class to read JSON messages from various types of input stream.
Definition: reader.hpp:42
virtual int next()
Get the next character from the reader.
Definition: reader.cpp:54
The lexer class.
#define NAMESPACE
You can change the namespace of the whole library by changing this value.
Definition: common.hpp:29
STL namespace.
A string had too many characters in it.
A number had too many characters in it.
string in JSON format
Definition: token.hpp:65
floating point number
Definition: token.hpp:63
Common defs needed everywhere and, as far as is possible, platform specific changes.
const token & next()
Definition: lexer.cpp:340
void put_back_last()
Definition: lexer.cpp:354
The file ended where more tokens were expected.
Lexical tokens read from a JSON message.
Definition: token.hpp:41
size_t get_byte_index() const
Get the current byte index in the input.
Definition: reader.cpp:87
virtual ~lexer()
Definition: lexer.cpp:40
Something that looked like a number ended up not being (e.g. 1234AAAA).
An invalid character was found during parsing (e.g. ! outside of a string).
whole number
Definition: token.hpp:61
virtual void put_back(int c)
Put back a character so that it is returned by the next call to next().
Definition: reader.cpp:75