json_parser.hpp
Go to the documentation of this file.
1 #ifndef STAN_IO_JSON_JSON_PARSER_HPP
2 #define STAN_IO_JSON_JSON_PARSER_HPP
3 
4 #include <boost/lexical_cast.hpp>
5 
8 
9 #include <stdexcept>
10 #include <iostream>
11 #include <istream>
12 #include <sstream>
13 #include <string>
14 
15 namespace stan {
16 
17  namespace json {
18 
// Bounds of the UTF-16 high (lead) and low (trail) surrogate ranges, and
// the first code point that has to be written as a surrogate pair.
// They are used when decoding \uXXXX escapes inside JSON strings.
const unsigned int MIN_HIGH_SURROGATE = 0xd800;
const unsigned int MAX_HIGH_SURROGATE = 0xdbff;
const unsigned int MIN_LOW_SURROGATE = 0xdc00;
const unsigned int MAX_LOW_SURROGATE = 0xdfff;
const unsigned int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
24 
25  inline bool is_high_surrogate(unsigned int cp) {
26  return (cp >= MIN_HIGH_SURROGATE && cp <= MAX_HIGH_SURROGATE);
27  }
28 
29  inline bool is_low_surrogate(unsigned int cp) {
30  return (cp >= MIN_LOW_SURROGATE && cp <= MAX_LOW_SURROGATE);
31  }
32 
/**
 * Test whether a character is one of the four whitespace characters
 * this parser skips: space, line feed, horizontal tab, carriage return.
 *
 * @param c character to test
 * @return true if c is ' ', '\n', '\t' or '\r'
 */
inline bool is_whitespace(char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return true;
    default:
      return false;
  }
}
36 
37  /**
38  * A <code>json_parser</code> is a SAX-style streaming parser
39  * that enforces JSON syntax and parses JSON elements
40  * from an input stream, sending callbacks to a user-supplied
41  * <code>json_handler</code>.
42  */
43  template <typename Handler, bool Validate_UTF_8>
44  class parser {
45  public:
46  parser(Handler& h,
47  std::istream& in)
48  : h_(h),
49  in_(in),
50  next_char_(0),
51  line_(0),
52  column_(0)
53  { }
54 
55  ~parser() {
56  }
57 
58  void parse() {
59  h_.start_text();
60  parse_text();
61  h_.end_text();
62  }
63 
64  private:
66  std::stringstream ss;
67  ss << "Error in JSON parsing at"
68  << " line=" << line_ << " column=" << column_
69  << std::endl
70  << msg
71  << std::endl;
72  return json_error(ss.str());
73  }
74 
75  // JSON-text = object / array
76  void parse_text() {
77  char c = get_non_ws_char();
78  if (c == '{') { // begin-object
79  h_.start_object();
81  h_.end_object();
82  } else if (c == '[') { // begin-array
83  // array
84  h_.start_array();
86  h_.end_array();
87  } else {
88  throw json_exception("expecting start of object ({) or array ([)");
89  }
90  }
91 
92  // value = false / null / true / object / array / number / string
93  void parse_value() {
94  // value
95  char c = get_non_ws_char();
96  if (c == 'f') {
97  // false
99  } else if (c == 'n') {
100  // null
102  } else if (c == 't') {
103  // true
105  } else if (c == '"') {
106  // string
108  } else if (c == '{' || c == '[') {
109  // object / array
110  unget_char();
111  parse_text();
112  } else if (c == '-' ||
113  (c >= '0' && c <= '9') ) {
114  unget_char();
115  parse_number();
116  } else {
117  throw json_exception("illegal value, expecting object, array, "
118  "number, string, or literal true/false/null");
119  }
120  }
121 
122  void parse_number() {
123  bool is_positive = true;
124 
125  std::stringstream ss;
126  char c = get_non_ws_char();
127  // minus
128  if (c == '-') {
129  is_positive = false;
130  ss << c;
131  c = get_char();
132  }
133 
134  // int
135  // zero / digit1-9
136  if (c < '0' || c > '9')
137  throw json_exception("expecting int part of number");
138  ss << c;
139 
140  // *DIGIT
141  bool leading_zero = (c == '0');
142  c = get_char();
143  if (leading_zero && (c == '0'))
144  throw json_exception("zero padded numbers not allowed");
145  while (c >= '0' && c <= '9') {
146  ss << c;
147  c = get_char();
148  }
149 
150  // frac
151  bool is_integer = true;
152  if (c == '.') {
153  is_integer = false;
154  ss << '.';
155  c = get_char();
156  if (c < '0' || c > '9')
157  throw json_exception("expected digit after decimal");
158  ss << c;
159  c = get_char();
160  while (c >= '0' && c <= '9') {
161  ss << c;
162  c = get_char();
163  }
164  }
165 
166  // exp
167  if (c == 'e' || c == 'E') {
168  is_integer = false;
169  ss << c;
170  c = get_char();
171  // minus / plus
172  if (c == '+' || c == '-') {
173  ss << c;
174  c = get_char();
175  }
176  // 1*DIGIT
177  if (c < '0' || c > '9')
178  throw json_exception("expected digit after e/E");
179  while (c >= '0' && c <= '9') {
180  ss << c;
181  c = get_char();
182  }
183  }
184  unget_char();
185 
186  if (is_integer) {
187  if (is_positive) {
188  unsigned long n; // NOLINT(runtime/int)
189  try {
190  // NOLINTNEXTLINE(runtime/int)
191  n = boost::lexical_cast<unsigned long>(ss.str());
192  } catch (const boost::bad_lexical_cast & ) {
193  throw json_exception("number exceeds integer range");
194  }
195  ss >> n;
196  h_.number_unsigned_long(n);
197  } else {
198  long n; // NOLINT(runtime/int)
199  try {
200  // NOLINTNEXTLINE(runtime/int)
201  n = boost::lexical_cast<unsigned long>(ss.str());
202  } catch (const boost::bad_lexical_cast & ) {
203  throw json_exception("number exceeds integer range");
204  }
205  ss >> n;
206  h_.number_long(n);
207  }
208  } else {
209  double x;
210  try {
211  std::string ss_str = ss.str();
212  x = boost::lexical_cast<double>(ss_str);
213  if (x == 0)
214  io::validate_zero_buf(ss_str);
215  } catch (const boost::bad_lexical_cast & ) {
216  throw json_exception("number exceeds double range");
217  }
218  ss >> x;
219  h_.number_double(x);
220  }
221  }
222 
224  std::stringstream s;
225  while (true) {
226  char c = get_char();
227  if (c == '"') {
228  return s.str();
229  } else if (c == '\\') {
230  c = get_char();
231  if (c == '\\' || c == '/' || c == '"') {
232  s << c;
233  } else if (c == 'b') {
234  s << '\b';
235  } else if (c == 'f') {
236  s << '\f';
237  } else if (c == 'n') {
238  s << '\n';
239  } else if (c == 'r') {
240  s << '\r';
241  } else if (c == 't') {
242  s << '\t';
243  } else if (c == 'u') {
245  } else {
246  throw json_exception("expecting legal escape");
247  }
248  continue;
249  } else if (c > 0 && c < 0x20) { // ASCII control characters
250  throw json_exception("found control character, char values less "
251  "than U+0020 must be \\u escaped");
252  }
253  s << c;
254  }
255  }
256 
258  get_chars("rue");
259  h_.boolean(true);
260  }
261 
263  get_chars("alse");
264  h_.boolean(false);
265  }
266 
268  get_chars("ull");
269  h_.null();
270  }
271 
272  void get_escaped_unicode(std::stringstream& s) {
273  unsigned int codepoint = get_int_as_hex_chars();
274  if (!(is_high_surrogate(codepoint) || is_low_surrogate(codepoint))) {
275  putCodepoint(s, codepoint);
276  } else if (!is_high_surrogate(codepoint)) {
277  throw json_exception("illegal unicode values, found "
278  "low-surrogate, missing high-surrogate");
279  } else {
280  char c = get_char();
281  if (!(c == '\\'))
282  throw json_exception("illegal unicode values, found "
283  "high-surrogate, expecting low-surrogate");
284  c = get_char();
285  if (!(c == 'u'))
286  throw json_exception("illegal unicode values, found "
287  "high-surrogate, expecting low-surrogate");
288  unsigned int codepoint2 = get_int_as_hex_chars();
289  unsigned int supplemental
290  = ((codepoint - MIN_HIGH_SURROGATE) << 10)
291  + (codepoint2 - MIN_LOW_SURROGATE)
292  + MIN_SUPPLEMENTARY_CODE_POINT;
293  putCodepoint(s, supplemental);
294  }
295  }
296 
297  unsigned int get_int_as_hex_chars() {
298  std::stringstream s;
299  s << std::hex;
300  for (int i = 0; i < 4; i++) {
301  char c = get_char();
302  if (!((c >= 'a' && c<= 'f')
303  || (c >= 'A' && c<= 'F')
304  || (c >= '0' && c<= '9')))
305  throw json_exception("illegal unicode code point");
306  s << c;
307  }
308  unsigned int hex;
309  s >> hex;
310  return hex;
311  }
312 
/**
 * Append a code point to the stream as a UTF-8 byte sequence of one
 * to four bytes, chosen by the magnitude of the code point.
 *
 * @param s stream receiving the bytes
 * @param codepoint value to encode
 */
void putCodepoint(std::stringstream& s, unsigned int codepoint) {
  // Low six bits of each group, used for the continuation bytes.
  const unsigned int low6 = codepoint & 0x3f;
  const unsigned int mid6 = (codepoint >> 6) & 0x3f;
  const unsigned int high6 = (codepoint >> 12) & 0x3f;

  if (codepoint > 0xffff) {
    s.put(static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
    s.put(static_cast<char>(0x80 | high6));
    s.put(static_cast<char>(0x80 | mid6));
    s.put(static_cast<char>(0x80 | low6));
  } else if (codepoint > 0x7ff) {
    s.put(static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
    s.put(static_cast<char>(0x80 | mid6));
    s.put(static_cast<char>(0x80 | low6));
  } else if (codepoint > 0x7f) {
    s.put(static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
    s.put(static_cast<char>(0x80 | low6));
  } else {
    s.put(static_cast<char>(codepoint));
  }
}
330 
331  void get_chars(const std::string& s) {
332  for (size_t i = 0; i < s.size(); ++i) {
333  char c = get_char();
334  if (c != s[i])
335  throw json_exception("expecting rest of literal: "
336  + s.substr(i));
337  }
338  }
339 
341  char c = get_non_ws_char();
342  if (c == ']') return;
343  unget_char();
344  while (true) {
345  parse_value();
346  char c = get_non_ws_char();
347  if (c == ']') return;
348  if (c != ',') {
349  throw json_exception("in array, expecting ] or ,");
350  }
351  c = get_non_ws_char();
352  if (c == ']')
353  throw json_exception("in array, expecting value");
354  unget_char();
355  }
356  }
357 
359  char c = get_non_ws_char();
360  if (c == '}') return;
361  while (true) {
362  // string (key)
363  if (c != '"')
364  throw json_exception("expecting member key"
365  " or end of object marker (})");
367  h_.key(key);
368  // name-separator separator
369  c = get_non_ws_char();
370  if (c != ':')
371  throw json_exception("expecting key-value separator :");
372  // value
373  parse_value();
374 
375  // continuation
376  c = get_non_ws_char();
377  if (c == '}')
378  return;
379  if (c != ',')
380  throw json_exception("expecting end of object } or separator ,");
381  c = get_non_ws_char();
382  }
383  }
384 
385  char get_char() {
386  char c = in_.get();
387  if (!in_.good())
388  throw json_exception("unexpected end of stream");
389  if (c == '\n') {
390  ++line_;
391  column_ = 1;
392  } else {
393  ++column_;
394  }
395  return c;
396  }
397 
399  while (true) {
400  char c = get_char();
401  if (is_whitespace(c)) continue;
402  return c;
403  }
404  }
405 
406  void unget_char() {
407  in_.unget();
408  --column_;
409  }
410 
411  Handler& h_;
412  std::istream& in_;
414  size_t line_;
415  size_t column_;
416  };
417 
418 
419  /**
420  * Parse the JSON text represented by the specified input stream,
421  * sending events to the specified handler, and optionally
422  * validating the UTF-8 encoding.
423  *
424  * @tparam Validate_UTF_8
425  * @tparam Handler
426  * @param in Input stream from which to parse
427  * @param handler Handler for events from parser
428  */
429  template <bool Validate_UTF_8, typename Handler>
430  void parse(std::istream& in,
431  Handler& handler) {
433  }
434 
/**
 * Parse the JSON text read from the specified input stream, sending
 * events to the specified handler. This overload forwards to
 * parse<false>, i.e. with the Validate_UTF_8 argument set to false.
 *
 * @tparam Handler type of the event handler
 * @param in Input stream from which to parse
 * @param handler Handler for events from parser
 */
template <typename Handler>
void parse(std::istream& in, Handler& handler) {
  parse<false>(in, handler);
}
449 
450  }
451 }
452 #endif
void putCodepoint(std::stringstream &s, unsigned int codepoint)
std::string parse_string_chars_quotation_mark()
unsigned int get_int_as_hex_chars()
Float_t ss
Definition: plot.C:24
void get_chars(const std::string &s)
bool is_low_surrogate(unsigned int cp)
Definition: json_parser.hpp:29
json_error json_exception(const std::string &msg) const
Definition: json_parser.hpp:65
const XML_Char * s
Definition: expat.h:262
void get_escaped_unicode(std::stringstream &s)
const unsigned int MIN_HIGH_SURROGATE
Definition: json_parser.hpp:19
void parse_array_values_end_array()
void validate_zero_buf(const B &buf)
const unsigned int MAX_LOW_SURROGATE
Definition: json_parser.hpp:22
parser(Handler &h, std::istream &in)
Definition: json_parser.hpp:46
const unsigned int MAX_HIGH_SURROGATE
Definition: json_parser.hpp:20
::xsd::cxx::tree::string< char, simple_type > string
Definition: Database.h:154
ifstream in
Definition: comparison.C:7
const unsigned int MIN_SUPPLEMENTARY_CODE_POINT
Definition: json_parser.hpp:23
void parse_object_members_end_object()
std::istream & in_
bool is_high_surrogate(unsigned int cp)
Definition: json_parser.hpp:25
bool is_integer(T x)
Definition: is_integer.hpp:16
const unsigned int MIN_LOW_SURROGATE
Definition: json_parser.hpp:21
bool is_whitespace(char c)
Definition: json_parser.hpp:33