You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lnav/src/yajl/yajl_parser.c

499 lines
20 KiB
C

/*
* Copyright (c) 2007-2014, Lloyd Hilaiel <me@lloyd.io>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "api/yajl_parse.h"
#include "yajl_lex.h"
#include "yajl_parser.h"
#include "yajl_encode.h"
#include "yajl_bytestack.h"
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <math.h>
#define MAX_VALUE_TO_MULTIPLY ((LLONG_MAX / 10) + (LLONG_MAX % 10))
/* same semantics as strtol */
long long
yajl_parse_integer(const unsigned char *number, unsigned int length)
{
long long ret = 0;
long sign = 1;
const unsigned char *pos = number;
if (*pos == '-') { pos++; sign = -1; }
if (*pos == '+') { pos++; }
while (pos < number + length) {
if ( ret > MAX_VALUE_TO_MULTIPLY ) {
errno = ERANGE;
return sign == 1 ? LLONG_MAX : LLONG_MIN;
}
ret *= 10;
if (LLONG_MAX - ret < (*pos - '0')) {
errno = ERANGE;
return sign == 1 ? LLONG_MAX : LLONG_MIN;
}
if (*pos < '0' || *pos > '9') {
errno = ERANGE;
return sign == 1 ? LLONG_MAX : LLONG_MIN;
}
ret += (*pos++ - '0');
}
return sign * ret;
}
unsigned char *
yajl_render_error_string(yajl_handle hand, const unsigned char * jsonText,
size_t jsonTextLen, int verbose)
{
size_t offset = hand->bytesConsumed;
unsigned char * str;
const char * errorType = NULL;
const char * errorText = NULL;
char text[72];
const char * arrow = " (right here) ------^\n";
if (yajl_bs_current(hand->stateStack) == yajl_state_parse_error) {
errorType = "parse";
errorText = hand->parseError;
} else if (yajl_bs_current(hand->stateStack) == yajl_state_lexical_error) {
errorType = "lexical";
errorText = yajl_lex_error_to_string(yajl_lex_get_error(hand->lexer));
} else {
errorType = "unknown";
}
{
size_t memneeded = 0;
memneeded += strlen(errorType);
memneeded += strlen(" error");
if (errorText != NULL) {
memneeded += strlen(": ");
memneeded += strlen(errorText);
}
str = (unsigned char *) YA_MALLOC(&(hand->alloc), memneeded + 2);
if (!str) return NULL;
str[0] = 0;
strcat((char *) str, errorType);
strcat((char *) str, " error");
if (errorText != NULL) {
strcat((char *) str, ": ");
strcat((char *) str, errorText);
}
strcat((char *) str, "\n");
}
/* now we append as many spaces as needed to make sure the error
* falls at char 41, if verbose was specified */
if (verbose) {
size_t start, end, i;
size_t spacesNeeded;
spacesNeeded = (offset < 30 ? 40 - offset : 10);
start = (offset >= 30 ? offset - 30 : 0);
end = (offset + 30 > jsonTextLen ? jsonTextLen : offset + 30);
for (i=0;i<spacesNeeded;i++) text[i] = ' ';
for (;start < end;start++, i++) {
if (jsonText[start] != '\n' && jsonText[start] != '\r' && jsonText[start] != '\t')
{
text[i] = jsonText[start];
}
else
{
text[i] = ' ';
}
}
assert(i <= 71);
text[i++] = '\n';
text[i] = 0;
{
char * newStr = (char *)
YA_MALLOC(&(hand->alloc), (unsigned int)(strlen((char *) str) +
strlen((char *) text) +
strlen(arrow) + 1));
if (newStr) {
newStr[0] = 0;
strcat((char *) newStr, (char *) str);
strcat((char *) newStr, text);
strcat((char *) newStr, arrow);
}
YA_FREE(&(hand->alloc), str);
str = (unsigned char *) newStr;
}
}
return str;
}
/* check for client cancelation */
#define _CC_CHK(x) \
if (!(x)) { \
yajl_bs_set(hand->stateStack, yajl_state_parse_error); \
hand->parseError = \
"client cancelled parse via callback return value"; \
return yajl_status_client_canceled; \
}
yajl_status
yajl_do_finish(yajl_handle hand)
{
yajl_status stat;
stat = yajl_do_parse(hand,(const unsigned char *) " ",1);
if (stat != yajl_status_ok) return stat;
switch(yajl_bs_current(hand->stateStack))
{
case yajl_state_parse_error:
case yajl_state_lexical_error:
return yajl_status_error;
case yajl_state_got_value:
case yajl_state_parse_complete:
return yajl_status_ok;
default:
if (!(hand->flags & yajl_allow_partial_values))
{
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "premature EOF";
return yajl_status_error;
}
return yajl_status_ok;
}
}
yajl_status
yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
size_t jsonTextLen)
{
yajl_tok tok;
const unsigned char * buf;
size_t bufLen;
size_t * offset = &(hand->bytesConsumed);
*offset = 0;
around_again:
switch (yajl_bs_current(hand->stateStack)) {
case yajl_state_parse_complete:
if (hand->flags & yajl_allow_multiple_values) {
yajl_bs_set(hand->stateStack, yajl_state_got_value);
goto around_again;
}
if (!(hand->flags & yajl_allow_trailing_garbage)) {
if (*offset != jsonTextLen) {
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
if (tok != yajl_tok_eof) {
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "trailing garbage";
}
goto around_again;
}
}
return yajl_status_ok;
case yajl_state_lexical_error:
case yajl_state_parse_error:
return yajl_status_error;
case yajl_state_start:
case yajl_state_got_value:
case yajl_state_map_need_val:
case yajl_state_array_need_val:
case yajl_state_array_start: {
/* for arrays and maps, we advance the state for this
* depth, then push the state of the next depth.
* If an error occurs during the parsing of the nesting
* enitity, the state at this level will not matter.
* a state that needs pushing will be anything other
* than state_start */
yajl_state stateToPush = yajl_state_start;
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
goto around_again;
case yajl_tok_string:
if (hand->callbacks && hand->callbacks->yajl_string) {
_CC_CHK(hand->callbacks->yajl_string(hand->ctx,
buf, bufLen));
}
break;
case yajl_tok_string_with_escapes:
if (hand->callbacks && hand->callbacks->yajl_string) {
yajl_buf_clear(hand->decodeBuf);
yajl_string_decode(hand->decodeBuf, buf, bufLen);
_CC_CHK(hand->callbacks->yajl_string(
hand->ctx, yajl_buf_data(hand->decodeBuf),
yajl_buf_len(hand->decodeBuf)));
}
break;
case yajl_tok_bool:
if (hand->callbacks && hand->callbacks->yajl_boolean) {
_CC_CHK(hand->callbacks->yajl_boolean(hand->ctx,
*buf == 't'));
}
break;
case yajl_tok_null:
if (hand->callbacks && hand->callbacks->yajl_null) {
_CC_CHK(hand->callbacks->yajl_null(hand->ctx));
}
break;
case yajl_tok_left_bracket:
if (hand->callbacks && hand->callbacks->yajl_start_map) {
_CC_CHK(hand->callbacks->yajl_start_map(hand->ctx));
}
stateToPush = yajl_state_map_start;
break;
case yajl_tok_left_brace:
if (hand->callbacks && hand->callbacks->yajl_start_array) {
_CC_CHK(hand->callbacks->yajl_start_array(hand->ctx));
}
stateToPush = yajl_state_array_start;
break;
case yajl_tok_integer:
if (hand->callbacks) {
if (hand->callbacks->yajl_number) {
_CC_CHK(hand->callbacks->yajl_number(
hand->ctx,(const char *) buf, bufLen));
} else if (hand->callbacks->yajl_integer) {
long long int i = 0;
errno = 0;
i = yajl_parse_integer(buf, bufLen);
if ((i == LLONG_MIN || i == LLONG_MAX) &&
errno == ERANGE)
{
yajl_bs_set(hand->stateStack,
yajl_state_parse_error);
hand->parseError = "integer overflow" ;
/* try to restore error offset */
if (*offset >= bufLen) *offset -= bufLen;
else *offset = 0;
goto around_again;
}
_CC_CHK(hand->callbacks->yajl_integer(hand->ctx,
i));
}
}
break;
case yajl_tok_double:
if (hand->callbacks) {
if (hand->callbacks->yajl_number) {
_CC_CHK(hand->callbacks->yajl_number(
hand->ctx, (const char *) buf, bufLen));
} else if (hand->callbacks->yajl_double) {
double d = 0.0;
yajl_buf_clear(hand->decodeBuf);
yajl_buf_append(hand->decodeBuf, buf, bufLen);
buf = yajl_buf_data(hand->decodeBuf);
errno = 0;
d = strtod((char *) buf, NULL);
if ((d == HUGE_VAL || d == -HUGE_VAL) &&
errno == ERANGE)
{
yajl_bs_set(hand->stateStack,
yajl_state_parse_error);
hand->parseError = "numeric (floating point) "
"overflow";
/* try to restore error offset */
if (*offset >= bufLen) *offset -= bufLen;
else *offset = 0;
goto around_again;
}
_CC_CHK(hand->callbacks->yajl_double(hand->ctx,
d));
}
}
break;
case yajl_tok_right_brace: {
if (yajl_bs_current(hand->stateStack) ==
yajl_state_array_start)
{
if (hand->callbacks &&
hand->callbacks->yajl_end_array)
{
_CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
}
yajl_bs_pop(hand->stateStack);
goto around_again;
}
/* intentional fall-through */
}
case yajl_tok_colon:
case yajl_tok_comma:
case yajl_tok_right_bracket:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError =
"unallowed token at this point in JSON text";
goto around_again;
default:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "invalid token, internal error";
goto around_again;
}
/* got a value. transition depends on the state we're in. */
{
yajl_state s = yajl_bs_current(hand->stateStack);
if (s == yajl_state_start || s == yajl_state_got_value) {
yajl_bs_set(hand->stateStack, yajl_state_parse_complete);
} else if (s == yajl_state_map_need_val) {
yajl_bs_set(hand->stateStack, yajl_state_map_got_val);
} else {
yajl_bs_set(hand->stateStack, yajl_state_array_got_val);
}
}
if (stateToPush != yajl_state_start) {
yajl_bs_push(hand->stateStack, stateToPush);
}
goto around_again;
}
case yajl_state_map_start:
case yajl_state_map_need_key: {
/* only difference between these two states is that in
* start '}' is valid, whereas in need_key, we've parsed
* a comma, and a string key _must_ follow */
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
goto around_again;
case yajl_tok_string_with_escapes:
if (hand->callbacks && hand->callbacks->yajl_map_key) {
yajl_buf_clear(hand->decodeBuf);
yajl_string_decode(hand->decodeBuf, buf, bufLen);
buf = yajl_buf_data(hand->decodeBuf);
bufLen = yajl_buf_len(hand->decodeBuf);
}
/* intentional fall-through */
case yajl_tok_string:
if (hand->callbacks && hand->callbacks->yajl_map_key) {
_CC_CHK(hand->callbacks->yajl_map_key(hand->ctx, buf,
bufLen));
}
yajl_bs_set(hand->stateStack, yajl_state_map_sep);
goto around_again;
case yajl_tok_right_bracket:
if (yajl_bs_current(hand->stateStack) ==
yajl_state_map_start)
{
if (hand->callbacks && hand->callbacks->yajl_end_map) {
_CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
}
yajl_bs_pop(hand->stateStack);
goto around_again;
}
default:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError =
"invalid object key (must be a string)";
goto around_again;
}
}
case yajl_state_map_sep: {
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
case yajl_tok_colon:
yajl_bs_set(hand->stateStack, yajl_state_map_need_val);
goto around_again;
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
goto around_again;
default:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "object key and value must "
"be separated by a colon (':')";
goto around_again;
}
}
case yajl_state_map_got_val: {
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
case yajl_tok_right_bracket:
if (hand->callbacks && hand->callbacks->yajl_end_map) {
_CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
}
yajl_bs_pop(hand->stateStack);
goto around_again;
case yajl_tok_comma:
yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
goto around_again;
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
goto around_again;
default:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "after key and value, inside map, "
"I expect ',' or '}'";
/* try to restore error offset */
if (*offset >= bufLen) *offset -= bufLen;
else *offset = 0;
goto around_again;
}
}
case yajl_state_array_got_val: {
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
case yajl_tok_right_brace:
if (hand->callbacks && hand->callbacks->yajl_end_array) {
_CC_CHK(hand->callbacks->yajl_end_array(hand->ctx));
}
yajl_bs_pop(hand->stateStack);
goto around_again;
case yajl_tok_comma:
yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
goto around_again;
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
yajl_bs_set(hand->stateStack, yajl_state_lexical_error);
goto around_again;
default:
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError =
"after array element, I expect ',' or ']'";
goto around_again;
}
}
}
abort();
return yajl_status_error;
}