commit 25596aec229893f412feba7a7122615f1cafd30b from: Sven M. Hallberg date: Sun Feb 09 23:39:13 2020 UTC make robj LALR commit - f309790abe901c16b0a4dc9e4ec5c29a4a196bd2 commit + 25596aec229893f412feba7a7122615f1cafd30b blob - ae6ead34d3d7be6069bb49e625097893f1b61822 blob + 9b58c23b05ebddfd48f377591bd411aa871f6902 --- pdf.c +++ pdf.c @@ -10,6 +10,7 @@ /* convenience macros */ #define SEQ(...) h_sequence(__VA_ARGS__, NULL) #define CHX(...) h_choice(__VA_ARGS__, NULL) +#define OPT(X) h_ignore(h_optional(X)) #define REP(P,N) h_repeat_n(P, N) #define IGN(P) h_ignore(P) #define LIT(S) h_literal(S) @@ -236,45 +237,26 @@ act_nat(const HParseResult *p, void *u) #define act_xroff act_nat #define act_xrgen act_nat -/* p = (zeroes, d, digits) -- where d>0 */ -HParsedToken * -act_pnat(const HParseResult *p, void *u) +bool +validate_pnat(HParseResult *p, void *u) { - uint64_t x = H_FIELD_UINT(1); - HCountedArray *seq = H_FIELD_SEQ(2); - - // XXX check for overflow - for (size_t i = 0; i < seq->used; i++) - x = x*10 + H_CAST_UINT(seq->elements[i]); - - return H_MAKE_UINT(x); + return H_CAST_UINT(p->ast) > 0; } -/* p = (sign, nat) */ HParsedToken * -act_intg(const HParseResult *p, void *u) +act_sign(const HParseResult *p, void *u) { - uint64_t xu = H_FIELD_UINT(1); - int64_t x; - - assert(xu <= INT64_MAX); // XXX add a validation to ensure this - x = xu; - - HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0); - if (sgn->token_type == TT_BYTES && - sgn->bytes.token[0] == '-') - x = -x; - - return H_MAKE_SINT(x); + return H_MAKE_SINT(p->ast == NULL ? 1 : -1); } +/* p = (digits, period, digits) */ HParsedToken * -act_real(const HParseResult *p, void *u) +act_realnn(const HParseResult *p, void *u) { double x = 0; double f = 0; - HCountedArray *whole = H_FIELD_SEQ(1, 0); - HCountedArray *fract = H_FIELD_SEQ(1, 2); + HCountedArray *whole = H_FIELD_SEQ(0); + HCountedArray *fract = H_FIELD_SEQ(2); // XXX check for overflow for (size_t i = 0; i < whole->used; i++) @@ -282,15 +264,39 @@ act_real(const HParseResult *p, void *u) for (size_t i = 0; i < fract->used; i++) f = (f + H_CAST_UINT(fract->elements[fract->used - 1 - i])) / 10; - HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0); - if (sgn->token_type == TT_BYTES && - sgn->bytes.token[0] == '-') - x = -x; - - return H_MAKE_UINT(x + f); // XXX H_MAKE_DOUBLE (-> pprint) + return H_MAKE_DOUBLE(x + f); } +bool +validate_intnn(HParseResult *p, void *u) +{ + return H_CAST_UINT(p->ast) <= INT64_MAX; +} + +/* p = numbnn | (sign, numbnn) */ HParsedToken * +act_numb(const HParseResult *p, void *u) +{ + const HParsedToken *x = p->ast; + int64_t sgn = 1; + + if (x->token_type == TT_SEQUENCE) { + sgn = H_FIELD_SINT(0); + x = H_FIELD_TOKEN(1); + } + + assert(sgn == 1 || sgn == -1); + switch (x->token_type) { + case TT_UINT: + return H_MAKE_SINT(sgn * x->uint); + case TT_DOUBLE: + return H_MAKE_DOUBLE(sgn * x->dbl); + default: + assert(!"reached"); + } +} + +HParsedToken * act_ref(const HParseResult *p, void *u) { Ref *r = H_ALLOC(Ref); @@ -477,7 +483,7 @@ init_parser(struct Env *aux) H_RULE(lf, h_ch('\n')); /* semantic value: \n */ H_RULE(crlf, h_right(cr, lf)); /* semantic value: \n */ H_RULE(eol, CHX(crlf, cr, lf)); - H_RULE(line, h_many(NOT_IN("\r\n"))); + H_RULE(end, h_end_p()); /* character classes */ #define LWCHARS "\0\t\f " @@ -495,7 +501,6 @@ init_parser(struct Env *aux) H_ARULE(hupper, h_ch_range('A', 'F')); H_RULE(hdigit, CHX(digit, hlower, hupper)); H_ARULE(odigit, h_ch_range('0', '7')); - H_RULE(sign, IN("+-")); H_RULE(sp, h_ch(' ')); H_RULE(percent, h_ch('%')); @@ -509,10 +514,12 @@ init_parser(struct Env *aux) H_RULE(rangle, h_ch('>')); H_RULE(lbrack, h_ch('[')); H_RULE(rbrack, h_ch(']')); - H_RULE(zero, h_ch('0')); + H_RULE(plus, h_ch('+')); + H_RULE(minus, h_ch('-')); /* whitespace */ - H_RULE(comment, SEQ(percent, line)); + H_RULE(comment, SEQ(percent, h_many(NOT_IN("\r\n")), CHX(cr,lf,end))); + /* ^ NB: must consume cr/lf for ws to be LL(k) */ H_RULE(wel, IGN(CHX(wchar, comment))); H_RULE(ws, IGN(h_many(wel))); H_RULE(lws, IGN(h_many(IGN(lwchar)))); @@ -524,16 +531,13 @@ init_parser(struct Env *aux) /* misc */ H_RULE(nl, IGN(h_right(lws, eol))); - H_RULE(end, h_end_p()); H_RULE(epsilon, h_epsilon_p()); H_RULE(empty, SEQ(epsilon)); H_RULE(digits, h_many1(digit)); H_ARULE(nat, digits); - H_ARULE(pnat, SEQ(h_many(zero), pdigit, h_many(digit))); + H_VRULE(pnat, nat); H_RULE(npair, SEQ(pnat, wel,ws, nat)); -#define OPT(X) CHX(X, epsilon) - /* * objects */ @@ -543,11 +547,15 @@ init_parser(struct Env *aux) H_RULE(boole, CHX(LIT("true"), LIT("false"))); /* numbers */ - H_ARULE(intg, SEQ(h_optional(sign), nat)); - H_RULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ + H_ARULE(sign, CHX(minus, IGN(plus))); + H_VRULE(intnn, nat); + H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ SEQ(digits, period, empty), /* 123. */ SEQ(empty, period, digits))); /* .123 */ - H_ARULE(real, SEQ(h_optional(sign), realnn)); + // XXX ^ we _could_ move the "123." case into intnn... + H_RULE(numbnn, CHX(realnn, intnn)); + H_RULE(snumb, SEQ(sign, numbnn)); + H_ARULE(numb, CHX(snumb, numbnn)); /* names */ H_ARULE(nesc, SEQ(hash, hdigit, hdigit)); @@ -577,7 +585,7 @@ init_parser(struct Env *aux) H_RULE(dict, h_indirect()); /* classify objects by whether they start/end with a delimiter: */ - H_RULE(robj, CHX(ref, null, boole, real, intg)); /* rchars */ + H_RULE(robj, CHX(ref, null, boole, numb)); /* rchars */ H_RULE(dobj, CHX(string, array, dict)); /* dchars */ H_RULE(obj, CHX(robj, name, dobj)); @@ -1144,6 +1152,7 @@ act_ks_value(const HParseResult *p, void *u) /* decode and parse the stream data */ res = decode_stream(spec->dict, bytes, spec->parser); + assert(res != NULL); // XXX parse failure! return H_MAKE(HParseResult, res); }