commit - f309790abe901c16b0a4dc9e4ec5c29a4a196bd2
commit + 25596aec229893f412feba7a7122615f1cafd30b
blob - ae6ead34d3d7be6069bb49e625097893f1b61822
blob + 9b58c23b05ebddfd48f377591bd411aa871f6902
--- pdf.c
+++ pdf.c
/* convenience macros */
#define SEQ(...) h_sequence(__VA_ARGS__, NULL)
#define CHX(...) h_choice(__VA_ARGS__, NULL)
+#define OPT(X) h_ignore(h_optional(X)) /* optional X, wrapped in h_ignore (cf. IGN) */
#define REP(P,N) h_repeat_n(P, N)
#define IGN(P) h_ignore(P)
#define LIT(S) h_literal(S)
#define act_xroff act_nat
#define act_xrgen act_nat
-/* p = (zeroes, d, digits) -- where d>0 */
-HParsedToken *
-act_pnat(const HParseResult *p, void *u)
+/* validation for pnat: accept the parsed unsigned value only if it is nonzero */
+bool
+validate_pnat(HParseResult *p, void *u)
{
- uint64_t x = H_FIELD_UINT(1);
- HCountedArray *seq = H_FIELD_SEQ(2);
-
- // XXX check for overflow
- for (size_t i = 0; i < seq->used; i++)
- x = x*10 + H_CAST_UINT(seq->elements[i]);
-
- return H_MAKE_UINT(x);
+ return H_CAST_UINT(p->ast) > 0;
}
-/* p = (sign, nat) */
+/*
+ * Maps the parse result to a signed unit: a NULL ast yields +1, anything
+ * else yields -1.  NOTE(review): presumably NULL comes from the IGN(plus)
+ * branch of the sign rule and non-NULL from minus -- confirm.
+ */
HParsedToken *
-act_intg(const HParseResult *p, void *u)
+act_sign(const HParseResult *p, void *u)
{
- uint64_t xu = H_FIELD_UINT(1);
- int64_t x;
-
- assert(xu <= INT64_MAX); // XXX add a validation to ensure this
- x = xu;
-
- HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0);
- if (sgn->token_type == TT_BYTES &&
- sgn->bytes.token[0] == '-')
- x = -x;
-
- return H_MAKE_SINT(x);
+ return H_MAKE_SINT(p->ast == NULL ? 1 : -1);
}
+/*
+ * p = (digits, period, digits)
+ *
+ * Builds a non-negative real from the whole part (field 0) and the
+ * fractional part (field 2); the sign is no longer handled here.
+ * NB: the realnn rule substitutes 'empty' for either digit sequence
+ * ("123." and ".123"), so presumably whole or fract may have no elements.
+ */
HParsedToken *
-act_real(const HParseResult *p, void *u)
+act_realnn(const HParseResult *p, void *u)
{
double x = 0;
double f = 0;
- HCountedArray *whole = H_FIELD_SEQ(1, 0);
- HCountedArray *fract = H_FIELD_SEQ(1, 2);
+ HCountedArray *whole = H_FIELD_SEQ(0);
+ HCountedArray *fract = H_FIELD_SEQ(2);
// XXX check for overflow
for (size_t i = 0; i < whole->used; i++)
for (size_t i = 0; i < fract->used; i++)
f = (f + H_CAST_UINT(fract->elements[fract->used - 1 - i])) / 10;
- HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0);
- if (sgn->token_type == TT_BYTES &&
- sgn->bytes.token[0] == '-')
- x = -x;
-
- return H_MAKE_UINT(x + f); // XXX H_MAKE_DOUBLE (-> pprint)
+ return H_MAKE_DOUBLE(x + f);
}
+/* validation for intnn: reject values that do not fit in an int64_t */
+bool
+validate_intnn(HParseResult *p, void *u)
+{
+	uint64_t val = H_CAST_UINT(p->ast);
+
+	if (val > INT64_MAX)
+		return false;
+	return true;
+}
+
+/*
+ * p = numbnn | (sign, numbnn)
+ *
+ * Applies the optional sign (+1 or -1, as a TT_SINT in field 0) to the
+ * non-negative number and returns a TT_SINT for integer input or a
+ * TT_DOUBLE for real input.
+ */
HParsedToken *
+act_numb(const HParseResult *p, void *u)
+{
+	const HParsedToken *x = p->ast;
+	int64_t sgn = 1;
+
+	if (x->token_type == TT_SEQUENCE) {
+		sgn = H_FIELD_SINT(0);
+		x = H_FIELD_TOKEN(1);
+	}
+
+	assert(sgn == 1 || sgn == -1);
+	switch (x->token_type) {
+	case TT_UINT:
+		/*
+		 * Multiply in the signed domain; mixing int64_t with uint64_t
+		 * would compute the product as unsigned.  The cast is safe as
+		 * long as the value is <= INT64_MAX (cf. validate_intnn).
+		 */
+		return H_MAKE_SINT(sgn * (int64_t)x->uint);
+	case TT_DOUBLE:
+		return H_MAKE_DOUBLE(sgn * x->dbl);
+	default:
+		assert(!"reached");
+		/* with NDEBUG the assert vanishes; do not fall off the end */
+		return NULL;
+	}
+}
+
+HParsedToken *
act_ref(const HParseResult *p, void *u)
{
Ref *r = H_ALLOC(Ref);
H_RULE(lf, h_ch('\n')); /* semantic value: \n */
H_RULE(crlf, h_right(cr, lf)); /* semantic value: \n */
H_RULE(eol, CHX(crlf, cr, lf));
- H_RULE(line, h_many(NOT_IN("\r\n")));
+ H_RULE(end, h_end_p());
/* character classes */
#define LWCHARS "\0\t\f "
H_ARULE(hupper, h_ch_range('A', 'F'));
H_RULE(hdigit, CHX(digit, hlower, hupper));
H_ARULE(odigit, h_ch_range('0', '7'));
- H_RULE(sign, IN("+-"));
H_RULE(sp, h_ch(' '));
H_RULE(percent, h_ch('%'));
H_RULE(rangle, h_ch('>'));
H_RULE(lbrack, h_ch('['));
H_RULE(rbrack, h_ch(']'));
- H_RULE(zero, h_ch('0'));
+ H_RULE(plus, h_ch('+'));
+ H_RULE(minus, h_ch('-'));
/* whitespace */
- H_RULE(comment, SEQ(percent, line));
+ H_RULE(comment, SEQ(percent, h_many(NOT_IN("\r\n")), CHX(cr,lf,end)));
+ /* ^ NB: must consume cr/lf for ws to be LL(k) */
H_RULE(wel, IGN(CHX(wchar, comment)));
H_RULE(ws, IGN(h_many(wel)));
H_RULE(lws, IGN(h_many(IGN(lwchar))));
/* misc */
H_RULE(nl, IGN(h_right(lws, eol)));
- H_RULE(end, h_end_p());
H_RULE(epsilon, h_epsilon_p());
H_RULE(empty, SEQ(epsilon));
H_RULE(digits, h_many1(digit));
H_ARULE(nat, digits);
- H_ARULE(pnat, SEQ(h_many(zero), pdigit, h_many(digit)));
+ H_VRULE(pnat, nat);
H_RULE(npair, SEQ(pnat, wel,ws, nat));
-#define OPT(X) CHX(X, epsilon)
-
/*
* objects
*/
H_RULE(boole, CHX(LIT("true"), LIT("false")));
/* numbers */
- H_ARULE(intg, SEQ(h_optional(sign), nat));
- H_RULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
+ H_ARULE(sign, CHX(minus, IGN(plus)));
+ H_VRULE(intnn, nat);
+ H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
SEQ(digits, period, empty), /* 123. */
SEQ(empty, period, digits))); /* .123 */
- H_ARULE(real, SEQ(h_optional(sign), realnn));
+ // XXX ^ we _could_ move the "123." case into intnn...
+ H_RULE(numbnn, CHX(realnn, intnn));
+ H_RULE(snumb, SEQ(sign, numbnn));
+ H_ARULE(numb, CHX(snumb, numbnn));
/* names */
H_ARULE(nesc, SEQ(hash, hdigit, hdigit));
H_RULE(dict, h_indirect());
/* classify objects by whether they start/end with a delimiter: */
- H_RULE(robj, CHX(ref, null, boole, real, intg)); /* rchars */
+ H_RULE(robj, CHX(ref, null, boole, numb)); /* rchars */
H_RULE(dobj, CHX(string, array, dict)); /* dchars */
H_RULE(obj, CHX(robj, name, dobj));
/* decode and parse the stream data */
res = decode_stream(spec->dict, bytes, spec->parser);
+ assert(res != NULL); // XXX parse failure!
return H_MAKE(HParseResult, res);
}