Commit Diff


commit - f309790abe901c16b0a4dc9e4ec5c29a4a196bd2
commit + 25596aec229893f412feba7a7122615f1cafd30b
blob - ae6ead34d3d7be6069bb49e625097893f1b61822
blob + 9b58c23b05ebddfd48f377591bd411aa871f6902
--- pdf.c
+++ pdf.c
@@ -10,6 +10,7 @@
 /* convenience macros */
 #define SEQ(...)	h_sequence(__VA_ARGS__, NULL)
 #define CHX(...)	h_choice(__VA_ARGS__, NULL)
+#define OPT(X)		h_ignore(h_optional(X))	/* optional X, wrapped in h_ignore like IGN() */
 #define REP(P,N)	h_repeat_n(P, N)
 #define IGN(P)		h_ignore(P)
 #define LIT(S)		h_literal(S)
@@ -236,45 +237,26 @@ act_nat(const HParseResult *p, void *u)
 #define act_xroff act_nat
 #define act_xrgen act_nat
 
-/* p = (zeroes, d, digits)  -- where d>0 */
-HParsedToken *
-act_pnat(const HParseResult *p, void *u)
+bool
+validate_pnat(HParseResult *p, void *u)
 {
-	uint64_t x = H_FIELD_UINT(1);
-	HCountedArray *seq = H_FIELD_SEQ(2);
-
-	// XXX check for overflow
-	for (size_t i = 0; i < seq->used; i++)
-		x = x*10 + H_CAST_UINT(seq->elements[i]);
-	
-	return H_MAKE_UINT(x);
+	return H_CAST_UINT(p->ast) > 0;	/* pnat: accept a nat only if its value is strictly positive */
 }
 
-/* p = (sign, nat) */
 HParsedToken *
-act_intg(const HParseResult *p, void *u)
+act_sign(const HParseResult *p, void *u)
 {
-	uint64_t xu = H_FIELD_UINT(1);
-	int64_t x;
-
-	assert(xu <= INT64_MAX);	// XXX add a validation to ensure this
-	x = xu;
-
-	HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0);
-	if (sgn->token_type == TT_BYTES &&
-	    sgn->bytes.token[0] == '-')
-		x = -x;
-	
-	return H_MAKE_SINT(x);
+	return H_MAKE_SINT(p->ast == NULL ? 1 : -1);	/* no AST (presumably the IGN'd plus of the sign rule) -> +1; any token (minus) -> -1 */
 }
 
+/* p = (digits, period, digits) */
 HParsedToken *
-act_real(const HParseResult *p, void *u)
+act_realnn(const HParseResult *p, void *u)
 {
 	double x = 0;
 	double f = 0;
-	HCountedArray *whole = H_FIELD_SEQ(1, 0);
-	HCountedArray *fract = H_FIELD_SEQ(1, 2);
+	HCountedArray *whole = H_FIELD_SEQ(0);
+	HCountedArray *fract = H_FIELD_SEQ(2);
 
 	// XXX check for overflow
 	for (size_t i = 0; i < whole->used; i++)
@@ -282,15 +264,39 @@ act_real(const HParseResult *p, void *u)
 	for (size_t i = 0; i < fract->used; i++)
 		f = (f + H_CAST_UINT(fract->elements[fract->used - 1 - i])) / 10;
 
-	HParsedToken *sgn = H_INDEX_TOKEN(p->ast, 0);
-	if (sgn->token_type == TT_BYTES &&
-	    sgn->bytes.token[0] == '-')
-		x = -x;
-	
-	return H_MAKE_UINT(x + f);	// XXX H_MAKE_DOUBLE (-> pprint)
+	return H_MAKE_DOUBLE(x + f);
 }
 
+bool
+validate_intnn(HParseResult *p, void *u)
+{
+	return H_CAST_UINT(p->ast) <= INT64_MAX;	/* must fit in int64_t: act_numb turns this value into a signed token */
+}
+
+/* p = numbnn | (sign, numbnn) -- applies the sign; makes a SINT token from a UINT, a DOUBLE from a DOUBLE */
 HParsedToken *
+act_numb(const HParseResult *p, void *u)
+{
+	const HParsedToken *x = p->ast;
+	int64_t sgn = 1;
+
+	if (x->token_type == TT_SEQUENCE) {	/* signed form: unpack (sign, numbnn) */
+		sgn = H_FIELD_SINT(0);
+		x = H_FIELD_TOKEN(1);
+	}
+	assert(sgn == 1 || sgn == -1);
+	switch (x->token_type) {
+	case TT_UINT:	/* validate_intnn caps the value at INT64_MAX, so the cast is lossless */
+		return H_MAKE_SINT(sgn * (int64_t)x->uint);
+	case TT_DOUBLE:
+		return H_MAKE_DOUBLE(sgn * x->dbl);
+	default:
+		assert(!"reached");
+		return NULL;	/* defined result even when NDEBUG removes the assert */
+	}
+}
+
+HParsedToken *
 act_ref(const HParseResult *p, void *u)
 {
 	Ref *r = H_ALLOC(Ref);
@@ -477,7 +483,7 @@ init_parser(struct Env *aux)
 	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
 	H_RULE(crlf,	h_right(cr, lf));	/* semantic value: \n */
 	H_RULE(eol,	CHX(crlf, cr, lf));
-	H_RULE(line,	h_many(NOT_IN("\r\n")));
+	H_RULE(end,	h_end_p());
 
 	/* character classes */
 #define LWCHARS	"\0\t\f "
@@ -495,7 +501,6 @@ init_parser(struct Env *aux)
 	H_ARULE(hupper,	h_ch_range('A', 'F'));
 	H_RULE(hdigit,	CHX(digit, hlower, hupper));
 	H_ARULE(odigit,	h_ch_range('0', '7'));
-	H_RULE(sign,	IN("+-"));
 
 	H_RULE(sp,	h_ch(' '));
 	H_RULE(percent,	h_ch('%'));
@@ -509,10 +514,12 @@ init_parser(struct Env *aux)
 	H_RULE(rangle,	h_ch('>'));
 	H_RULE(lbrack,	h_ch('['));
 	H_RULE(rbrack,	h_ch(']'));
-	H_RULE(zero,	h_ch('0'));
+	H_RULE(plus,	h_ch('+'));
+	H_RULE(minus,	h_ch('-'));
 
 	/* whitespace */
-	H_RULE(comment,	SEQ(percent, line));
+	H_RULE(comment,	SEQ(percent, h_many(NOT_IN("\r\n")), CHX(cr,lf,end)));
+		/* ^ NB: must consume cr/lf for ws to be LL(k) */
 	H_RULE(wel,	IGN(CHX(wchar, comment)));
 	H_RULE(ws,	IGN(h_many(wel)));
 	H_RULE(lws,	IGN(h_many(IGN(lwchar))));
@@ -524,16 +531,13 @@ init_parser(struct Env *aux)
 
 	/* misc */
 	H_RULE(nl,	IGN(h_right(lws, eol)));
-	H_RULE(end,	h_end_p());
 	H_RULE(epsilon,	h_epsilon_p());
 	H_RULE(empty,	SEQ(epsilon));
 	H_RULE(digits,	h_many1(digit));
 	H_ARULE(nat,	digits);
-	H_ARULE(pnat,	SEQ(h_many(zero), pdigit, h_many(digit)));
+	H_VRULE(pnat,	nat);
 	H_RULE(npair,	SEQ(pnat, wel,ws, nat));
 
-#define OPT(X)	CHX(X, epsilon)
-
 	/*
 	 * objects
 	 */
@@ -543,11 +547,15 @@ init_parser(struct Env *aux)
 	H_RULE(boole,	CHX(LIT("true"), LIT("false")));
 
 	/* numbers */
-	H_ARULE(intg,	SEQ(h_optional(sign), nat));
-	H_RULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
+	H_ARULE(sign,	CHX(minus, IGN(plus)));
+	H_VRULE(intnn,	nat);
+	H_ARULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
 			    SEQ(digits, period, empty),		/* 123. */
 			    SEQ(empty, period, digits)));	/* .123 */
-	H_ARULE(real,	SEQ(h_optional(sign), realnn));
+		// XXX ^ we _could_ move the "123." case into intnn...
+	H_RULE(numbnn,	CHX(realnn, intnn));
+	H_RULE(snumb,	SEQ(sign, numbnn));
+	H_ARULE(numb,	CHX(snumb, numbnn));
 
 	/* names */
 	H_ARULE(nesc,	SEQ(hash, hdigit, hdigit));
@@ -577,7 +585,7 @@ init_parser(struct Env *aux)
 	H_RULE(dict,	h_indirect());
 
 	/* classify objects by whether they start/end with a delimiter: */
-	H_RULE(robj,	CHX(ref, null, boole, real, intg));	/* rchars */
+	H_RULE(robj,	CHX(ref, null, boole, numb));		/* rchars */
 	H_RULE(dobj,	CHX(string, array, dict));		/* dchars */
 	H_RULE(obj,	CHX(robj, name, dobj));
 
@@ -1144,6 +1152,7 @@ act_ks_value(const HParseResult *p, void *u)
 
 	/* decode and parse the stream data */
 	res = decode_stream(spec->dict, bytes, spec->parser);
+	assert(res != NULL);	// XXX parse failure!
 
 	return H_MAKE(HParseResult, res);
 }