Commit Diff


commit - db7f494b21976897f7c57eb32d967754609119be
commit + b129f11e497010def47afd2afb4c1602d8b10d01
blob - 6050dc39de88d9fa711e55f184f61421d9ab5ff7
blob + faef9d713ea41f7a77d31961c93f2fd6f753914e
--- pdf.c
+++ pdf.c
@@ -102,7 +102,13 @@ validate_eq_uint(HParseResult *p, void *u)
 	    v->uint == (uint64_t)(uintptr_t)u);
 }
 
+bool
+validate_notnull(HParseResult *p, void *u)	/* u is unused */
+{	/* true iff the result carries an AST; act_nat and act_numb return NULL on overflow */
+	return p->ast != NULL;
+}
 
+
 /*
  * auxiliary global data structure needed by the parser
  */
@@ -530,15 +536,23 @@ act_nat(const HParseResult *p, void *u)
 	uint64_t x = 0;
 	HCountedArray *seq = H_CAST_SEQ(p->ast);
 
-	// XXX check for overflow
-	for (size_t i = 0; i < seq->used; i++)
-		x = x*10 + H_CAST_UINT(seq->elements[i]);
+	for (size_t i = 0; i < seq->used; i++) {
+		uint64_t d = H_CAST_UINT(seq->elements[i]);
+
+		if (x > (UINT64_MAX - d) / 10)	/* would overflow */
+			return NULL;
+		// XXX introduce a structured error type and unify with VIOL()
 
+		x = x * 10 + d;
+	}
+
 	return H_MAKE_UINT(x);
 }
 #define act_xroff act_nat
 #define act_xrgen act_nat
 
+#define validate_nat validate_notnull
+
 HParser *p_violsev;
 HParsedToken *
 act_viol(const HParseResult *p, void *viol)
@@ -618,6 +632,8 @@ act_numb(const HParseResult *p, void *u)
 	assert(sgn == 1 || sgn == -1);
 	switch (x->token_type) {
 	case TT_UINT:
+		if (x->uint > (uint64_t)INT64_MAX + (sgn < 0))	/* would overflow: magnitude 2^63 fits in int64_t only when negative */
+			return NULL;	// XXX structured error type
 		return H_MAKE_SINT(sgn * x->uint);
 	case TT_DOUBLE:
 		return H_MAKE_DOUBLE(sgn * x->dbl);
@@ -626,6 +642,8 @@ act_numb(const HParseResult *p, void *u)
 	}
 }
 
+#define validate_numb validate_notnull
+
 HParsedToken *
 act_ref(const HParseResult *p, void *u)
 {
@@ -951,7 +969,7 @@ init_parser(struct Env *aux)
 	H_RULE(epsilon,	h_epsilon_p());
 	H_RULE(empty,	SEQ(epsilon));
 	H_RULE(digits,	h_many1(digit));
-	H_ARULE(nat,	digits);
+	H_VARULE(nat,	digits);
 	H_VRULE(pnat,	nat);
 	H_RULE(npair,	SEQ(pnat, wel,ws, nat));
 
@@ -972,7 +990,7 @@ init_parser(struct Env *aux)
 		// XXX ^ we _could_ move the "123." case into intnn...
 	H_RULE(numbnn,	CHX(realnn, intnn));
 	H_RULE(snumb,	SEQ(sign, numbnn));
-	H_ARULE(numb,	CHX(snumb, numbnn));
+	H_VARULE(numb,	CHX(snumb, numbnn));
 
 	/* names */
 	H_ARULE(nesc,	SEQ(hash, hdigit, hdigit));
@@ -1034,11 +1052,12 @@ init_parser(struct Env *aux)
 	h_bind_indirect(array, array_);
 
 	/* streams */
-	H_RULE(stmbeg,	SEQ(dict, OPT(ws), LIT("stream"), OPT(cr), lf));
-	H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
+	H_RULE(stmbeg,	SEQ(dict, OPT(ws), LIT("stream"), OPT(cr),
+			    CHX(lf, VIOL(epsilon, "No linefeed after 'stream' (severity=7)"))));
+	H_RULE(stmend,	CHX(SEQ(eol, LIT("endstream")),
 			   VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
-			   VIOL(SEQ(h_many1(wchar), LIT("endstream")),
-				"No newline before endstream (severity=7)"),
+			   VIOL(SEQ(h_many(wchar), LIT("endstream")),
+				"No newline before 'endstream' (severity=7)"),
 			   VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
 			   VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
 				"Stream length 1-too-short (severity=4)"),
@@ -1120,6 +1139,8 @@ init_parser(struct Env *aux)
 
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
+	H_RULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+	                        "Junk bytes before %PDF header (severity=1)"));
 	H_RULE(hdr_junk,    CHX(comment,
 	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
 	    "Uncommented junk after header (severity=1)")));
@@ -1131,10 +1152,10 @@ init_parser(struct Env *aux)
 				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
 					"Data after final %%EOF (severity=7)"),
 				   end));
-	H_RULE(pdf,	SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+	H_RULE(pdf,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 	/*
 	 * filters