commit - db7f494b21976897f7c57eb32d967754609119be
commit + b129f11e497010def47afd2afb4c1602d8b10d01
blob - 6050dc39de88d9fa711e55f184f61421d9ab5ff7
blob + faef9d713ea41f7a77d31961c93f2fd6f753914e
--- pdf.c
+++ pdf.c
v->uint == (uint64_t)(uintptr_t)u);
}
+/*
+ * generic validation: accept a parse result only if it carries an AST.
+ * lets semantic actions signal failure by returning NULL.
+ * the user-data argument is not used.
+ */
+bool
+validate_notnull(HParseResult *p, void *u)
+{
+	if (p->ast == NULL)
+		return false;
+
+	return true;
+}
+
/*
* auxiliary global data structure needed by the parser
*/
uint64_t x = 0;
HCountedArray *seq = H_CAST_SEQ(p->ast);
- // XXX check for overflow
- for (size_t i = 0; i < seq->used; i++)
- x = x*10 + H_CAST_UINT(seq->elements[i]);
+ for (size_t i = 0; i < seq->used; i++) {
+ uint64_t d = H_CAST_UINT(seq->elements[i]);
+
+ if (x > (UINT64_MAX - d) / 10) /* would overflow */
+ return NULL;
+ // XXX introduce a structured error type and unify with VIOL()
+ x = x * 10 + d;
+ }
+
return H_MAKE_UINT(x);
}
#define act_xroff act_nat
#define act_xrgen act_nat
+#define validate_nat validate_notnull	/* reject NULL results, e.g. from the overflow check above */
+
HParser *p_violsev;
HParsedToken *
act_viol(const HParseResult *p, void *viol)
assert(sgn == 1 || sgn == -1);
switch (x->token_type) {
case TT_UINT:
+		/*
+		 * Range check done in uint64_t: negating INT64_MIN overflows
+		 * int64_t (undefined behavior), and the bound depends on the
+		 * sign -- magnitudes up to INT64_MAX fit when positive, up to
+		 * INT64_MAX + 1 when negative.
+		 */
+		if (x->uint > (uint64_t)INT64_MAX + (sgn < 0 ? 1 : 0))
+			return NULL;	// XXX structured error type
return H_MAKE_SINT(sgn * x->uint);
case TT_DOUBLE:
return H_MAKE_DOUBLE(sgn * x->dbl);
}
}
+#define validate_numb validate_notnull	/* reject NULL results, e.g. from the integer range check above */
+
HParsedToken *
act_ref(const HParseResult *p, void *u)
{
H_RULE(epsilon, h_epsilon_p());
H_RULE(empty, SEQ(epsilon));
H_RULE(digits, h_many1(digit));
- H_ARULE(nat, digits);
+ H_VARULE(nat, digits);
H_VRULE(pnat, nat);
H_RULE(npair, SEQ(pnat, wel,ws, nat));
// XXX ^ we _could_ move the "123." case into intnn...
H_RULE(numbnn, CHX(realnn, intnn));
H_RULE(snumb, SEQ(sign, numbnn));
- H_ARULE(numb, CHX(snumb, numbnn));
+ H_VARULE(numb, CHX(snumb, numbnn));
/* names */
H_ARULE(nesc, SEQ(hash, hdigit, hdigit));
h_bind_indirect(array, array_);
/* streams */
- H_RULE(stmbeg, SEQ(dict, OPT(ws), LIT("stream"), OPT(cr), lf));
- H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
+ H_RULE(stmbeg, SEQ(dict, OPT(ws), LIT("stream"), OPT(cr),
+ CHX(lf, VIOL(epsilon, "No linefeed after 'stream' (severity=7)"))));
+ H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
- VIOL(SEQ(h_many1(wchar), LIT("endstream")),
- "No newline before endstream (severity=7)"),
+ VIOL(SEQ(h_many(wchar), LIT("endstream")),
+ "No newline before 'endstream' (severity=7)"),
VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
"Stream length 1-too-short (severity=4)"),
H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict));
+ H_RULE(start_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+ "Junk bytes before %PDF header (severity=1)"));
H_RULE(hdr_junk, CHX(comment,
VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
"Uncommented junk after header (severity=1)")));
VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
"Data after final %%EOF (severity=7)"),
end));
- H_RULE(pdf, SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+ H_RULE(pdf, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
/* debug parser to consume as much as possible */
- H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+ H_RULE(pdfdbg, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
/*
* filters