commit b129f11e497010def47afd2afb4c1602d8b10d01
from: pompolic
date: Thu Jun 25 12:09:45 2020 UTC

Merge branch 'master' into 2020-06-27_RELEASE

Review note: the act_numb overflow guard below was rewritten so that it no
longer negates INT64_MIN (signed overflow, undefined behaviour) and so that
the limit depends on the sign: magnitudes up to INT64_MAX are accepted for
positive numbers, up to INT64_MAX + 1 for negative ones.  The "blob +" hash
refers to the content before this revision.  Blank context lines were
re-created to match the hunk line counts.

commit - db7f494b21976897f7c57eb32d967754609119be
commit + b129f11e497010def47afd2afb4c1602d8b10d01
blob - 6050dc39de88d9fa711e55f184f61421d9ab5ff7
blob + faef9d713ea41f7a77d31961c93f2fd6f753914e
--- pdf.c
+++ pdf.c
@@ -102,7 +102,13 @@ validate_eq_uint(HParseResult *p, void *u)
 	    v->uint == (uint64_t)(uintptr_t)u);
 }
 
+bool
+validate_notnull(HParseResult *p, void *u)
+{
+	return p->ast != NULL;
+}
+
 
 /*
  * auxiliary global data structure needed by the parser
  */
@@ -530,15 +536,23 @@ act_nat(const HParseResult *p, void *u)
 	uint64_t x = 0;
 	HCountedArray *seq = H_CAST_SEQ(p->ast);
 
-	// XXX check for overflow
-	for (size_t i = 0; i < seq->used; i++)
-		x = x*10 + H_CAST_UINT(seq->elements[i]);
+	for (size_t i = 0; i < seq->used; i++) {
+		uint64_t d = H_CAST_UINT(seq->elements[i]);
+
+		if (x > (UINT64_MAX - d) / 10)	/* would overflow */
+			return NULL;
+			// XXX introduce a structured error type and unify with VIOL()
+		x = x * 10 + d;
+	}
+
 	return H_MAKE_UINT(x);
 }
 
 #define act_xroff act_nat
 #define act_xrgen act_nat
 
+#define validate_nat validate_notnull
+
 HParser *p_violsev;
 HParsedToken *
 act_viol(const HParseResult *p, void *viol)
@@ -618,6 +632,8 @@ act_numb(const HParseResult *p, void *u)
 	assert(sgn == 1 || sgn == -1);
 	switch (x->token_type) {
 	case TT_UINT:
+		if (x->uint > (uint64_t)INT64_MAX + (sgn < 0))	/* would overflow; negatives may reach INT64_MAX + 1 */
+			return NULL;	// XXX structured error type
 		return H_MAKE_SINT(sgn * x->uint);
 	case TT_DOUBLE:
 		return H_MAKE_DOUBLE(sgn * x->dbl);
@@ -626,6 +642,8 @@ act_numb(const HParseResult *p, void *u)
 	}
 }
 
+#define validate_numb validate_notnull
+
 HParsedToken *
 act_ref(const HParseResult *p, void *u)
 {
@@ -951,7 +969,7 @@ init_parser(struct Env *aux)
 	H_RULE(epsilon, h_epsilon_p());
 	H_RULE(empty, SEQ(epsilon));
 	H_RULE(digits, h_many1(digit));
-	H_ARULE(nat, digits);
+	H_VARULE(nat, digits);
 	H_VRULE(pnat, nat);
 	H_RULE(npair, SEQ(pnat, wel,ws, nat));
 
@@ -972,7 +990,7 @@ init_parser(struct Env *aux)
 		// XXX ^ we _could_ move the "123." case into intnn...
 	H_RULE(numbnn, CHX(realnn, intnn));
 	H_RULE(snumb, SEQ(sign, numbnn));
-	H_ARULE(numb, CHX(snumb, numbnn));
+	H_VARULE(numb, CHX(snumb, numbnn));
 
 	/* names */
 	H_ARULE(nesc, SEQ(hash, hdigit, hdigit));
@@ -1034,11 +1052,12 @@ init_parser(struct Env *aux)
 	h_bind_indirect(array, array_);
 
 	/* streams */
-	H_RULE(stmbeg, SEQ(dict, OPT(ws), LIT("stream"), OPT(cr), lf));
-	H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
+	H_RULE(stmbeg, SEQ(dict, OPT(ws), LIT("stream"), OPT(cr),
+	    CHX(lf, VIOL(epsilon, "No linefeed after 'stream' (severity=7)"))));
+	H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
 	    VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
-	    VIOL(SEQ(h_many1(wchar), LIT("endstream")),
-	        "No newline before endstream (severity=7)"),
+	    VIOL(SEQ(h_many(wchar), LIT("endstream")),
+	        "No newline before 'endstream' (severity=7)"),
 	    VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
 	    VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
 	        "Stream length 1-too-short (severity=4)"),
@@ -1120,6 +1139,8 @@ init_parser(struct Env *aux)
 
 	H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict));
 
+	H_RULE(start_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+	    "Junk bytes before %PDF header (severity=1)"));
 	H_RULE(hdr_junk, CHX(comment,
 	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
 	        "Uncommented junk after header (severity=1)")));
@@ -1131,10 +1152,10 @@ init_parser(struct Env *aux)
 	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
 	        "Data after final %%EOF (severity=7)"),
 	    end));
-	H_RULE(pdf, SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+	H_RULE(pdf, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+	H_RULE(pdfdbg, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 	/*
 	 * filters