commit 72c389d3e539e6d1b8586dab2e48de3c05cbb457 from: Sven M. Hallberg date: Fri Aug 12 17:48:03 2022 UTC Merge branch 'selfref' into ostream commit - f962b66b483de1dea4a80f6ed68c9f7d77123065 commit + 72c389d3e539e6d1b8586dab2e48de3c05cbb457 blob - 0dd4fce48f341be7f85ad3987366a1ff1e491739 blob + 55f85b3810ad6fabfb96f75b9922fd15217f2e35 --- pdf.c +++ pdf.c @@ -2813,6 +2813,8 @@ parse_objstm_obj(struct Env *aux, size_t nr, size_t st } /* resolve a parsed object to its final semantic value */ + static void *INVALID = &INVALID; /* a reserved dummy pointer */ + // XXX move this back into the function const HParsedToken * resolve(struct Env *aux, const HParsedToken *v) { @@ -2823,8 +2825,10 @@ resolve(struct Env *aux, const HParsedToken *v) * The null object maps to NULL. This makes it equivalent to a * nonexistent object and covers the handling of (indirect) null values * in dictionaries (treating them as if the entry did not exist). + * + * Likewise, we map invalid (e.g. circular) references to NULL. */ - if (v == NULL || v->token_type == TT_Null) + if (v == NULL || v == INVALID || v->token_type == TT_Null) return NULL; /* other direct objects pass through */ @@ -2841,8 +2845,7 @@ resolve(struct Env *aux, const HParsedToken *v) if (ent->obj != NULL) return resolve(aux, ent->obj); - /* parse the object and memoize */ - ent->obj = v; /* break loops */ + /* parse the object */ switch (ent->type) { case XR_FREE: @@ -2850,12 +2853,12 @@ resolve(struct Env *aux, const HParsedToken *v) case XR_INUSE: if (ent->n.gen != r->gen) return NULL; /* obj nr reused */ - ent->obj = parse_obj(aux, r->nr, r->gen, ent->n.offs); + v = parse_obj(aux, r->nr, r->gen, ent->n.offs); break; case XR_OBJSTM: if (r->gen != 0) return NULL; /* invalid entry! */ - ent->obj = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx); + v = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx); break; default: /* @@ -2865,7 +2868,13 @@ resolve(struct Env *aux, const HParsedToken *v) return NULL; } - return resolve(aux, ent->obj); + /* resolve recursively and memoize */ + ent->obj = INVALID; /* break loops */ + if ((v = resolve(aux, v)) != NULL) + ent->obj = v; + + assert(v != INVALID); /* our dummy does not escape */ + return v; } @@ -4384,6 +4393,7 @@ p_stream_data__m(HAllocator *mm__, const Dict *dict, s const HParsedToken *v; v = dictentry(dict, "Type"); + v = resolve(aux, v); if (v == NULL || v->token_type != TT_BYTES) // XXX -> custom type return NULL; /* no /Type field */ blob - /dev/null blob + be67696abfef0b34980551ecc6ad61dce956dc6d (mode 644) --- /dev/null +++ test/valid/selfref.pdf @@ -0,0 +1,79 @@ +%PDF-1.4 +1 0 obj + << /Type /Catalog + /Outlines 2 0 R + /Pages 3 0 R + >> +endobj + +2 0 obj + << /Type /Outlines + /Count 0 + >> +endobj + +3 0 obj + << /Type /Pages + /Kids [4 0 R] + /Count 1 + >> +endobj + +4 0 obj + << /Type /Page + /Parent 3 0 R + /MediaBox [0 0 612 792] + /Contents 5 0 R + /Resources << /ProcSet 6 0 R + /Font << /F1 7 0 R >> + >> + >> +endobj + +5 0 obj + << /Length 76 /Type 8 0 R >> +stream + BT + /F1 24 Tf + 100 100 Td + (Hello World) Tj + ET +endstream +endobj + +6 0 obj + [/PDF /Text] +endobj + +7 0 obj + << /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /MacRomanEncoding + >> +endobj + +8 0 obj + 8 0 R +endobj + +xref +0 9 +0000000000 65535 f +0000000009 00000 n +0000000097 00000 n +0000000159 00000 n +0000000239 00000 n +0000000456 00000 n +0000000598 00000 n +0000000631 00000 n +0000000776 00000 n + +trailer + << /Size 9 + /Root 1 0 R + >> +startxref +802 +%%EOF