commit b5568c0ce46c37f080fe97f45bfb67e9744e8fde from: Sven M. Hallberg date: Tue Aug 09 19:16:56 2022 UTC fix resolve() for cyclic objects It is not entirely clear whether the spec allows cyclic object definitions such as the following: obj 1 0 1 0 R endobj There is an open errata issue about this topic, but no consensus has emerged so far. Most implementations will accept this, however, so for the time being I'm guessing we should, too. We will treat an object that is defined (directly or indirectly) as itself as equivalent to the null object. The implementation strategy is to give ourselves a distinct invalid pointer beside NULL and use it to mark the memoization entry for a given cross-reference (ent->obj) as INVALID while we recursively try to resolve it. If we eventually hit an INVALID object, we terminate the process and return NULL. The INVALID entry will internally stay in the memoization slot, but should never be returned by resolve(). This commit contains the implementation for resolve(). We'll do its unfortunate copy-paste sibling resolve_item() in the next one. commit - 8993cef512f4545c0e407a733bae4fa4225f3989 commit + b5568c0ce46c37f080fe97f45bfb67e9744e8fde blob - 4c8803047ec8b12e70a961b92abc801055c852e8 blob + b8bbb7e80784e56ddb6d2899270662ef980b569a --- pdf.c +++ pdf.c @@ -2829,6 +2829,7 @@ parse_objstm_obj(struct Env *aux, size_t nr, size_t st const HParsedToken * resolve(struct Env *aux, const HParsedToken *v) { + static void *INVALID = &INVALID; /* a reserved dummy pointer */ XREntry *ent = NULL; Ref *r; @@ -2836,8 +2837,10 @@ resolve(struct Env *aux, const HParsedToken *v) * The null object maps to NULL. This makes it equivalent to a * nonexistent object and covers the handling of (indirect) null values * in dictionaries (treating them as if the entry did not exist). + * + * Likewise, we map invalid (e.g. circular) references to NULL. 
*/ - if (v == NULL || v->token_type == TT_Null) + if (v == NULL || v == INVALID || v->token_type == TT_Null) return NULL; /* other direct objects pass through */ @@ -2854,8 +2857,7 @@ resolve(struct Env *aux, const HParsedToken *v) if (ent->obj != NULL) return resolve(aux, ent->obj); - /* parse the object and memoize */ - ent->obj = v; /* break loops */ + /* parse the object */ switch (ent->type) { case XR_FREE: @@ -2863,12 +2865,12 @@ resolve(struct Env *aux, const HParsedToken *v) case XR_INUSE: if (ent->n.gen != r->gen) return NULL; /* obj nr reused */ - ent->obj = parse_obj(aux, r->nr, r->gen, ent->n.offs); + v = parse_obj(aux, r->nr, r->gen, ent->n.offs); break; case XR_OBJSTM: if (r->gen != 0) return NULL; /* invalid entry! */ - ent->obj = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx); + v = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx); break; default: /* @@ -2878,7 +2880,13 @@ resolve(struct Env *aux, const HParsedToken *v) return NULL; } - return resolve(aux, ent->obj); + /* resolve recursively and memoize */ + ent->obj = INVALID; /* break loops */ + if ((v = resolve(aux, v)) != NULL) + ent->obj = v; + + assert(v != INVALID); /* our dummy does not escape */ + return v; }