commit - db58f4ce1094a2cbcba547a18458fb2431492595
commit + 71bbc7963e1b73ae1b02ecc93aa3371e569b7958
blob - 1083ef7f27b09a3bb450b1551f8ea3a856562fa9
blob + d0d5bb89df6f38ea6a9741d7c01b17b45618fb6d
--- pdf.c
+++ pdf.c
* Start Catalog parsing
* ********************************************************************
*/
-
-const HParsedToken *
-parse_item(struct Env *aux, size_t nr, size_t gen, size_t offset, HParser *p_item)
-{
- HParseResult *res;
- size_t def_nr, def_gen;
- if (offset >= aux->sz) {
- log_message(7, "%s: position %zu (%#zx) for object %zu %zu is "
- "out of bounds\n", aux->infile, offset, offset, nr, gen);
- return NULL;
- }
-
-// res = h_parse(p_item, aux->input + offset, aux->sz - offset);
- HParser *p = h_right(h_seek(offset * 8, SEEK_SET), p_item); // XXX
- res = h_parse(p, aux->input, aux->sz); // XXX review
- if (res == NULL) {
- log_message(7, "%s: error parsing object %zu %zu at position "
- "%zu (%#zx)\n", aux->infile, nr, gen, offset, offset);
- return NULL;
- }
- assert(res->ast != NULL && res->ast->token_type == TT_SEQUENCE);
-
- def_nr = H_INDEX_UINT(res->ast, 0, 0);
- def_gen = H_INDEX_UINT(res->ast, 0, 1);
- if (def_nr != nr || def_gen != gen) {
- log_message(7, "%s: object ID mismatch at position %zu "
- "(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
- offset, offset, nr, gen, def_nr, def_gen);
- return NULL;
- }
-
- return H_INDEX_TOKEN(res->ast, 1);
-}
-
-// XXX refactor this copy-pastry
-const HParsedToken *
-parse_objstm_item(struct Env *aux, size_t nr, size_t stm_nr, size_t idx, HParser *p)
-{
- XREntry *ent;
- const HParsedToken *stm;
- const HParseResult *res;
- const ObjStm *ostm;
-
- /*
- * acquire the stream object
- */
-
- ent = lookup_xref(aux, stm_nr, 0);
- if (ent == NULL)
- return NULL; /* stream not found */
-
- switch (ent->type)
- {
- case XR_FREE:
- return NULL; /* stream deleted */
- case XR_INUSE:
- if (ent->n.gen != 0)
- return NULL; /* stream replaced */
- if (ent->obj == NULL)
- ent->obj = parse_item(aux, stm_nr, 0, ent->n.offs, p);
- break;
- case XR_OBJSTM:
- return NULL; /* invalid: nested streams */
- default:
- // XXX spec does not say what to do in this case!
- // i.e. the stream exists but is to be treated as null because
- // of the unknown xref type (7.5.8.3).
- // the spec says that nonexistent objects are to be treated as
- // null (7.3.9), but not that null is to be treated the same as
- // a non-existent object!
- // cf. https://github.com/pdf-association/pdf-issues/issues/194
- return NULL; /* stream's xref is of unknown type */
- }
-
- if ((stm = ent->obj) == NULL) {
- log_message(7, "%s: error parsing (stream) object at position "
- "%zu (%#zx)\n", aux->infile, ent->n.offs, ent->n.offs);
- return NULL;
- }
-
- // XXX stm could be a Ref!?
- // cf. https://github.com/pdf-association/pdf-issues/issues/102
-
- // XXX validate that stm is actually a stream object
-
- /*
- * inspect the stream and find the target object in it
- */
-
- /*
- * The stream consists of (dict value) where value is an HParseResult
- * that is NULL iff the value parser failed. The HParsedToken wrapped
- * by that result is of a type that reflects the stream's /Type field,
- * so we are expecting an ObjStm.
- */
- res = H_INDEX(HParseResult, stm, 1);
- if (res == NULL) /* parse error in stream data */
- return NULL;
- if (res->ast == NULL || res->ast->token_type != TT_ObjStm)
- return NULL; /* unexpected stream type */
-
- ostm = H_CAST(ObjStm, res->ast);
-
- /*
- * Check that our given object index (idx) is valid in ostm and points
- * to the expected object (its object number equals our given 'nr').
- *
- * NB: We could technically try to recover from an invalid index by
- * searching for the given object number, but that would be a guess
- * at most. Might be good for diagnostics, though.
- */
- if (idx < ostm->numObjs) /* invalid object index */
- return NULL;
- if (ostm->tok[idx].oid.nr != nr) /* object number mismatch */
- return NULL;
-
- return ostm->tok[idx].obj;
-}
-
-
-// XXX refactor this copy-pastry
-const HParsedToken *
-resolve_item(struct Env *aux, const HParsedToken *v, HParser *p)
-{
- XREntry *ent = NULL;
- Ref *r;
-
-
- /* the null object maps to NULL */
- if (v == NULL || v->token_type == TT_Null)
- return NULL;
-
- /* other direct objects pass through */
- if (v->token_type != TT_Ref)
- return v;
-
- /* now we are looking at an indirect reference */
- r = v->user;
-
- /* find the xref entry for this reference */
- ent = lookup_xref(aux, r->nr, r->gen);
- if (ent == NULL)
- return NULL; /* obj not found -- xref error */
- if (ent->obj != NULL)
- return resolve_item(aux, ent->obj, p);
-
- /* parse the object and memoize */
- ent->obj = v; /* break loops */
- switch (ent->type)
- {
- case XR_FREE:
- return NULL; /* obj deleted */
- case XR_INUSE:
- if (ent->n.gen != r->gen)
- return NULL; /* obj nr reused */
- ent->obj = parse_item(aux, r->nr, r->gen, ent->n.offs, p);
- break;
- case XR_OBJSTM:
- if (r->gen != 0)
- return NULL; /* invalid entry! */
- ent->obj = parse_objstm_item(aux, r->nr, ent->o.stm, ent->o.idx, p);
- break;
- default:
- /*
- * Unknown type of xref entry in stream. Treat it as the null
- * object according to ISO 32000-2 7.5.8.3.
- */
- return NULL;
- }
-
- return resolve_item(aux, ent->obj, p);
-}
-
-
-
bool
is_parent(Dict *dict, const HParsedToken *expected)
{