commit 20412cdab5e17485a4a49f22a7d4369c6558d46d
from: Sven M. Hallberg <>
date: Fri Aug 12 16:43:43 2022 UTC

use regular resolve (-> p_objdef) to get content stream fragments

If we inspect p_byteostm, we see that it is nothing but a specialized form of
p_objdef that replaces the object parser with byteostream which in turn is a
specialized form of the stream parser that replaces the switch on /Type (in
kstream/kbyteostream) with always using p_bytes, thus returning the stream data
(after filters) as raw TT_BYTES.

But kstream also handles an unrecognized or unspecified /Type with
p_bytes. So, since a content stream should have none of the recognized
types, we can just use p_objdef, and thus resolve(), here and
eliminate that whole branch of copy-paste.

The only downside is that we're now allowing any object to appear where
a stream should be (from the content parser's point of view). All we are
missing, though, is a proper token type for stream objects and a simple
check in place of that XXX...

commit - e0350ca3bb91fb9ff9a91e048ab68ce059cd7f66
commit + 20412cdab5e17485a4a49f22a7d4369c6558d46d
blob - df1ba9ef34971e75abc9f44ab9410f97200c1ea8
blob + 6bb8bea40ec6e4ee7dd15e4a0647653f5696a6b7
--- pdf.c
+++ pdf.c
@@ -2252,7 +2252,6 @@ HParser *p_textbegin;
 HParser *p_textstream;
 HParser *p_trailer;
 HParser *p_cstream;
-HParser *p_byteostm;
 HParser *p_dict;
@@ -2263,7 +2262,6 @@ HParser *kstream(HAllocator *, const HParsedToken *, v
 HParser *kxstream(HAllocator *, const HParsedToken *, void *);
 HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
 HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
-HParser *kbyteostream(HAllocator *, const HParsedToken *, void *);
 init_runlengthdecode_parser(struct Env *aux)
@@ -2642,9 +2640,6 @@ init_parser(struct Env *aux)
 	H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend));
 	H_RULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream,
 			OPT(ws), OPT(lws), KW("endobj")));
-	H_RULE(byteostream, h_left(h_bind(stmbeg, kbyteostream, aux), stmend));
-	H_RULE(byteostm, SEQ(ws, npair, wel, KW("obj"), ws, byteostream,
-			OPT(ws), OPT(lws), KW("endobj")));
 	/* global parser variables */
@@ -2665,7 +2660,6 @@ init_parser(struct Env *aux)
 	p_textbegin  = txtbegin;
 	p_textstream = txtstream;
 	p_cstream    = CHX(cstream, objdef);
-	p_byteostm   = byteostm;
 	p_dict       = dict;
 	p_fail = h_nothing_p();
@@ -3815,55 +3809,6 @@ get_dictoftype(
 		tok = NULL;
 	return tok;
- * This continuation takes the content stream, decodes it if necessary and returns
- * the byte stream for concatenation with other byte streams priot to test extraction.
- * It is very similar to kstream in approach.
- */
-HParser *
-kbyteostream(HAllocator *mm__, const HParsedToken *x, void *env)
-	struct Env *aux = env;
-	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
-	Dict *dict = H_CAST(Dict, dict_t);
-	const HParsedToken *v = NULL;
-	HParser *bytes_p, *dict_p, *value_p;
-	struct streamspec *spec;
-	size_t sz=0;
-	/* look for the Length entry -- could be a reference */
-	v = dictentry(dict, "Length");
-	v = resolve(aux, v);		/* resolve indirect references */
-	if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
-		if (v == NULL)
-			log_message(7, "kbyteostream: stream /Length missing\n");
-		else if (v -> token_type != TT_SINT)
-			log_message(7, "kbyteostream: stream /Length not an integer\n");
-		else if (v < 0)
-			log_message(7, "kbyteostream: stream /Length negative\n");
-		return p_fail;
-	}
-	sz = (size_t)v->sint;
-	dict_p	= p_return__m(mm__, dict_t);
-	bytes_p = p_take__m(mm__, sz, aux);
-	spec = h_alloc(mm__, sizeof(struct streamspec));
-	spec->dict = dict;
-	spec->parser = p_bytes;
-	value_p = h_action__m(mm__, bytes_p, act_ks_value, spec);
-		// XXX this should be h_bind, i think?! so failures in the
-		// postordinate parser propagate...
-	return h_sequence__m(mm__, dict_p, value_p, NULL);
@@ -4426,7 +4371,8 @@ void parse_pagenode (
 		pieces = h_arena_malloc(arena, sizeof(HBytes*) * numelts);
 		for (int i=0; i<numelts; i++) {
 			entry = H_INDEX_TOKEN(contents, i);
-			entry = resolve_item(aux, entry, p_byteostm);
+			entry = resolve(aux, entry);
+			// XXX check that entry is a stream object of the expected type
 			if (!entry) // TODO: Failure ==> xref error -- Figure out how to handle
 				goto end;
 			HParsedToken *res_strm = H_INDEX_TOKEN(entry, 1);