commit 20412cdab5e17485a4a49f22a7d4369c6558d46d from: Sven M. Hallberg <pesco@khjk.org> date: Fri Aug 12 16:43:43 2022 UTC use regular resolve (-> p_objdef) to get content stream fragments If we inspect p_byteostm, we see that it is nothing but a specialized form of p_objdef that replaces the object parser with byteostream, which in turn is a specialized form of the stream parser that replaces the switch on /Type (in kstream/kbyteostream) with always using p_bytes, thus returning the stream data (after filters) as raw TT_BYTES. But kstream also handles an unrecognized or unspecified /Type with p_bytes. So, since a content stream should have none of the recognized types, we can just use p_objdef and thus resolve() here and eliminate that whole branch of copy-paste. The only downside is that we're now allowing any object to appear where a stream should be (from the content parser's point of view). All we are missing, though, is a proper token type for stream objects and a simple check in place of that XXX... 
commit - e0350ca3bb91fb9ff9a91e048ab68ce059cd7f66 commit + 20412cdab5e17485a4a49f22a7d4369c6558d46d blob - df1ba9ef34971e75abc9f44ab9410f97200c1ea8 blob + 6bb8bea40ec6e4ee7dd15e4a0647653f5696a6b7 --- pdf.c +++ pdf.c @@ -2252,7 +2252,6 @@ HParser *p_textbegin; HParser *p_textstream; HParser *p_trailer; HParser *p_cstream; -HParser *p_byteostm; HParser *p_dict; @@ -2263,7 +2262,6 @@ HParser *kstream(HAllocator *, const HParsedToken *, v HParser *kxstream(HAllocator *, const HParsedToken *, void *); HParser *ktxtstream(HAllocator *, const HParsedToken *, void *); HParser *kcontentstream(HAllocator *, const HParsedToken *, void *); -HParser *kbyteostream(HAllocator *, const HParsedToken *, void *); void init_runlengthdecode_parser(struct Env *aux) @@ -2642,9 +2640,6 @@ init_parser(struct Env *aux) H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend)); H_RULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream, OPT(ws), OPT(lws), KW("endobj"))); - H_RULE(byteostream, h_left(h_bind(stmbeg, kbyteostream, aux), stmend)); - H_RULE(byteostm, SEQ(ws, npair, wel, KW("obj"), ws, byteostream, - OPT(ws), OPT(lws), KW("endobj"))); /* global parser variables */ @@ -2665,7 +2660,6 @@ init_parser(struct Env *aux) p_textbegin = txtbegin; p_textstream = txtstream; p_cstream = CHX(cstream, objdef); - p_byteostm = byteostm; p_dict = dict; p_fail = h_nothing_p(); @@ -3815,55 +3809,6 @@ get_dictoftype( tok = NULL; return tok; -} - - -/* - * This continuation takes the content stream, decodes it if necessary and returns - * the byte stream for concatenation with other byte streams priot to test extraction. - * It is very similar to kstream in approach. 
- */ -HParser * -kbyteostream(HAllocator *mm__, const HParsedToken *x, void *env) -{ - - struct Env *aux = env; - HParsedToken *dict_t = H_INDEX_TOKEN(x, 0); - Dict *dict = H_CAST(Dict, dict_t); - const HParsedToken *v = NULL; - HParser *bytes_p, *dict_p, *value_p; - struct streamspec *spec; - size_t sz=0; - - /* look for the Length entry -- could be a reference */ - v = dictentry(dict, "Length"); - v = resolve(aux, v); /* resolve indirect references */ - if (v == NULL || v->token_type != TT_SINT || v->sint < 0) { - if (v == NULL) - log_message(7, "kbyteostream: stream /Length missing\n"); - else if (v -> token_type != TT_SINT) - log_message(7, "kbyteostream: stream /Length not an integer\n"); - else if (v < 0) - log_message(7, "kbyteostream: stream /Length negative\n"); - - return p_fail; - } - - sz = (size_t)v->sint; - - dict_p = p_return__m(mm__, dict_t); - bytes_p = p_take__m(mm__, sz, aux); - - spec = h_alloc(mm__, sizeof(struct streamspec)); - spec->dict = dict; - spec->parser = p_bytes; - - value_p = h_action__m(mm__, bytes_p, act_ks_value, spec); - // XXX this should be h_bind, i think?! so failures in the - // postordinate parser propagate... - - return h_sequence__m(mm__, dict_p, value_p, NULL); - } @@ -4426,7 +4371,8 @@ void parse_pagenode ( pieces = h_arena_malloc(arena, sizeof(HBytes*) * numelts); for (int i=0; i<numelts; i++) { entry = H_INDEX_TOKEN(contents, i); - entry = resolve_item(aux, entry, p_byteostm); + entry = resolve(aux, entry); + // XXX check that entry is a stream object of the expected type if (!entry) // TODO: Failure ==> xref error -- Figure out how to handle goto end; HParsedToken *res_strm = H_INDEX_TOKEN(entry, 1);