commit db58f4ce1094a2cbcba547a18458fb2431492595
from: Sven M. Hallberg
date: Fri Aug 12 17:42:21 2022 UTC

remove p_cstream (and kcontentstream)

We can cover the single-stream case by doing what the multi-stream case
does: Get the stream object, validate that its value type is TT_BYTES,
and run p_textstream parser over those bytes from parse_pagenode. No need
for a special version of p_objdef, kstream, or resolve for that matter.

commit - 57347e732fd186b907726dd9a27fc2011390e145
commit + db58f4ce1094a2cbcba547a18458fb2431492595
blob - 42adbaf179426ca995afc290db450170511ad12b
blob + 1083ef7f27b09a3bb450b1551f8ea3a856562fa9
--- pdf.c
+++ pdf.c
@@ -2251,7 +2251,6 @@ HParser *p_bytes;
 HParser *p_textbegin;
 HParser *p_textstream;
 HParser *p_trailer;
-HParser *p_cstream;
 HParser *p_dict;
 
@@ -2261,7 +2260,6 @@ HParser *p_dict;
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
 HParser *kxstream(HAllocator *, const HParsedToken *, void *);
 HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
-HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
 
 void
 init_runlengthdecode_parser(struct Env *aux)
@@ -2635,11 +2633,6 @@ init_parser(struct Env *aux)
 	/* text streams */
 	H_RULE(txtstream, h_bind(txtobjs, ktxtstream, aux));
-
-	// Page Tree
-	H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend));
-	H_RULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream,
-	    OPT(ws), OPT(lws), KW("endobj")));
 
 	/* global parser variables */
@@ -2659,7 +2652,6 @@ init_parser(struct Env *aux)
 	/* text parser variables */
 	p_textbegin = txtbegin;
 	p_textstream = txtstream;
-	p_cstream = CHX(cstream, objdef);
 	p_dict = dict;
 
 	p_fail = h_nothing_p();
@@ -3415,105 +3407,7 @@ struct streamspec {
  *        Start Catalog parsing
  *
  ******************************************************************** */
-/*
- * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
- * and parse the result with 'p'.
- */
-HParseResult *
-decode_contentstream(const Dict *d, HBytes b, HParser *p)
-{
-	HParseResult *(*filter)(const Dict *, HBytes, HParser *);
-	const Dict *parms = NULL;
-	const HParsedToken *v;
-	HParseResult *res = NULL;
-
-
-	/*
-	 * Check if there is additional information in the dictionary
-	 * that we should use to process the content stream
-	 *
-	 * If the data in the stream is encoded, a filter will be specified in
-	 * the dictionary that must be used to decode the data first
-	 *
-	 * TODO:: Handle arrays of filters (chained) and their decode parameters
-	 */
-	v = dictentry(d, "Filter");	// look for a filter
-
-	if (v != NULL) {		// data is encoded
-
-
-		if (v->token_type != TT_BYTES) {
-			// XXX TT_SEQUENCE would be a filter chain; that’s not supported, yet.
-			//     But it might also be something bogus, in which case we should fail.
-			return NULL;
-		}
-
-		if (bytes_eq(v->bytes, "FlateDecode"))
-			filter = FlateDecode;
-		else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
-			filter = ASCIIHexDecode;
-		else if (bytes_eq(v->bytes, "ASCII85Decode"))
-			filter = ASCII85Decode;
-		else if (bytes_eq(v->bytes, "RunLengthDecode"))
-			filter = RunLengthDecode;
-		else if (bytes_eq(v->bytes, "LZWDecode"))
-			filter = LZWDecode;
-		else {	/* filter not supported */
-			log_message(SEV_DONTCARE, "decode_stream:: Unsupported Filter [%.*s]\n",
-			    (int)v->bytes.len, v->bytes.token);
-			return NULL;	/* Treat the stream as a byte array */
-		}
-		/* Check for parameters for the filter */
-		v = dictentry(d, "DecodeParms");
-		if (v && v->token_type == TT_Dict)
-			parms = v->user;
-		res = filter(parms, b, p);
-	} /* The dictionary provided direction for processing the stream */
-
-	/*
-	 * It is possible that we should always process the stream as a content stream
-	 * But not yet sure that covers all case.
-	 */
-	else {	// content stream is not encoded
-		res = h_parse(p, b.token, b.len);
-	}
-
-	/*
-	 * There are other parameters that can be passed in the dictionary
-	 * They are not being handled currently
-	 */
-//	const int numOptKeys = 3;
-//	char *optionalKeys[3] = { "F", "FDecodeParms", "DL" };
-//	for (int i=0; i<numOptKeys; i++) { ... }
-
-	return res;
-}
-
-HParsedToken *
-act_kcontentstream_value(const HParseResult *p, void *u)
-{
-	struct streamspec *spec = u;
-	HBytes bytes = H_CAST_BYTES(p->ast);
-	HParseResult *res;
-
-	/* decode and parse the stream data */
-	res = decode_contentstream(spec->dict, bytes, spec->parser);
-//	if (!res) {
-//		res = (HParseResult *)p;
-//	}
-
-	if (res) return (HParsedToken *)res->ast;
-	else return (HParsedToken *)p->ast;
-}
-
-
-
-
 const HParsedToken *
 parse_item(struct Env *aux, size_t nr, size_t gen, size_t offset, HParser *p_item)
 {
@@ -3810,66 +3704,10 @@ get_dictoftype(
 	return tok;
 }
-
-
-
-
-
-/*
- * This continuation takes the content stream and processes it for test extraction.
- * It is very similar to kstream in approach. It decodes and extracts the stream contents
- * and
- * It does not consume the string and returns the token as the output.
- *
- * x = (txtobj ...)
- */
-HParser *
-kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
-{
-
-	struct Env *aux = env;
-	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
-	Dict *dict = H_CAST(Dict, dict_t);
-	const HParsedToken *v = NULL;
-	HParser *bytes_p, *dict_p, *value_p;
-	struct streamspec *spec;
-	size_t sz=0;
-
-	/* look for the Length entry -- could be a reference */
-	v = dictentry(dict, "Length");
-	v = resolve(aux, v);		/* resolve indirect references */
-	if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
-		if (v == NULL)
-			log_message(7, "kcontentstream: stream /Length missing\n");
-		else if (v -> token_type != TT_SINT)
-			log_message(7, "kcontentstream: stream /Length not an integer\n");
-		else if (v < 0)
-			log_message(7, "kcontentstream: stream /Length negative\n");
-		return p_fail;
-	}
-
-	sz = (size_t)v->sint;
-
-	dict_p = p_return__m(mm__, dict_t);
-	bytes_p = p_take__m(mm__, sz, aux);
-
-	spec = h_alloc(mm__, sizeof(struct streamspec));
-	spec->dict = dict;
-
-	v = dictentry(dict, "Type");
-	if (v == NULL)			// XXX -> custom type
-		spec->parser = p_textstream;
-	else {
-		fprintf(stdout, "kcontentstream: Not a text or object stream!\n");
-		return p_fail;
-	}
-	value_p = h_action__m(mm__, bytes_p, act_kcontentstream_value, spec);
-	return h_sequence__m(mm__, dict_p, value_p, NULL);
-}
 
 Fontinfo_T *
 getFontinfo(const Dict *fontdict, char *name, struct Env *aux)
@@ -4349,13 +4187,15 @@ void parse_pagenode (
 	// Process the contents stream or array -- optional
 	contents = dictentry(myDict, "Contents");
-	//contents = resolve_item(aux, contents, p_cstream);
-	// XXX reinstate when code below works for indirect objects
-	//     it currently only handles the case where /Contents is
-	//     a direct object that happens to be an array.
+	//contents = resolve(aux, contents);
+	// XXX reinstate when we can distinguish stream objects from
+	//     arrays (TT_SEQUENCE). code below currently only handles
+	//     the case where /Contents is a direct object that happens
+	//     to be an array.
 	if (contents == NULL)
 		return;
-	if (contents->token_type == TT_SEQUENCE) {
+	// XXX handle single stream case
+	if (contents->token_type == TT_SEQUENCE) {	/* array case */
 		size_t numelts = contents->seq->used;
 		size_t bufsz = 0;
 		HBytes bstrm;
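
Below is a minimal sketch of the single-stream handling the commit message describes (resolve /Contents to the stream object, check that its value token is TT_BYTES, run p_textstream over those bytes from parse_pagenode). It reuses helpers visible in this diff (resolve, h_parse, p_textstream, log_message); the helper name parse_single_contentstream and the assumption that a resolved stream object shows up as a two-element (dict, value) sequence are illustrative only, not part of the commit.

/*
 * Sketch only: single-stream /Contents case for parse_pagenode.
 * resolve(), h_parse(), p_textstream and log_message() are the existing
 * pdf.c helpers shown in this diff; the (dict, value) stream layout and
 * the helper name are assumptions made for illustration.
 */
static void
parse_single_contentstream(struct Env *aux, const HParsedToken *contents)
{
	const HParsedToken *strm, *val;
	HParseResult *res;

	strm = resolve(aux, contents);		/* follow an indirect reference */
	if (strm == NULL)
		return;

	/* assumed stream layout: sequence of (dict, value) */
	if (strm->token_type == TT_SEQUENCE && strm->seq->used == 2)
		val = strm->seq->elements[1];
	else
		val = strm;

	/* validate the value type before running the text-stream parser */
	if (val == NULL || val->token_type != TT_BYTES) {
		log_message(7, "parse_pagenode: /Contents is not a byte stream\n");
		return;
	}

	res = h_parse(p_textstream, val->bytes.token, val->bytes.len);
	if (res == NULL)
		log_message(7, "parse_pagenode: content stream did not parse\n");
	/* otherwise, text would be extracted from res->ast as in the array case */
}

parse_pagenode could call something like this next to the "XXX handle single stream case" comment; the open question flagged by the XXX note above remains telling such a stream apart from a /Contents array once indirect references are resolved.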