commit db58f4ce1094a2cbcba547a18458fb2431492595
from: Sven M. Hallberg
date: Fri Aug 12 17:42:21 2022 UTC

remove p_cstream (and kcontentstream)

We can cover the single-stream case by doing what the multi-stream case
does: Get the stream object, validate that its value type is TT_BYTES,
and run p_textstream parser over those bytes from parse_pagenode. No need
for a special version of p_objdef, kstream, or resolve for that matter.

commit - 57347e732fd186b907726dd9a27fc2011390e145
commit + db58f4ce1094a2cbcba547a18458fb2431492595
blob - 42adbaf179426ca995afc290db450170511ad12b
blob + 1083ef7f27b09a3bb450b1551f8ea3a856562fa9
--- pdf.c
+++ pdf.c
@@ -2251,7 +2251,6 @@ HParser *p_bytes;
 HParser *p_textbegin;
 HParser *p_textstream;
 HParser *p_trailer;
-HParser *p_cstream;
 HParser *p_dict;
 
@@ -2261,7 +2260,6 @@ HParser *p_dict;
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
 HParser *kxstream(HAllocator *, const HParsedToken *, void *);
 HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
-HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
 
 void
 init_runlengthdecode_parser(struct Env *aux)
@@ -2635,11 +2633,6 @@ init_parser(struct Env *aux)
 	/* text streams */
 	H_RULE(txtstream, h_bind(txtobjs, ktxtstream, aux));
-
-	// Page Tree
-	H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend));
-	H_RULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream,
-	    OPT(ws), OPT(lws), KW("endobj")));
 
 	/* global parser variables */
@@ -2659,7 +2652,6 @@ init_parser(struct Env *aux)
 	/* text parser variables */
 	p_textbegin = txtbegin;
 	p_textstream = txtstream;
-	p_cstream = CHX(cstream, objdef);
 	p_dict = dict;
 
 	p_fail = h_nothing_p();
@@ -3415,105 +3407,7 @@ struct streamspec {
  *        Start Catalog parsing
  *
  ******************************************************************** */
-/*
- * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
- * and parse the result with 'p'.
- */
-HParseResult *
-decode_contentstream(const Dict *d, HBytes b, HParser *p)
-{
-	HParseResult *(*filter)(const Dict *, HBytes, HParser *);
-	const Dict *parms = NULL;
-	const HParsedToken *v;
-	HParseResult *res = NULL;
-
-
-	/*
-	 * Check if there is additional information in the dictionary
-	 * that we should use to process the content stream
-	 *
-	 * If the data in the stream is encoded, a filter will be specified in
-	 * the dictionary that must be used to decode the data first
-	 *
-	 * TODO:: Handle arrays of filters (chained) and their decode parameters
-	 */
-	v = dictentry(d, "Filter");	// look for a filter
-
-	if (v != NULL) {		// data is encoded
-
-
-		if (v->token_type != TT_BYTES) {
-			// XXX TT_SEQUENCE would be a filter chain; that’s not supported, yet.
-			//     But it might also be something bogus, in which case we should fail.
-			return NULL;
-		}
-
-		if (bytes_eq(v->bytes, "FlateDecode"))
-			filter = FlateDecode;
-		else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
-			filter = ASCIIHexDecode;
-		else if (bytes_eq(v->bytes, "ASCII85Decode"))
-			filter = ASCII85Decode;
-		else if (bytes_eq(v->bytes, "RunLengthDecode"))
-			filter = RunLengthDecode;
-		else if (bytes_eq(v->bytes, "LZWDecode"))
-			filter = LZWDecode;
-		else {	/* filter not supported */
-			log_message(SEV_DONTCARE, "decode_stream:: Unsupported Filter [%.*s]\n",
-			    (int)v->bytes.len, v->bytes.token);
-			return NULL;	/* Treat the stream as a byte array */
-		}
-		/* Check for parameters for the filter */
-		v = dictentry(d, "DecodeParms");
-		if (v && v->token_type == TT_Dict)
-			parms = v->user;
-		res = filter(parms, b, p);
-	} /* The dictionary provided direction for processing the stream */
-
-	/*
-	 * It is possible that we should always process the stream as a content stream
-	 * But not yet sure that covers all case.
-	 */
-	else {	// content stream is not encoded
-		res = h_parse(p, b.token, b.len);
-	}
-
-	/*
-	 * There are other parameters that can be passed in the dictionary
-	 * They are not being handled currently
-	 */
-//	const int numOptKeys = 3;
-//	char *optionalKeys[3] = { "F", "FDecodeParms", "DL" };
-//	for (int i=0; i<numOptKeys; i++) { ... }
-
-	return res;
-}
-
-HParsedToken *
-act_kcontentstream_value(const HParseResult *p, void *u)
-{
-	struct streamspec *spec = u;
-	HBytes bytes = H_CAST_BYTES(p->ast);
-	HParseResult *res;
-
-	/* decode and parse the stream data */
-	res = decode_contentstream(spec->dict, bytes, spec->parser);
-//	if (!res) {
-//		res = (HParseResult *)p;
-//	}
-
-	if (res) return (HParsedToken *)res->ast;
-	else return (HParsedToken *)p->ast;
-}
-
-
-
-
 const HParsedToken *
 parse_item(struct Env *aux, size_t nr, size_t gen, size_t offset, HParser *p_item)
 {
@@ -3810,66 +3704,10 @@ get_dictoftype(
 	return tok;
 }
-
-
-
-
-
-/*
- * This continuation takes the content stream and processes it for test extraction.
- * It is very similar to kstream in approach. It decodes and extracts the stream contents
- * and
- * It does not consume the string and returns the token as the output.
- *
- * x = (txtobj ...)
- */
-HParser *
-kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
-{
-
-	struct Env *aux = env;
-	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
-	Dict *dict = H_CAST(Dict, dict_t);
-	const HParsedToken *v = NULL;
-	HParser *bytes_p, *dict_p, *value_p;
-	struct streamspec *spec;
-	size_t sz=0;
-
-	/* look for the Length entry -- could be a reference */
-	v = dictentry(dict, "Length");
-	v = resolve(aux, v);		/* resolve indirect references */
-	if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
-		if (v == NULL)
-			log_message(7, "kcontentstream: stream /Length missing\n");
-		else if (v -> token_type != TT_SINT)
-			log_message(7, "kcontentstream: stream /Length not an integer\n");
-		else if (v < 0)
-			log_message(7, "kcontentstream: stream /Length negative\n");
-		return p_fail;
-	}
-
-	sz = (size_t)v->sint;
-
-	dict_p = p_return__m(mm__, dict_t);
-	bytes_p = p_take__m(mm__, sz, aux);
-
-	spec = h_alloc(mm__, sizeof(struct streamspec));
-	spec->dict = dict;
-
-	v = dictentry(dict, "Type");
-	if (v == NULL)			// XXX -> custom type
-		spec->parser = p_textstream;
-	else {
-		fprintf(stdout, "kcontentstream: Not a text or object stream!\n");
-		return p_fail;
-	}
-	value_p = h_action__m(mm__, bytes_p, act_kcontentstream_value, spec);
-	return h_sequence__m(mm__, dict_p, value_p, NULL);
-}
 
 Fontinfo_T *
 getFontinfo(const Dict *fontdict, char *name, struct Env *aux)
@@ -4349,13 +4187,15 @@ void parse_pagenode (
 	// Process the contents stream or array -- optional
 	contents = dictentry(myDict, "Contents");
-	//contents = resolve_item(aux, contents, p_cstream);
-	// XXX reinstate when code below works for indirect objects
-	//     it currently only handles the case where /Contents is
-	//     a direct object that happens to be an array.
+	//contents = resolve(aux, contents);
+	// XXX reinstate when we can distinguish stream objects from
+	//     arrays (TT_SEQUENCE). code below currently only handles
+	//     the case where /Contents is a direct object that happens
+	//     to be an array.
 	if (contents == NULL)
 		return;
-	if (contents->token_type == TT_SEQUENCE) {
+	// XXX handle single stream case
+	if (contents->token_type == TT_SEQUENCE) {	/* array case */
 		size_t numelts = contents->seq->used;
 		size_t bufsz = 0;
 		HBytes bstrm;
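
Below is a minimal sketch of the single-stream handling the commit message describes (resolve /Contents to the stream object, check that its value token is TT_BYTES, run p_textstream over those bytes from parse_pagenode). It reuses helpers visible in this diff (resolve, h_parse, p_textstream, log_message); the helper name parse_single_contentstream and the assumption that a resolved stream object shows up as a two-element (dict, value) sequence are illustrative only, not part of the commit.

/*
 * Sketch only: single-stream /Contents case for parse_pagenode.
 * resolve(), h_parse(), p_textstream and log_message() are the existing
 * pdf.c helpers shown in this diff; the (dict, value) stream layout and
 * the helper name are assumptions made for illustration.
 */
static void
parse_single_contentstream(struct Env *aux, const HParsedToken *contents)
{
	const HParsedToken *strm, *val;
	HParseResult *res;

	strm = resolve(aux, contents);		/* follow an indirect reference */
	if (strm == NULL)
		return;

	/* assumed stream layout: sequence of (dict, value) */
	if (strm->token_type == TT_SEQUENCE && strm->seq->used == 2)
		val = strm->seq->elements[1];
	else
		val = strm;

	/* validate the value type before running the text-stream parser */
	if (val == NULL || val->token_type != TT_BYTES) {
		log_message(7, "parse_pagenode: /Contents is not a byte stream\n");
		return;
	}

	res = h_parse(p_textstream, val->bytes.token, val->bytes.len);
	if (res == NULL)
		log_message(7, "parse_pagenode: content stream did not parse\n");
	/* otherwise, text would be extracted from res->ast as in the array case */
}

parse_pagenode could call something like this next to the "XXX handle single stream case" comment; the open question flagged by the XXX note above remains telling such a stream apart from a /Contents array once indirect references are resolved.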