Commit Diff


commit - 11ecff401f57af01ad1d2e62a5fd7e9791c9240b
commit + 354df5d9cd87cba2ef291fa9a60e67ac7fbb1729
blob - 8f4fa53f554e3cdf6b23ec64ab81eb273c836ff2
blob + 3e0d8d34283393fd978b0e1807e3dda377e1b14a
--- pdf.c
+++ pdf.c
@@ -158,6 +158,13 @@ dictentry(const Dict *dict, const char *key)
 	return NULL;
 }
 
+
+void ppACL2_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	return;
+	
+}
+
 void
 pp_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta)
 {
@@ -189,7 +196,16 @@ pp_ref(FILE *stream, const HParsedToken *tok, int inde
 	fprintf(stream, "[%zu,%zu]", r->nr, r->gen);
 }
 
+
 void
+ppACL2_ref(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	Ref *r = H_CAST(Ref, tok);
+
+	fprintf(stream, "(list \"ref\" %zu )", r->nr);
+}
+
+void
 pp_parseresult(FILE *stream, const HParsedToken *tok, int indent, int delta)
 {
 	HParseResult *res = H_CAST(HParseResult, tok);
@@ -200,6 +216,7 @@ pp_parseresult(FILE *stream, const HParsedToken *tok, 
 		h_pprint(stream, res->ast, indent, delta);
 }
 
+
 void
 pp_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
 {
@@ -224,6 +241,35 @@ pp_dict(FILE *stream, const HParsedToken *tok, int ind
 		fprintf(stream, "\n%*s}", indent, "");
 	else
 		fprintf(stream, " }");
+}
+
+
+void
+ppACL2_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	const HCountedArray *k_v;
+
+	if (tok->seq->used == 0) {
+		fprintf(stream, " ");
+		return;
+	}
+
+	fprintf(stream, "(list %*s", delta - 1, "");
+	for (size_t i = 0; i < tok->seq->used; i++) {
+		fprintf(stream, "\n(list ");
+		k_v = tok->seq->elements[i]->seq;
+		if (i > 0)
+			fprintf(stream, "%*s %*s", indent, "", delta - 1, "");
+		h_pprint(stream, k_v->elements[0], indent + delta, delta);
+		fprintf(stream, " ");
+		h_pprint(stream, k_v->elements[1], indent + delta, delta);
+		fprintf(stream, ")");
+	}
+
+	if (tok->seq->used > 2)
+		fprintf(stream, "\n%*s)", indent, "");
+	else
+		fprintf(stream, " )");
 }
 
 /*
@@ -241,13 +287,13 @@ act_digit(const HParseResult *p, void *u)
 HParsedToken *
 act_hlower(const HParseResult *p, void *u)
 {
-	return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'a');
+	return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'a');
 }
 
 HParsedToken *
 act_hupper(const HParseResult *p, void *u)
 {
-	return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'A');
+	return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A');
 }
 
 HParsedToken*
@@ -543,8 +589,8 @@ HParser *p_violsev;
 HParsedToken *
 act_viol(const HParseResult *p, void *viol)
 {
-	uint32_t severity;
-	uint32_t offset;
+	uint severity;
+	uint offset;
 	HParseResult *severity_parse;
 	viol = (uint8_t *) viol;
 	severity_parse = h_parse(p_violsev, viol, strlen((char *)viol));
@@ -555,15 +601,12 @@ act_viol(const HParseResult *p, void *viol)
 	else {
 	        severity = severity_parse->ast->seq->elements[0]->uint;
 	}
-	offset = p->ast->seq->elements[p->ast->seq->used-1]->uint / 8;
+	offset = p->ast->seq->elements[1]->uint / 8;
 	fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol);
 	if (strictness && severity > strictness) {
 	        exit(1);
 	}
 	/* Just return the parse AST, drop the h_tell */
-	if (p->ast->seq->used == 1) {
-		return (HParsedToken *) NULL;
-	}
 	return (HParsedToken *) p->ast->seq->elements[0];
 }
 
@@ -657,7 +700,7 @@ act_nesc(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2));
 }
 
-#define act_schars h_act_flatten
+#define act_str_ h_act_flatten
 #define act_string act_token
 
 HParsedToken *
@@ -671,6 +714,9 @@ act_octal(const HParseResult *p, void *u)
 
 	return H_MAKE_UINT(x);
 }
+#define act_oct3 act_octal
+#define act_oct2 act_octal
+#define act_oct1 act_octal
 
 HParsedToken *
 act_xrent(const HParseResult *p, void *u)
@@ -741,7 +787,7 @@ act_xrstm(const HParseResult *p, void *u)
 
 	dict = H_INDEX_TOKEN(p->ast, 1, 0);
 	res = H_FIELD(HParseResult, 1, 1);	// XXX free this
-	xrefs = res != NULL ? res->ast : NULL;
+	xrefs = res->ast;
 
 	tok = H_MAKE_SEQN(2);
 	tok->seq->elements[0] = (HParsedToken *)xrefs;
@@ -758,14 +804,8 @@ act_xrstm(const HParseResult *p, void *u)
 bool
 validate_xrstm(HParseResult *p, void *u)
 {
-	const HParsedToken *xrefs = H_INDEX_TOKEN(p->ast, 1, 1);
 	const Dict *tdict = H_FIELD(Dict, 1, 0);
 	const HParsedToken *v = dictentry(tdict, "Type");
-
-	if (!xrefs)
-	{
-		return false;
-	}
 
 #if 0
 	if (v == NULL)
@@ -775,7 +815,7 @@ validate_xrstm(HParseResult *p, void *u)
 	else if (bytes_eq(v->bytes, "XRef"))
 		return true;
 	return false;
-#endif	// XXX this block can be removed
+#endif
 
 	return (v != NULL && v->token_type == TT_BYTES &&
 	    bytes_eq(v->bytes, "XRef"));
@@ -889,10 +929,10 @@ void
 init_parser(struct Env *aux)
 {
 	TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
-	TT_XREntry =	h_allocate_token_new("XREntry", NULL, pp_xrentry);
-	TT_Ref =	h_allocate_token_new("Ref", NULL, pp_ref);
-	TT_Dict =	h_allocate_token_new("Dict", NULL, pp_dict);
-
+	TT_XREntry =	h_allocate_token_new("XREntry", NULL, ppACL2_xrentry);
+	TT_Ref =	h_allocate_token_new("Ref", NULL, ppACL2_ref);
+	//TT_Dict =	h_allocate_token_new("Dict", NULL, pp_dict);
+TT_Dict =	h_allocate_token_new("", NULL, ppACL2_dict);
 	/* lines */
 	H_RULE(cr,	p_mapch('\r', '\n'));	/* semantic value: \n */
 	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
@@ -909,7 +949,6 @@ init_parser(struct Env *aux)
 	//H_RULE(dchar,	IN(DCHARS));			/* delimiter */
 	H_RULE(rchar,	NOT_IN(WCHARS DCHARS));		/* regular */
 	H_RULE(nchar,	NOT_IN(WCHARS DCHARS "#"));	/* name */
-	H_RULE(schar,	NOT_IN("()\n\r\\"));		/* string literal */
 	H_ARULE(digit,	h_ch_range('0', '9'));
 	H_ARULE(pdigit,	h_ch_range('1', '9'));
 	H_ARULE(hlower,	h_ch_range('a', 'f'));
@@ -964,10 +1003,16 @@ init_parser(struct Env *aux)
 	/* numbers */
 	H_ARULE(sign,	CHX(minus, IGN(plus)));
 	H_VRULE(intnn,	nat);
+	#if 1
 	H_ARULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
 			    SEQ(digits, period, empty),		/* 123. */
 			    SEQ(empty, period, digits)));	/* .123 */
 		// XXX ^ we _could_ move the "123." case into intnn...
+	#else
+	// XXX the .123 case above somehow leads to a conflict with litstr...
+	H_ARULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
+			    SEQ(digits, period, empty)));	/* 123. */
+	#endif
 	H_RULE(numbnn,	CHX(realnn, intnn));
 	H_RULE(snumb,	SEQ(sign, numbnn));
 	H_ARULE(numb,	CHX(snumb, numbnn));
@@ -977,24 +1022,59 @@ init_parser(struct Env *aux)
 	H_ARULE(nstr,	h_many(CHX(nchar, nesc)));	/* '/' is valid */
 	H_RULE(name,	h_right(slash, nstr));
 
-	/* strings */
-	H_RULE(snest,	h_indirect());
+	/* strings
+	 *
+	 * this is so convoluted in order to make it LALR including the
+	 * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23")
+	 * and end-of-line ("CRLF" vs "CR LF").
+	 *
+	 * we have to split the base rule 'str' into variants 'str_o' and
+	 * 'str_l' depending on whether they may start with an octal digit or
+	 * linefeed, respectively.
+	 */
+	H_RULE(str_ol,	h_indirect());
+	H_RULE(str_o,	h_indirect());
+	H_RULE(str_l,	h_indirect());
+	H_RULE(str,	h_indirect());
 	H_RULE(bsn,	p_mapch('n', 0x0a));	/* LF */
 	H_RULE(bsr,	p_mapch('r', 0x0d));	/* CR */
 	H_RULE(bst,	p_mapch('t', 0x09));	/* HT */
 	H_RULE(bsb,	p_mapch('b', 0x08));	/* BS (backspace) */
 	H_RULE(bsf,	p_mapch('f', 0x0c));	/* FF */
 	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
-	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
-	H_RULE(wrap,	IGN(eol));
-	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
-		/* NB: lone backslashes and escaped newlines are ignored */
-	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
-	H_RULE(snest_,	SEQ(lparen, schars, rparen));
-	H_RULE(litstr,	h_middle(lparen, schars, rparen));
+	H_ARULE(oct3,	REP(odigit,3));
+	H_ARULE(oct2,	REP(odigit,2));
+	H_ARULE(oct1,	REP(odigit,1));
+	H_RULE(octesc,	CHX(SEQ(oct3, str),
+			    SEQ(oct2, str_o),
+			    SEQ(oct1, str_o)));
+	H_RULE(eolesc,	CHX(SEQ(IGN(crlf), str),
+			    SEQ(IGN(cr), str_l),
+			    SEQ(IGN(lf), str)));
+	H_RULE(schar_o,	NOT_IN("()\n\r\\" "01234567"));
+	H_RULE(schar_e,	NOT_IN("()\n\r\\" "01234567" "nrtbf"));
+	H_RULE(str_o_,	CHX(SEQ(lf, str), str_ol));	/* str "but not" odigit */
+	H_RULE(str_l_,	CHX(SEQ(odigit, str), str_ol));	/* str "but not" lf */
+	H_RULE(str_ol_,	CHX(SEQ(cr, str_l),		/* str "but neither" */
+			    SEQ(crlf, str),
+			    SEQ(schar_o, str),
+			    SEQ(lparen, str, rparen, str),
+			    SEQ(IGN(bslash), escape, str),
+			    SEQ(IGN(bslash), schar_e, str),	/* "lone" bs */
+		/* NB: ^ lone backslashes are to be ignored per spec, but we
+		 * let them "escape" with the following character. this works
+		 * because they are never truly alone. */
+			    SEQ(IGN(bslash), octesc),
+			    SEQ(IGN(bslash), eolesc),		/* line split */
+			    epsilon));
+	H_ARULE(str_,	CHX(SEQ(lf, str), SEQ(odigit, str), str_ol));
+	H_RULE(litstr,	h_middle(lparen, str, rparen));
 	H_RULE(hexstr,	h_middle(langle, MANY_WS(hdigit), rangle));
 	H_ARULE(string,	CHX(litstr, hexstr));
-	h_bind_indirect(snest, snest_);
+	h_bind_indirect(str_ol,	str_ol_);
+	h_bind_indirect(str_o,	str_o_);
+	h_bind_indirect(str_l,	str_l_);
+	h_bind_indirect(str,	str_);
 
 	H_RULE(array,	h_indirect());
 	H_RULE(dict,	h_indirect());
@@ -1052,7 +1132,6 @@ init_parser(struct Env *aux)
 
 	H_RULE(stream,	h_left(h_bind(stmbeg, kstream, aux), stmend));
 		// XXX is whitespace allowed between the eol and "endstream"?
-		// peter wyatt says no. (2020-03-25)
 
 	/*
 	 * file structure
@@ -1079,11 +1158,7 @@ init_parser(struct Env *aux)
 	H_RULE(xrtyp,	CHX(h_ch('n'), h_ch('f')));
 	H_ARULE(xroff,	REP(digit, 10));
 	H_ARULE(xrgen,	REP(digit, 5));
-	H_ARULE(xrent,	SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)),
-	    xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)),
-	    xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"),
-	    xreol,
-	    VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)")))));
+	H_ARULE(xrent,	SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol)));
 	H_RULE(xrhead,	SEQ(nat, IGN(sp), nat, nl));
 	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
 	H_ARULE(xrefs,	SEQ(KW("xref"), nl, h_many(xrsub)));
@@ -1118,9 +1193,8 @@ init_parser(struct Env *aux)
 
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
-	H_RULE(hdr_junk,    CHX(comment,
-	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
-	    "Uncommented junk after header (severity=1)")));
+	H_RULE(hdr_junk,    VIOL(h_many1(h_butnot(h_ch_range(0, 255), objdef)),
+				 "Uncommented junk after header (severity=1)"));
 	H_RULE(tail,	SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
 				      VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
 					       OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
@@ -1587,208 +1661,6 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 
 	if (done == -1)
 		return NULL;
-	return res;
-}
-
-/* LZW helpers */
-
-typedef struct
-{
-	uint8_t *lzw_buf;
-	size_t total_buf_size;
-	size_t write_head;
-	size_t write_tail;
-	uint8_t write_checksum;
-	size_t eof_loc;
-
-	HBytes *input_stream;
-	size_t read_head;
-	size_t read_tail;
-	uint8_t read_checksum;
-} lzwspec;
-
-lzwspec *cur_lzw_spec;
-
-/* used by write_lzw_buffer to get more space for decoding if needed */
-void
-grow_lzw_buffer(size_t amount)
-{
-	uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, (cur_lzw_spec->total_buf_size+amount) * sizeof(uint8_t));
-	if(ret_buf != NULL)
-	{
-		cur_lzw_spec->total_buf_size += amount;
-		cur_lzw_spec->lzw_buf = ret_buf;
-	}
-	else
-	{
-		fprintf(stderr, "LZWDecode: h_arena_realloc() failed");
-		return;
-	}
-}
-
-lzwspec *
-new_lzw_spec(HBytes *bytes)
-{
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-	lzwspec *ret = malloc(sizeof(lzwspec));
-	ret->input_stream = bytes;
-	ret->lzw_buf = malloc(BUFSIZE);
-	ret->total_buf_size = BUFSIZE;
-	return ret;
-}
-
-void
-delete_lzw_spec(lzwspec *spec)
-{
-	free(spec->lzw_buf);
-	free(spec);
-}
-
-void
-bind_lzw_spec(lzwspec *spec)
-{
-	cur_lzw_spec = spec;
-}
-
-
-#include "lzw-lib.h"
-
-/* Buffer writer function for the lzw-ab implementation, with a fixed signature.
- * Although the type is defined as int, it is expected to write one byte at a time.
- * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */
-
-void
-write_lzw_buffer(int value)
-{
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-
-	if(!cur_lzw_spec->lzw_buf)
-	{
-		fprintf(stderr, "LZWDecode: lzw_buf is null!");
-		assert(cur_lzw_spec->lzw_buf != NULL);
-	}
-
-	assert(cur_lzw_spec->write_head <= cur_lzw_spec->total_buf_size);
-
-	if (value == EOF) {
-        cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value;
-        cur_lzw_spec->eof_loc = cur_lzw_spec->write_head;
-        cur_lzw_spec->write_head++;
-        return;
-    }
-
-	/* We can get away with this cast due to writing single bytes. */
-    cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value;
-
-	/* If you looked at lzw-ab's code, the write head is reset here
-	 * This function uses write_head as the offset of the last written item */
-    if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
-    {
-        grow_lzw_buffer(BUFSIZE);
-    }
-
-    cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value;
-}
-
-
-/* Fixed signature function for reading bytes. Modifies cur_lzw_spec. Set cur_lzw_spec
- * with bind_lzw_spec() */
-int read_lzw_buffer(void)
-{
-	uint8_t byte_read;
-	int ret_value;
-
-	/* Input data is already waiting in the buffer */
-    if (cur_lzw_spec->read_head == cur_lzw_spec->read_tail)
-        cur_lzw_spec->read_tail = cur_lzw_spec->input_stream->len;
-
-    if (cur_lzw_spec->read_head < cur_lzw_spec->read_tail)
-    {
-        byte_read = cur_lzw_spec->input_stream->token[cur_lzw_spec->read_head++];
-        cur_lzw_spec->read_checksum = cur_lzw_spec->read_checksum * 3 + byte_read;
-        ret_value = byte_read;
-    }
-    else
-        ret_value = EOF;
-
-    return ret_value;
-}
-
-
-HParseResult *
-LZWDecode(const Dict *parms, HBytes b, HParser *p)
-{
-	struct predictor pred = {1, 1, 8, 1};
-	int (*depredict)(struct predictor *, uint8_t *, size_t);
-	HParseResult *res;
-	int done;
-	int ret;
-	const HParsedToken *v;
-
-	/* set up the predictor (if any) */
-	#define SETPARM(VAR,STR) do {					\
-		v = dictentry(parms, (STR));				\
-		if (v != NULL) {					\
-			if (v->token_type != TT_SINT || v->sint < 0)	\
-				return NULL;				\
-			VAR = v->sint;					\
-		} } while(0)
-	SETPARM(pred.num,	"Predictor");
-	SETPARM(pred.colors,	"Colors");
-	SETPARM(pred.bpc,	"BitsPerComponent");
-	SETPARM(pred.columns,	"Columns");
-	#undef SETPARM
-	if (pred.num == 1)
-		depredict = depred_none;
-	else {
-		if (pred.num >= 10 && pred.num <= 15)
-			depredict = depred_png;
-		else if (pred.num == 2) {
-			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
-			if (pred.bpc == 8) {
-				pred.predfun = pp_sub;	/* predict left */
-				depredict = depred_png;
-			} else {
-				// XXX add general TIFF predictor (bpc != 8)
-				fprintf(stderr, "LZWDecode: /Predictor %d "
-				    "not supported for /BitsPerComponent %d\n",
-				    pred.num, pred.bpc);
-				return NULL;
-			}
-		} else {
-			fprintf(stderr, "LZWDecode: /Predictor %d"
-			    " not supported\n", pred.num);
-			return NULL;
-		}
-
-		/* allocate row buffer */
-		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
-			fprintf(stderr, "LZWDecode: overflow\n");
-			return NULL;
-		}
-		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
-		pred.buf = calloc(1, pred.rowsz);
-		if (pred.buf == NULL)
-			err(1, "LZWDecode");
-	}
-
-	lzwspec *lzw_spec = new_lzw_spec(&b);
-	bind_lzw_spec(lzw_spec);
-
-	ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
-	if (ret) {
-		fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
-		assert(!"LZWDecode: failed to decompress\n");
-	}
-	done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
-	assert(!done);	// XXX ITERATIVE
-
-	res = h_parse(p, pred.out, pred.nout);
-	free(pred.out);
-
-	bind_lzw_spec(NULL);
-	delete_lzw_spec(lzw_spec);
-
 	return res;
 }
 
@@ -1886,8 +1758,6 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 		filter = ASCII85Decode;
 	else if (bytes_eq(v->bytes, "RunLengthDecode"))
 		filter = RunLengthDecode;
-	else if (bytes_eq(v->bytes, "LZWDecode"))
-		filter = LZWDecode;
 	else
 		return NULL;		/* filter not supported */
 
@@ -2447,4 +2317,4 @@ main(int argc, char *argv[])
 	h_pprintln(stdout, res->ast);
 
 	return 0;
-}
+}
\ No newline at end of file