commit 354df5d9cd87cba2ef291fa9a60e67ac7fbb1729 from: Letitia Li via: pompolic date: Wed Jun 03 21:21:00 2020 UTC Adding ACL2-compatible printing commit - 11ecff401f57af01ad1d2e62a5fd7e9791c9240b commit + 354df5d9cd87cba2ef291fa9a60e67ac7fbb1729 blob - 8f4fa53f554e3cdf6b23ec64ab81eb273c836ff2 blob + 3e0d8d34283393fd978b0e1807e3dda377e1b14a --- pdf.c +++ pdf.c @@ -158,6 +158,13 @@ dictentry(const Dict *dict, const char *key) return NULL; } + +void ppACL2_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + return; + +} + void pp_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta) { @@ -189,7 +196,16 @@ pp_ref(FILE *stream, const HParsedToken *tok, int inde fprintf(stream, "[%zu,%zu]", r->nr, r->gen); } + void +ppACL2_ref(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + Ref *r = H_CAST(Ref, tok); + + fprintf(stream, "(list \"ref\" %zu )", r->nr); +} + +void pp_parseresult(FILE *stream, const HParsedToken *tok, int indent, int delta) { HParseResult *res = H_CAST(HParseResult, tok); @@ -200,6 +216,7 @@ pp_parseresult(FILE *stream, const HParsedToken *tok, h_pprint(stream, res->ast, indent, delta); } + void pp_dict(FILE *stream, const HParsedToken *tok, int indent, int delta) { @@ -224,6 +241,35 @@ pp_dict(FILE *stream, const HParsedToken *tok, int ind fprintf(stream, "\n%*s}", indent, ""); else fprintf(stream, " }"); +} + + +void +ppACL2_dict(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + const HCountedArray *k_v; + + if (tok->seq->used == 0) { + fprintf(stream, " "); + return; + } + + fprintf(stream, "(list %*s", delta - 1, ""); + for (size_t i = 0; i < tok->seq->used; i++) { + fprintf(stream, "\n(list "); + k_v = tok->seq->elements[i]->seq; + if (i > 0) + fprintf(stream, "%*s %*s", indent, "", delta - 1, ""); + h_pprint(stream, k_v->elements[0], indent + delta, delta); + fprintf(stream, " "); + h_pprint(stream, k_v->elements[1], indent + delta, delta); + fprintf(stream, ")"); + } + + if (tok->seq->used > 2) + fprintf(stream, "\n%*s)", indent, ""); + else + fprintf(stream, " )"); } /* @@ -241,13 +287,13 @@ act_digit(const HParseResult *p, void *u) HParsedToken * act_hlower(const HParseResult *p, void *u) { - return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'a'); + return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'a'); } HParsedToken * act_hupper(const HParseResult *p, void *u) { - return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'A'); + return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A'); } HParsedToken* @@ -543,8 +589,8 @@ HParser *p_violsev; HParsedToken * act_viol(const HParseResult *p, void *viol) { - uint32_t severity; - uint32_t offset; + uint severity; + uint offset; HParseResult *severity_parse; viol = (uint8_t *) viol; severity_parse = h_parse(p_violsev, viol, strlen((char *)viol)); @@ -555,15 +601,12 @@ act_viol(const HParseResult *p, void *viol) else { severity = severity_parse->ast->seq->elements[0]->uint; } - offset = p->ast->seq->elements[p->ast->seq->used-1]->uint / 8; + offset = p->ast->seq->elements[1]->uint / 8; fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol); if (strictness && severity > strictness) { exit(1); } /* Just return the parse AST, drop the h_tell */ - if (p->ast->seq->used == 1) { - return (HParsedToken *) NULL; - } return (HParsedToken *) p->ast->seq->elements[0]; } @@ -657,7 +700,7 @@ act_nesc(const HParseResult *p, void *u) return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2)); } -#define act_schars h_act_flatten +#define act_str_ h_act_flatten #define act_string act_token HParsedToken * @@ -671,6 +714,9 @@ act_octal(const HParseResult *p, void *u) return H_MAKE_UINT(x); } +#define act_oct3 act_octal +#define act_oct2 act_octal +#define act_oct1 act_octal HParsedToken * act_xrent(const HParseResult *p, void *u) @@ -741,7 +787,7 @@ act_xrstm(const HParseResult *p, void *u) dict = H_INDEX_TOKEN(p->ast, 1, 0); res = H_FIELD(HParseResult, 1, 1); // XXX free this - xrefs = res != NULL ? res->ast : NULL; + xrefs = res->ast; tok = H_MAKE_SEQN(2); tok->seq->elements[0] = (HParsedToken *)xrefs; @@ -758,14 +804,8 @@ act_xrstm(const HParseResult *p, void *u) bool validate_xrstm(HParseResult *p, void *u) { - const HParsedToken *xrefs = H_INDEX_TOKEN(p->ast, 1, 1); const Dict *tdict = H_FIELD(Dict, 1, 0); const HParsedToken *v = dictentry(tdict, "Type"); - - if (!xrefs) - { - return false; - } #if 0 if (v == NULL) @@ -775,7 +815,7 @@ validate_xrstm(HParseResult *p, void *u) else if (bytes_eq(v->bytes, "XRef")) return true; return false; -#endif // XXX this block can be removed +#endif return (v != NULL && v->token_type == TT_BYTES && bytes_eq(v->bytes, "XRef")); @@ -889,10 +929,10 @@ void init_parser(struct Env *aux) { TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult); - TT_XREntry = h_allocate_token_new("XREntry", NULL, pp_xrentry); - TT_Ref = h_allocate_token_new("Ref", NULL, pp_ref); - TT_Dict = h_allocate_token_new("Dict", NULL, pp_dict); - + TT_XREntry = h_allocate_token_new("XREntry", NULL, ppACL2_xrentry); + TT_Ref = h_allocate_token_new("Ref", NULL, ppACL2_ref); + //TT_Dict = h_allocate_token_new("Dict", NULL, pp_dict); +TT_Dict = h_allocate_token_new("", NULL, ppACL2_dict); /* lines */ H_RULE(cr, p_mapch('\r', '\n')); /* semantic value: \n */ H_RULE(lf, h_ch('\n')); /* semantic value: \n */ @@ -909,7 +949,6 @@ init_parser(struct Env *aux) //H_RULE(dchar, IN(DCHARS)); /* delimiter */ H_RULE(rchar, NOT_IN(WCHARS DCHARS)); /* regular */ H_RULE(nchar, NOT_IN(WCHARS DCHARS "#")); /* name */ - H_RULE(schar, NOT_IN("()\n\r\\")); /* string literal */ H_ARULE(digit, h_ch_range('0', '9')); H_ARULE(pdigit, h_ch_range('1', '9')); H_ARULE(hlower, h_ch_range('a', 'f')); @@ -964,10 +1003,16 @@ init_parser(struct Env *aux) /* numbers */ H_ARULE(sign, CHX(minus, IGN(plus))); H_VRULE(intnn, nat); + #if 1 H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ SEQ(digits, period, empty), /* 123. */ SEQ(empty, period, digits))); /* .123 */ // XXX ^ we _could_ move the "123." case into intnn... + #else + // XXX the .123 case above somehow leads to a conflict with litstr... + H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ + SEQ(digits, period, empty))); /* 123. */ + #endif H_RULE(numbnn, CHX(realnn, intnn)); H_RULE(snumb, SEQ(sign, numbnn)); H_ARULE(numb, CHX(snumb, numbnn)); @@ -977,24 +1022,59 @@ init_parser(struct Env *aux) H_ARULE(nstr, h_many(CHX(nchar, nesc))); /* '/' is valid */ H_RULE(name, h_right(slash, nstr)); - /* strings */ - H_RULE(snest, h_indirect()); + /* strings + * + * this is so convoluted in order to make it LALR including the + * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23") + * and end-of-line ("CRLF" vs "CR LF"). + * + * we have to split the base rule 'str' into variants 'str_o' and + * 'str_l' depending on whether they may start with an octal digit or + * linefeed, respectively. + */ + H_RULE(str_ol, h_indirect()); + H_RULE(str_o, h_indirect()); + H_RULE(str_l, h_indirect()); + H_RULE(str, h_indirect()); H_RULE(bsn, p_mapch('n', 0x0a)); /* LF */ H_RULE(bsr, p_mapch('r', 0x0d)); /* CR */ H_RULE(bst, p_mapch('t', 0x09)); /* HT */ H_RULE(bsb, p_mapch('b', 0x08)); /* BS (backspace) */ H_RULE(bsf, p_mapch('f', 0x0c)); /* FF */ H_RULE(escape, CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash)); - H_ARULE(octal, CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1))); - H_RULE(wrap, IGN(eol)); - H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon))); - /* NB: lone backslashes and escaped newlines are ignored */ - H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol))); - H_RULE(snest_, SEQ(lparen, schars, rparen)); - H_RULE(litstr, h_middle(lparen, schars, rparen)); + H_ARULE(oct3, REP(odigit,3)); + H_ARULE(oct2, REP(odigit,2)); + H_ARULE(oct1, REP(odigit,1)); + H_RULE(octesc, CHX(SEQ(oct3, str), + SEQ(oct2, str_o), + SEQ(oct1, str_o))); + H_RULE(eolesc, CHX(SEQ(IGN(crlf), str), + SEQ(IGN(cr), str_l), + SEQ(IGN(lf), str))); + H_RULE(schar_o, NOT_IN("()\n\r\\" "01234567")); + H_RULE(schar_e, NOT_IN("()\n\r\\" "01234567" "nrtbf")); + H_RULE(str_o_, CHX(SEQ(lf, str), str_ol)); /* str "but not" odigit */ + H_RULE(str_l_, CHX(SEQ(odigit, str), str_ol)); /* str "but not" lf */ + H_RULE(str_ol_, CHX(SEQ(cr, str_l), /* str "but neither" */ + SEQ(crlf, str), + SEQ(schar_o, str), + SEQ(lparen, str, rparen, str), + SEQ(IGN(bslash), escape, str), + SEQ(IGN(bslash), schar_e, str), /* "lone" bs */ + /* NB: ^ lone backslashes are to be ignored per spec, but we + * let them "escape" with the following character. this works + * because they are never truly alone. */ + SEQ(IGN(bslash), octesc), + SEQ(IGN(bslash), eolesc), /* line split */ + epsilon)); + H_ARULE(str_, CHX(SEQ(lf, str), SEQ(odigit, str), str_ol)); + H_RULE(litstr, h_middle(lparen, str, rparen)); H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle)); H_ARULE(string, CHX(litstr, hexstr)); - h_bind_indirect(snest, snest_); + h_bind_indirect(str_ol, str_ol_); + h_bind_indirect(str_o, str_o_); + h_bind_indirect(str_l, str_l_); + h_bind_indirect(str, str_); H_RULE(array, h_indirect()); H_RULE(dict, h_indirect()); @@ -1052,7 +1132,6 @@ init_parser(struct Env *aux) H_RULE(stream, h_left(h_bind(stmbeg, kstream, aux), stmend)); // XXX is whitespace allowed between the eol and "endstream"? - // peter wyatt says no. (2020-03-25) /* * file structure @@ -1079,11 +1158,7 @@ init_parser(struct Env *aux) H_RULE(xrtyp, CHX(h_ch('n'), h_ch('f'))); H_ARULE(xroff, REP(digit, 10)); H_ARULE(xrgen, REP(digit, 5)); - H_ARULE(xrent, SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)), - xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)), - xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"), - xreol, - VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)"))))); + H_ARULE(xrent, SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol))); H_RULE(xrhead, SEQ(nat, IGN(sp), nat, nl)); H_RULE(xrsub, SEQ(xrhead, h_many(xrent))); H_ARULE(xrefs, SEQ(KW("xref"), nl, h_many(xrsub))); @@ -1118,9 +1193,8 @@ init_parser(struct Env *aux) H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict)); - H_RULE(hdr_junk, CHX(comment, - VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))), - "Uncommented junk after header (severity=1)"))); + H_RULE(hdr_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), objdef)), + "Uncommented junk after header (severity=1)")); H_RULE(tail, SEQ(body, CHX(SEQ(h_optional(xr_td), startxr), VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)), OPT(nl), OPT(LIT("%%EOF")), OPT(nl)), @@ -1587,208 +1661,6 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p) if (done == -1) return NULL; - return res; -} - -/* LZW helpers */ - -typedef struct -{ - uint8_t *lzw_buf; - size_t total_buf_size; - size_t write_head; - size_t write_tail; - uint8_t write_checksum; - size_t eof_loc; - - HBytes *input_stream; - size_t read_head; - size_t read_tail; - uint8_t read_checksum; -} lzwspec; - -lzwspec *cur_lzw_spec; - -/* used by write_lzw_buffer to get more space for decoding if needed */ -void -grow_lzw_buffer(size_t amount) -{ - uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, (cur_lzw_spec->total_buf_size+amount) * sizeof(uint8_t)); - if(ret_buf != NULL) - { - cur_lzw_spec->total_buf_size += amount; - cur_lzw_spec->lzw_buf = ret_buf; - } - else - { - fprintf(stderr, "LZWDecode: h_arena_realloc() failed"); - return; - } -} - -lzwspec * -new_lzw_spec(HBytes *bytes) -{ - size_t const BUFSIZE = sizeof(uint8_t) * 1024; - lzwspec *ret = malloc(sizeof(lzwspec)); - ret->input_stream = bytes; - ret->lzw_buf = malloc(BUFSIZE); - ret->total_buf_size = BUFSIZE; - return ret; -} - -void -delete_lzw_spec(lzwspec *spec) -{ - free(spec->lzw_buf); - free(spec); -} - -void -bind_lzw_spec(lzwspec *spec) -{ - cur_lzw_spec = spec; -} - - -#include "lzw-lib.h" - -/* Buffer writer function for the lzw-ab implementation, with a fixed signature. - * Although the type is defined as int, it is expected to write one byte at a time. - * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */ - -void -write_lzw_buffer(int value) -{ - size_t const BUFSIZE = sizeof(uint8_t) * 1024; - - if(!cur_lzw_spec->lzw_buf) - { - fprintf(stderr, "LZWDecode: lzw_buf is null!"); - assert(cur_lzw_spec->lzw_buf != NULL); - } - - assert(cur_lzw_spec->write_head <= cur_lzw_spec->total_buf_size); - - if (value == EOF) { - cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value; - cur_lzw_spec->eof_loc = cur_lzw_spec->write_head; - cur_lzw_spec->write_head++; - return; - } - - /* We can get away with this cast due to writing single bytes. */ - cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value; - - /* If you looked at lzw-ab's code, the write head is reset here - * This function uses write_head as the offset of the last written item */ - if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size) - { - grow_lzw_buffer(BUFSIZE); - } - - cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value; -} - - -/* Fixed signature function for reading bytes. Modifies cur_lzw_spec. Set cur_lzw_spec - * with bind_lzw_spec() */ -int read_lzw_buffer(void) -{ - uint8_t byte_read; - int ret_value; - - /* Input data is already waiting in the buffer */ - if (cur_lzw_spec->read_head == cur_lzw_spec->read_tail) - cur_lzw_spec->read_tail = cur_lzw_spec->input_stream->len; - - if (cur_lzw_spec->read_head < cur_lzw_spec->read_tail) - { - byte_read = cur_lzw_spec->input_stream->token[cur_lzw_spec->read_head++]; - cur_lzw_spec->read_checksum = cur_lzw_spec->read_checksum * 3 + byte_read; - ret_value = byte_read; - } - else - ret_value = EOF; - - return ret_value; -} - - -HParseResult * -LZWDecode(const Dict *parms, HBytes b, HParser *p) -{ - struct predictor pred = {1, 1, 8, 1}; - int (*depredict)(struct predictor *, uint8_t *, size_t); - HParseResult *res; - int done; - int ret; - const HParsedToken *v; - - /* set up the predictor (if any) */ - #define SETPARM(VAR,STR) do { \ - v = dictentry(parms, (STR)); \ - if (v != NULL) { \ - if (v->token_type != TT_SINT || v->sint < 0) \ - return NULL; \ - VAR = v->sint; \ - } } while(0) - SETPARM(pred.num, "Predictor"); - SETPARM(pred.colors, "Colors"); - SETPARM(pred.bpc, "BitsPerComponent"); - SETPARM(pred.columns, "Columns"); - #undef SETPARM - if (pred.num == 1) - depredict = depred_none; - else { - if (pred.num >= 10 && pred.num <= 15) - depredict = depred_png; - else if (pred.num == 2) { - /* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */ - if (pred.bpc == 8) { - pred.predfun = pp_sub; /* predict left */ - depredict = depred_png; - } else { - // XXX add general TIFF predictor (bpc != 8) - fprintf(stderr, "LZWDecode: /Predictor %d " - "not supported for /BitsPerComponent %d\n", - pred.num, pred.bpc); - return NULL; - } - } else { - fprintf(stderr, "LZWDecode: /Predictor %d" - " not supported\n", pred.num); - return NULL; - } - - /* allocate row buffer */ - if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) { - fprintf(stderr, "LZWDecode: overflow\n"); - return NULL; - } - pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8; - pred.buf = calloc(1, pred.rowsz); - if (pred.buf == NULL) - err(1, "LZWDecode"); - } - - lzwspec *lzw_spec = new_lzw_spec(&b); - bind_lzw_spec(lzw_spec); - - ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer); - if (ret) { - fprintf(stderr, "lzw_decompress: error (%d)\n", ret); - assert(!"LZWDecode: failed to decompress\n"); - } - done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1); - assert(!done); // XXX ITERATIVE - - res = h_parse(p, pred.out, pred.nout); - free(pred.out); - - bind_lzw_spec(NULL); - delete_lzw_spec(lzw_spec); - return res; } @@ -1886,8 +1758,6 @@ decode_stream(const Dict *d, HBytes b, HParser *p) filter = ASCII85Decode; else if (bytes_eq(v->bytes, "RunLengthDecode")) filter = RunLengthDecode; - else if (bytes_eq(v->bytes, "LZWDecode")) - filter = LZWDecode; else return NULL; /* filter not supported */ @@ -2447,4 +2317,4 @@ main(int argc, char *argv[]) h_pprintln(stdout, res->ast); return 0; -} +} \ No newline at end of file