commit - 11ecff401f57af01ad1d2e62a5fd7e9791c9240b
commit + 354df5d9cd87cba2ef291fa9a60e67ac7fbb1729
blob - 8f4fa53f554e3cdf6b23ec64ab81eb273c836ff2
blob + 3e0d8d34283393fd978b0e1807e3dda377e1b14a
--- pdf.c
+++ pdf.c
return NULL;
}
+
+void ppACL2_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+ return;
+
+}
+
void
pp_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
fprintf(stream, "[%zu,%zu]", r->nr, r->gen);
}
+
void
+ppACL2_ref(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+ Ref *r = H_CAST(Ref, tok);
+
+ fprintf(stream, "(list \"ref\" %zu )", r->nr);
+}
+
+void
pp_parseresult(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
HParseResult *res = H_CAST(HParseResult, tok);
h_pprint(stream, res->ast, indent, delta);
}
+
void
pp_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
fprintf(stream, "\n%*s}", indent, "");
else
fprintf(stream, " }");
+}
+
+
+void
+ppACL2_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+ const HCountedArray *k_v;
+
+ if (tok->seq->used == 0) {
+ fprintf(stream, " ");
+ return;
+ }
+
+ fprintf(stream, "(list %*s", delta - 1, "");
+ for (size_t i = 0; i < tok->seq->used; i++) {
+ fprintf(stream, "\n(list ");
+ k_v = tok->seq->elements[i]->seq;
+ if (i > 0)
+ fprintf(stream, "%*s %*s", indent, "", delta - 1, "");
+ h_pprint(stream, k_v->elements[0], indent + delta, delta);
+ fprintf(stream, " ");
+ h_pprint(stream, k_v->elements[1], indent + delta, delta);
+ fprintf(stream, ")");
+ }
+
+ if (tok->seq->used > 2)
+ fprintf(stream, "\n%*s)", indent, "");
+ else
+ fprintf(stream, " )");
}
/*
HParsedToken *
act_hlower(const HParseResult *p, void *u)
{
- return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'a');
+ return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'a');
}
HParsedToken *
act_hupper(const HParseResult *p, void *u)
{
- return H_MAKE_UINT(10 + H_CAST_UINT(p->ast) - 'A');
+ return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A');
}
HParsedToken*
HParsedToken *
act_viol(const HParseResult *p, void *viol)
{
- uint32_t severity;
- uint32_t offset;
+ uint severity;
+ uint offset;
HParseResult *severity_parse;
viol = (uint8_t *) viol;
severity_parse = h_parse(p_violsev, viol, strlen((char *)viol));
else {
severity = severity_parse->ast->seq->elements[0]->uint;
}
- offset = p->ast->seq->elements[p->ast->seq->used-1]->uint / 8;
+ offset = p->ast->seq->elements[1]->uint / 8;
fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol);
if (strictness && severity > strictness) {
exit(1);
}
/* Just return the parse AST, drop the h_tell */
- if (p->ast->seq->used == 1) {
- return (HParsedToken *) NULL;
- }
return (HParsedToken *) p->ast->seq->elements[0];
}
return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2));
}
-#define act_schars h_act_flatten
+#define act_str_ h_act_flatten
#define act_string act_token
HParsedToken *
return H_MAKE_UINT(x);
}
+#define act_oct3 act_octal
+#define act_oct2 act_octal
+#define act_oct1 act_octal
HParsedToken *
act_xrent(const HParseResult *p, void *u)
dict = H_INDEX_TOKEN(p->ast, 1, 0);
res = H_FIELD(HParseResult, 1, 1); // XXX free this
- xrefs = res != NULL ? res->ast : NULL;
+ xrefs = res->ast;
tok = H_MAKE_SEQN(2);
tok->seq->elements[0] = (HParsedToken *)xrefs;
bool
validate_xrstm(HParseResult *p, void *u)
{
- const HParsedToken *xrefs = H_INDEX_TOKEN(p->ast, 1, 1);
const Dict *tdict = H_FIELD(Dict, 1, 0);
const HParsedToken *v = dictentry(tdict, "Type");
-
- if (!xrefs)
- {
- return false;
- }
#if 0
if (v == NULL)
else if (bytes_eq(v->bytes, "XRef"))
return true;
return false;
-#endif // XXX this block can be removed
+#endif
return (v != NULL && v->token_type == TT_BYTES &&
bytes_eq(v->bytes, "XRef"));
init_parser(struct Env *aux)
{
TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
- TT_XREntry = h_allocate_token_new("XREntry", NULL, pp_xrentry);
- TT_Ref = h_allocate_token_new("Ref", NULL, pp_ref);
- TT_Dict = h_allocate_token_new("Dict", NULL, pp_dict);
-
+ TT_XREntry = h_allocate_token_new("XREntry", NULL, ppACL2_xrentry);
+ TT_Ref = h_allocate_token_new("Ref", NULL, ppACL2_ref);
+ //TT_Dict = h_allocate_token_new("Dict", NULL, pp_dict);
+TT_Dict = h_allocate_token_new("", NULL, ppACL2_dict);
/* lines */
H_RULE(cr, p_mapch('\r', '\n')); /* semantic value: \n */
H_RULE(lf, h_ch('\n')); /* semantic value: \n */
//H_RULE(dchar, IN(DCHARS)); /* delimiter */
H_RULE(rchar, NOT_IN(WCHARS DCHARS)); /* regular */
H_RULE(nchar, NOT_IN(WCHARS DCHARS "#")); /* name */
- H_RULE(schar, NOT_IN("()\n\r\\")); /* string literal */
H_ARULE(digit, h_ch_range('0', '9'));
H_ARULE(pdigit, h_ch_range('1', '9'));
H_ARULE(hlower, h_ch_range('a', 'f'));
/* numbers */
H_ARULE(sign, CHX(minus, IGN(plus)));
H_VRULE(intnn, nat);
+ #if 1
H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
SEQ(digits, period, empty), /* 123. */
SEQ(empty, period, digits))); /* .123 */
// XXX ^ we _could_ move the "123." case into intnn...
+ #else
+ // XXX the .123 case above somehow leads to a conflict with litstr...
+ H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
+ SEQ(digits, period, empty))); /* 123. */
+ #endif
H_RULE(numbnn, CHX(realnn, intnn));
H_RULE(snumb, SEQ(sign, numbnn));
H_ARULE(numb, CHX(snumb, numbnn));
H_ARULE(nstr, h_many(CHX(nchar, nesc))); /* '/' is valid */
H_RULE(name, h_right(slash, nstr));
- /* strings */
- H_RULE(snest, h_indirect());
+ /* strings
+ *
+ * this is so convoluted in order to make it LALR including the
+ * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23")
+ * and end-of-line ("CRLF" vs "CR LF").
+ *
+ * we have to split the base rule 'str' into variants 'str_o' and
+ * 'str_l' depending on whether they may start with an octal digit or
+ * linefeed, respectively.
+ */
+ H_RULE(str_ol, h_indirect());
+ H_RULE(str_o, h_indirect());
+ H_RULE(str_l, h_indirect());
+ H_RULE(str, h_indirect());
H_RULE(bsn, p_mapch('n', 0x0a)); /* LF */
H_RULE(bsr, p_mapch('r', 0x0d)); /* CR */
H_RULE(bst, p_mapch('t', 0x09)); /* HT */
H_RULE(bsb, p_mapch('b', 0x08)); /* BS (backspace) */
H_RULE(bsf, p_mapch('f', 0x0c)); /* FF */
H_RULE(escape, CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
- H_ARULE(octal, CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
- H_RULE(wrap, IGN(eol));
- H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon)));
- /* NB: lone backslashes and escaped newlines are ignored */
- H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol)));
- H_RULE(snest_, SEQ(lparen, schars, rparen));
- H_RULE(litstr, h_middle(lparen, schars, rparen));
+ H_ARULE(oct3, REP(odigit,3));
+ H_ARULE(oct2, REP(odigit,2));
+ H_ARULE(oct1, REP(odigit,1));
+ H_RULE(octesc, CHX(SEQ(oct3, str),
+ SEQ(oct2, str_o),
+ SEQ(oct1, str_o)));
+ H_RULE(eolesc, CHX(SEQ(IGN(crlf), str),
+ SEQ(IGN(cr), str_l),
+ SEQ(IGN(lf), str)));
+ H_RULE(schar_o, NOT_IN("()\n\r\\" "01234567"));
+ H_RULE(schar_e, NOT_IN("()\n\r\\" "01234567" "nrtbf"));
+ H_RULE(str_o_, CHX(SEQ(lf, str), str_ol)); /* str "but not" odigit */
+ H_RULE(str_l_, CHX(SEQ(odigit, str), str_ol)); /* str "but not" lf */
+ H_RULE(str_ol_, CHX(SEQ(cr, str_l), /* str "but neither" */
+ SEQ(crlf, str),
+ SEQ(schar_o, str),
+ SEQ(lparen, str, rparen, str),
+ SEQ(IGN(bslash), escape, str),
+ SEQ(IGN(bslash), schar_e, str), /* "lone" bs */
+ /* NB: ^ lone backslashes are to be ignored per spec, but we
+ * let them "escape" with the following character. this works
+ * because they are never truly alone. */
+ SEQ(IGN(bslash), octesc),
+ SEQ(IGN(bslash), eolesc), /* line split */
+ epsilon));
+ H_ARULE(str_, CHX(SEQ(lf, str), SEQ(odigit, str), str_ol));
+ H_RULE(litstr, h_middle(lparen, str, rparen));
H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle));
H_ARULE(string, CHX(litstr, hexstr));
- h_bind_indirect(snest, snest_);
+ h_bind_indirect(str_ol, str_ol_);
+ h_bind_indirect(str_o, str_o_);
+ h_bind_indirect(str_l, str_l_);
+ h_bind_indirect(str, str_);
H_RULE(array, h_indirect());
H_RULE(dict, h_indirect());
H_RULE(stream, h_left(h_bind(stmbeg, kstream, aux), stmend));
// XXX is whitespace allowed between the eol and "endstream"?
- // peter wyatt says no. (2020-03-25)
/*
* file structure
H_RULE(xrtyp, CHX(h_ch('n'), h_ch('f')));
H_ARULE(xroff, REP(digit, 10));
H_ARULE(xrgen, REP(digit, 5));
- H_ARULE(xrent, SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)),
- xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)),
- xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"),
- xreol,
- VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)")))));
+ H_ARULE(xrent, SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol)));
H_RULE(xrhead, SEQ(nat, IGN(sp), nat, nl));
H_RULE(xrsub, SEQ(xrhead, h_many(xrent)));
H_ARULE(xrefs, SEQ(KW("xref"), nl, h_many(xrsub)));
H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict));
- H_RULE(hdr_junk, CHX(comment,
- VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
- "Uncommented junk after header (severity=1)")));
+ H_RULE(hdr_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), objdef)),
+ "Uncommented junk after header (severity=1)"));
H_RULE(tail, SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
if (done == -1)
return NULL;
- return res;
-}
-
-/* LZW helpers */
-
-typedef struct
-{
- uint8_t *lzw_buf;
- size_t total_buf_size;
- size_t write_head;
- size_t write_tail;
- uint8_t write_checksum;
- size_t eof_loc;
-
- HBytes *input_stream;
- size_t read_head;
- size_t read_tail;
- uint8_t read_checksum;
-} lzwspec;
-
-lzwspec *cur_lzw_spec;
-
-/* used by write_lzw_buffer to get more space for decoding if needed */
-void
-grow_lzw_buffer(size_t amount)
-{
- uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, (cur_lzw_spec->total_buf_size+amount) * sizeof(uint8_t));
- if(ret_buf != NULL)
- {
- cur_lzw_spec->total_buf_size += amount;
- cur_lzw_spec->lzw_buf = ret_buf;
- }
- else
- {
- fprintf(stderr, "LZWDecode: h_arena_realloc() failed");
- return;
- }
-}
-
-lzwspec *
-new_lzw_spec(HBytes *bytes)
-{
- size_t const BUFSIZE = sizeof(uint8_t) * 1024;
- lzwspec *ret = malloc(sizeof(lzwspec));
- ret->input_stream = bytes;
- ret->lzw_buf = malloc(BUFSIZE);
- ret->total_buf_size = BUFSIZE;
- return ret;
-}
-
-void
-delete_lzw_spec(lzwspec *spec)
-{
- free(spec->lzw_buf);
- free(spec);
-}
-
-void
-bind_lzw_spec(lzwspec *spec)
-{
- cur_lzw_spec = spec;
-}
-
-
-#include "lzw-lib.h"
-
-/* Buffer writer function for the lzw-ab implementation, with a fixed signature.
- * Although the type is defined as int, it is expected to write one byte at a time.
- * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */
-
-void
-write_lzw_buffer(int value)
-{
- size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-
- if(!cur_lzw_spec->lzw_buf)
- {
- fprintf(stderr, "LZWDecode: lzw_buf is null!");
- assert(cur_lzw_spec->lzw_buf != NULL);
- }
-
- assert(cur_lzw_spec->write_head <= cur_lzw_spec->total_buf_size);
-
- if (value == EOF) {
- cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value;
- cur_lzw_spec->eof_loc = cur_lzw_spec->write_head;
- cur_lzw_spec->write_head++;
- return;
- }
-
- /* We can get away with this cast due to writing single bytes. */
- cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value;
-
- /* If you looked at lzw-ab's code, the write head is reset here
- * This function uses write_head as the offset of the last written item */
- if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
- {
- grow_lzw_buffer(BUFSIZE);
- }
-
- cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value;
-}
-
-
-/* Fixed signature function for reading bytes. Modifies cur_lzw_spec. Set cur_lzw_spec
- * with bind_lzw_spec() */
-int read_lzw_buffer(void)
-{
- uint8_t byte_read;
- int ret_value;
-
- /* Input data is already waiting in the buffer */
- if (cur_lzw_spec->read_head == cur_lzw_spec->read_tail)
- cur_lzw_spec->read_tail = cur_lzw_spec->input_stream->len;
-
- if (cur_lzw_spec->read_head < cur_lzw_spec->read_tail)
- {
- byte_read = cur_lzw_spec->input_stream->token[cur_lzw_spec->read_head++];
- cur_lzw_spec->read_checksum = cur_lzw_spec->read_checksum * 3 + byte_read;
- ret_value = byte_read;
- }
- else
- ret_value = EOF;
-
- return ret_value;
-}
-
-
-HParseResult *
-LZWDecode(const Dict *parms, HBytes b, HParser *p)
-{
- struct predictor pred = {1, 1, 8, 1};
- int (*depredict)(struct predictor *, uint8_t *, size_t);
- HParseResult *res;
- int done;
- int ret;
- const HParsedToken *v;
-
- /* set up the predictor (if any) */
- #define SETPARM(VAR,STR) do { \
- v = dictentry(parms, (STR)); \
- if (v != NULL) { \
- if (v->token_type != TT_SINT || v->sint < 0) \
- return NULL; \
- VAR = v->sint; \
- } } while(0)
- SETPARM(pred.num, "Predictor");
- SETPARM(pred.colors, "Colors");
- SETPARM(pred.bpc, "BitsPerComponent");
- SETPARM(pred.columns, "Columns");
- #undef SETPARM
- if (pred.num == 1)
- depredict = depred_none;
- else {
- if (pred.num >= 10 && pred.num <= 15)
- depredict = depred_png;
- else if (pred.num == 2) {
- /* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
- if (pred.bpc == 8) {
- pred.predfun = pp_sub; /* predict left */
- depredict = depred_png;
- } else {
- // XXX add general TIFF predictor (bpc != 8)
- fprintf(stderr, "LZWDecode: /Predictor %d "
- "not supported for /BitsPerComponent %d\n",
- pred.num, pred.bpc);
- return NULL;
- }
- } else {
- fprintf(stderr, "LZWDecode: /Predictor %d"
- " not supported\n", pred.num);
- return NULL;
- }
-
- /* allocate row buffer */
- if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
- fprintf(stderr, "LZWDecode: overflow\n");
- return NULL;
- }
- pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
- pred.buf = calloc(1, pred.rowsz);
- if (pred.buf == NULL)
- err(1, "LZWDecode");
- }
-
- lzwspec *lzw_spec = new_lzw_spec(&b);
- bind_lzw_spec(lzw_spec);
-
- ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
- if (ret) {
- fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
- assert(!"LZWDecode: failed to decompress\n");
- }
- done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
- assert(!done); // XXX ITERATIVE
-
- res = h_parse(p, pred.out, pred.nout);
- free(pred.out);
-
- bind_lzw_spec(NULL);
- delete_lzw_spec(lzw_spec);
-
return res;
}
filter = ASCII85Decode;
else if (bytes_eq(v->bytes, "RunLengthDecode"))
filter = RunLengthDecode;
- else if (bytes_eq(v->bytes, "LZWDecode"))
- filter = LZWDecode;
else
return NULL; /* filter not supported */
h_pprintln(stdout, res->ast);
return 0;
-}
+}
\ No newline at end of file