commit - 57347e732fd186b907726dd9a27fc2011390e145
commit + db58f4ce1094a2cbcba547a18458fb2431492595
blob - 42adbaf179426ca995afc290db450170511ad12b
blob + 1083ef7f27b09a3bb450b1551f8ea3a856562fa9
--- pdf.c
+++ pdf.c
HParser *p_textbegin;
HParser *p_textstream;
HParser *p_trailer;
-HParser *p_cstream;
HParser *p_dict;
HParser *kstream(HAllocator *, const HParsedToken *, void *);
HParser *kxstream(HAllocator *, const HParsedToken *, void *);
HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
-HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
void
init_runlengthdecode_parser(struct Env *aux)
/* text streams */
H_RULE(txtstream, h_bind(txtobjs, ktxtstream, aux));
-
- // Page Tree
- H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend));
- H_RULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream,
- OPT(ws), OPT(lws), KW("endobj")));
/* global parser variables */
/* text parser variables */
p_textbegin = txtbegin;
p_textstream = txtstream;
- p_cstream = CHX(cstream, objdef);
p_dict = dict;
p_fail = h_nothing_p();
* Start Catalog parsing
* ********************************************************************
*/
-/*
- * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
- * and parse the result with 'p'.
- */
-HParseResult *
-decode_contentstream(const Dict *d, HBytes b, HParser *p)
-{
- HParseResult *(*filter)(const Dict *, HBytes, HParser *);
- const Dict *parms = NULL;
- const HParsedToken *v;
- HParseResult *res = NULL;
-
-
- /*
- * Check if there is additional information in the dictionary
- * that we should use to process the content stream
- *
- * If the data in the stream is encoded, a filter will be specified in
- * the dictionary that must be used to decode the data first
- *
- * TODO:: Handle arrays of filters (chained) and their decode parameters
- */
- v = dictentry(d, "Filter"); // look for a filter
-
- if (v != NULL) { // data is encoded
-
-
- if (v->token_type != TT_BYTES) {
- // XXX TT_SEQUENCE would be a filter chain; that’s not supported, yet.
- // But it might also be something bogus, in which case we should fail.
- return NULL;
- }
-
- if (bytes_eq(v->bytes, "FlateDecode"))
- filter = FlateDecode;
- else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
- filter = ASCIIHexDecode;
- else if (bytes_eq(v->bytes, "ASCII85Decode"))
- filter = ASCII85Decode;
- else if (bytes_eq(v->bytes, "RunLengthDecode"))
- filter = RunLengthDecode;
- else if (bytes_eq(v->bytes, "LZWDecode"))
- filter = LZWDecode;
- else { /* filter not supported */
- log_message(SEV_DONTCARE, "decode_stream:: Unsupported Filter [%.*s]\n",
- (int)v->bytes.len, v->bytes.token);
- return NULL; /* Treat the stream as a byte array */
- }
- /* Check for parameters for the filter */
- v = dictentry(d, "DecodeParms");
- if (v && v->token_type == TT_Dict)
- parms = v->user;
-
- res = filter(parms, b, p);
- } /* The dictionary provided direction for processing the stream */
-
- /*
- * It is possible that we should always process the stream as a content stream
- * But not yet sure that covers all case.
- */
- else { // content stream is not encoded
- res = h_parse(p, b.token, b.len);
- }
-
- /*
- * There are other parameters that can be passed in the dictionary
- * They are not being handled currently
- */
-// const int numOptKeys = 3;
-// char *optionalKeys[3] = { "F", "FDecodeParms", "DL" };
-// for (int i=0; i<numOptKeys; i++) {
-// v = dictentry(d, optionalKeys[i]);
-// if (v) fprintf(stderr, "decode_contentstream:: Unsupported Specifications [%s\n]", optionalKeys[i]);
-// }
- return res;
-}
-
-
-HParsedToken *
-act_kcontentstream_value(const HParseResult *p, void *u)
-{
- struct streamspec *spec = u;
- HBytes bytes = H_CAST_BYTES(p->ast);
- HParseResult *res;
- /* decode and parse the stream data */
- res = decode_contentstream(spec->dict, bytes, spec->parser);
-// if (!res) {
-// res = (HParseResult *)p;
-// }
-
- if (res) return (HParsedToken *)res->ast;
- else return (HParsedToken *)p->ast;
-}
-
-
-
-
-
const HParsedToken *
parse_item(struct Env *aux, size_t nr, size_t gen, size_t offset, HParser *p_item)
{
return tok;
}
-
-
-
-
-
-/*
- * This continuation takes the content stream and processes it for test extraction.
- * It is very similar to kstream in approach. It decodes and extracts the stream contents
- * and
- * It does not consume the string and returns the token as the output.
- *
- * x = (txtobj ...)
- */
-HParser *
-kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
-{
-
- struct Env *aux = env;
- HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
- Dict *dict = H_CAST(Dict, dict_t);
- const HParsedToken *v = NULL;
- HParser *bytes_p, *dict_p, *value_p;
- struct streamspec *spec;
- size_t sz=0;
-
- /* look for the Length entry -- could be a reference */
- v = dictentry(dict, "Length");
- v = resolve(aux, v); /* resolve indirect references */
- if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
- if (v == NULL)
- log_message(7, "kcontentstream: stream /Length missing\n");
- else if (v -> token_type != TT_SINT)
- log_message(7, "kcontentstream: stream /Length not an integer\n");
- else if (v < 0)
- log_message(7, "kcontentstream: stream /Length negative\n");
- return p_fail;
- }
-
- sz = (size_t)v->sint;
-
- dict_p = p_return__m(mm__, dict_t);
- bytes_p = p_take__m(mm__, sz, aux);
-
- spec = h_alloc(mm__, sizeof(struct streamspec));
- spec->dict = dict;
-
- v = dictentry(dict, "Type");
- if (v == NULL) // XXX -> custom type
- spec->parser = p_textstream;
- else {
- fprintf(stdout, "kcontentstream: Not a text or object stream!\n");
- return p_fail;
- }
- value_p = h_action__m(mm__, bytes_p, act_kcontentstream_value, spec);
- return h_sequence__m(mm__, dict_p, value_p, NULL);
-}
Fontinfo_T *
getFontinfo(const Dict *fontdict, char *name, struct Env *aux)
// Process the contents stream or array -- optional
contents = dictentry(myDict, "Contents");
- //contents = resolve_item(aux, contents, p_cstream);
- // XXX reinstate when code below works for indirect objects
- // it currently only handles the case where /Contents is
- // a direct object that happens to be an array.
+ //contents = resolve(aux, contents);
+ // XXX reinstate when we can distinguish stream objects from
+ // arrays (TT_SEQUENCE). code below currently only handles
+ // the case where /Contents is a direct object that happens
+ // to be an array.
if (contents == NULL)
return;
- if (contents->token_type == TT_SEQUENCE) {
+ // XXX handle single stream case
+ if (contents->token_type == TT_SEQUENCE) { /* array case */
size_t numelts = contents->seq->used;
size_t bufsz = 0;
HBytes bstrm;