commit - 4e73234b1dbaf15f9776d9c17cfdb6b343108c75
commit + 53d31efb90ba3586f0da6ef2d0cee999ba562b90
blob - 7c59c8704db10ae825fbbacf4672e7a63a4304aa
blob + bc1057b35cbcba3ce10c8beacb89bebb0fcd0c5f
--- pdf.1.mdoc
+++ pdf.1.mdoc
.Pp
The options are as follows:
.Bl -tag -width Ds
+.It Fl d Cm c
+Decode and parse the given stream object(s) as a single content stream.
+At least one
+.Ar oid
+argument is required.
.It Fl d Cm s
Dump the body data, after filter decoding, of a given stream object.
An
blob - 4962b08bcf3907d45ae0db57127e5cf76c7e88d1
blob + 3bfea7f72603d25746377db94e34368405e44507
--- pdf.1.txt
+++ pdf.1.txt
pdf - validation and inspection of PDF files
SYNOPSIS
- pdf [-qsv] [-d what] [-x txtfile] input.pdf [oid]
+ pdf [-qsv] [-d type] [-x txtfile] input.pdf [oid ...]
DESCRIPTION
The pdf utility attempts to parse and validate the given PDF file. It
prints the resulting AST to standard output using a JSON format.
- The optional oid argument selects a specific object to be printed instead
- of the whole document. It is expected to be of the form "n.g" where n
+ The optional oid arguments select specific objects to be printed instead
+ of the whole document. Each is expected to be of the form "n.g" where n
and g are object and generation numbers, respectively. The generation
number may be omitted to select the latest object matching n.
The options are as follows:
+ -d c Decode and parse the given stream object(s) as a single content
+ stream. At least one oid argument is required.
+
-d s Dump the body data, after filter decoding, of a given stream
object. An oid argument is required.
Document management -- Portable document format -- Part 2: PDF 2.0, ISO
32000-2, 2020.
- January 6, 2023
+ July 2, 2023
blob - 047745e413075672abecbc5108fe1f15345288b6
blob + 90652212d83170dd293b8e3c8df517e6e34af290
--- pdf.c
+++ pdf.c
#include <hammer/glue.h>
#include <math.h>
#include "pdf.h"
+#include "content.h"
#ifdef LOG
#define VIOL(P,VIOL) h_action(h_sequence(P, h_tell(), NULL), act_viol, VIOL)
p_violsev = violsev;
+ init_content_parser();
+
#if 0
// XXX testing
int r;
exit(2);
}
-void
-dumpstream(FILE *f, const HParsedToken *obj)
+/*
+ * Helper to extract the actual data payload out of a stream object.
+ *
+ * obj is accepted only if it is a two-element sequence whose second element
+ * holds an HParseResult; anything else is rejected via errx() with exit
+ * status 2.  Returns the bytes of that parse result's AST.
+ */
+HBytes
+streamdata(const HParsedToken *obj)
{
HParseResult *res;
- HBytes data;
// XXX properly verify that obj is a stream (needs custom token type)
+ /* verify that obj is a stream: a 2-element sequence ending in a parse result */
if (obj->token_type != TT_SEQUENCE || obj->seq->used != 2 ||
obj->seq->elements[1]->token_type != TT_HParseResult)
errx(2, "%s: requested object is not a stream", infile);
+ /* extract stream data: the AST of the embedded parse result */
res = H_INDEX(HParseResult, obj, 1);
assert(res != NULL);
assert(res->ast != NULL);
- data = H_CAST_BYTES(res->ast);
+ return H_CAST_BYTES(res->ast);
+}
- fwrite(data.token, 1, data.len, f);
+/* write the data payload of the stream object obj to f, unmodified */
+void
+dumpstream(FILE *f, const HParsedToken *obj)
+{
+	const HBytes payload = streamdata(obj);
+
+	fwrite(payload.token, 1, payload.len, f);
}
+/*
+ * Parse the given stream objects as one content stream and pretty-print the
+ * resulting AST to f.
+ *
+ * The payloads of obj[0] .. obj[n-1] are fed to the p_cstream parser in
+ * order, with a single '\0' byte inserted between consecutive streams as a
+ * separator.  Each obj[i] must be a stream object; streamdata() exits the
+ * program otherwise.
+ *
+ * On a parse error, a message is printed to stderr (unless qflag is set) and
+ * the program exits with status 1.  Nothing is printed to f when qflag is
+ * set.
+ */
+void
+dumpcstream(FILE *f, const HParsedToken **obj, size_t n)
+{
+	static const uint8_t sep = '\0';	/* inter-stream separator */
+	HSuspendedParser *p;
+	HParseResult *res;
+	HBytes b;
+	size_t i;	/* same type as n; avoids a signed/unsigned compare */
+
+	/* start the parse; a NULL handle must not reach h_parse_chunk() */
+	p = h_parse_start(p_cstream);
+	if (p == NULL)
+		errx(2, "%s: cannot start content stream parser", infile);
+
+	/* feed the concatenation of the streams into the parser */
+	for (i = 0; i < n; i++) {
+		b = streamdata(obj[i]);
+
+		if (i > 0)
+			h_parse_chunk(p, &sep, 1);	/* separator '\0' */
+		h_parse_chunk(p, b.token, b.len);
+	}
+
+	/* finish the parse and print the result */
+	res = h_parse_finish(p);
+	if (res == NULL) {
+		if (!qflag) {
+			fprintf(stderr, "%s: parse error in content stream\n",
+			    infile);
+		}
+		exit(1);
+	}
+	if (!qflag)
+		h_pprintln(f, res->ast);
+}
+
/*
* This helper implements the standard backwards parsing strategy to read all
* cross-reference sections and trailer dictionaries, starting from the
/* print desired output */
if (!qflag) {
- if (dflag == 's')
+ if (dflag == 'c')
+ dumpcstream(stdout, obj, argc);
+ else if (dflag == 's')
for (i = 0; i < argc; i++)
dumpstream(stdout, obj[i]);
else if (obj != NULL)