commit - aded37ad86d0f32120068643147415c158458857
commit + ea6ac09d253bcf098c41163d460bbff0d9b17de6
blob - 17787b8e05991c59a17705620cb85583734743e5
blob + 0ca0fffc4a8c6e26b04f6196b65b35d3f0c1e12c
--- pdf.c
+++ pdf.c
const HParsedToken *opstream = H_INDEX_TOKEN(p->ast, 1);
const HParsedToken *tt_text=NULL;
uint8_t *tstr=NULL;
- int textlen=0;
struct textmat tm;
PtNode_T *node = aux->curr_node;
- double cs = node->ts.char_spacing;
- double ws = node->ts.word_spacing;
- double ls = node->ts.line_spacing;
double *px=&tm.cell[4];
double *py=&tm.cell[5];
// TODO:: Handle non-horizontal text
+ char *buf;
+ size_t len = 0, cap = 64; /* characters used/allocated */
+ const char *app; /* characters to append */
+ size_t napp; /* number of characters to append */
+ int newline; /* append a newline first? */
- // Walk through the tokens to determine how much space to allocate
- // Count the number of characters in the stream
- // Concatenate the text into the allocated space
- for (int i =0; i < opstream->seq->used; i++) {
+ buf = malloc(cap);
+ assert(buf != NULL); // XXX
+ // Walk through the tokens and concatenate the pieces
+ for (int i =0; i < opstream->seq->used; i++) {
txte = H_CAST(TextEntry, opstream->seq->elements[i]);
-
- // make sure we are working on the same node as the current node
assert(txte != NULL);
assert(txte->node == node);
+ app = NULL;
+ napp = 0;
+ newline = 0;
+
+ // Process the text operators
switch (txte->type) {
// text state operators
case TS_Tc:
break;
case TS_Tf:
- node->ts.font = txte;
+ node->ts.font = txte;
node->ts.font_size = txte->fref.fontsize;
break;
// text positioning and showing operators
case TP_TD:
node->ts.line_spacing = txte->pos.ty;
+ /* fall through! */
case TP_Td:
if ( (*px == 0.0) && (*py == 0.0) ) { // initialize
*px = txte->pos.tx;
*py = txte->pos.ty;
- // check to see if we are starting a new line
- if ( (node->ts.curr_pos.ty != 0.0) &&
- (node->ts.curr_pos.ty != *py) ) {
- textlen += 1; // add a newline
- }
+ if ((node->ts.curr_pos.ty != 0.0) &&
+ (node->ts.curr_pos.ty != *py) )
+ newline = 1;
} else {
if (txte->pos.ty != 0.0) {
- // we are not rendering -- we just know
- // it is not in the same line if y not
- // equal
- textlen += 1; // add a newline
- *py -= txte->pos.ty; // should this be a +=??
+ *py -= txte->pos.ty;
+ newline = 1;
}
if (txte->pos.tx) { // handle x -- when should we add a space
// TODO:: handle x -- not sure .. for now, ignore
}
}
break;
+
case TP_Tstar:
*py -= node->ts.line_spacing;
- textlen += 1;
+ newline = 1;
break;
case TW_Tqq:
node->ts.word_spacing = txte->aw;
node->ts.char_spacing = txte->ac;
+ /* fall through! */
case TW_Tq:
*py -= node->ts.line_spacing;
- textlen += 1;
+ newline = 1;
+ /* fall through! */
case TW_Tj:
- textlen += txte->tstr.nchars;
+ app = txte->tstr.text;
+ napp = txte->tstr.nchars;
if (node->ts.font != NULL)
*px += txte->tstr.nchars * node->ts.font->fref.fontsize; // TODO:: handle character width from font description
break;
+
case TW_TJ:
- textlen += txte->tarray.flattened.nchars;
+ app = txte->tarray.flattened.text;
+ napp = txte->tarray.flattened.nchars;
if (node->ts.font != NULL)
*px += txte->tarray.flattened.nchars * node->ts.font->fref.fontsize; // TODO:: handle character width from font description
break;
default:
; // ignore
}
+
+ /* grow buf as required, always leave room for a newline */
+ assert(len <= SIZE_MAX - napp - 1); // XXX
+ while (cap < len + napp + 1) {
+ assert(cap < SIZE_MAX / 3); // XXX
+ cap = cap * 3 / 2;
+ buf = realloc(buf, cap);
+ assert(buf != NULL); // XXX
+ }
+
+ /* append text to buf */
+ if (newline)
+ buf[len++] = '\n';
+ if (app != NULL) {
+ memcpy(buf + len, app, napp);
+ len += napp;
+ }
}
+ /* copy buf into arena-allocated tstr and free buf */
+ tstr = h_arena_malloc(p->arena, len);
+ memcpy(tstr, buf, len);
+ free(buf);
+
// Are we within the page bounds? If not, generate a warning
if ( (*px < 0) || (*px > aux->curr_node->mediaBox.tx) ) {
fprintf (stdout, "Final position of the text string is outside media box bounds.\n"
aux->curr_node->mediaBox.tx, *px);
}
- // reset text state
- *px = *py = 0.0;
- node->ts.char_spacing = cs;
- node->ts.word_spacing = ws;
- node->ts.line_spacing = ls;
-
- tstr = h_arena_malloc(p->arena, sizeof(uint8_t) * textlen);
- int idx=0;
- // Now concatenate the pieces
- for (int i =0; i < opstream->seq->used; i++) {
- txte = H_CAST(TextEntry, opstream->seq->elements[i]);
- assert(txte != NULL);
- assert(txte->node == node);
-
- // Process the text operators
- switch (txte->type) {
- // text state operators
- case TS_Tc:
- node->ts.char_spacing = txte->value;
- break;
-
- case TS_Tw:
- node->ts.word_spacing = txte->value;
- break;
-
- case TS_Tz:
- node->ts.horiz_scaling = txte->value;
- break;
-
- case TS_TL:
- node->ts.line_spacing = txte->value;
- break;
-
- case TS_Tf:
- node->ts.font = txte;
- node->ts.font_size = txte->fref.fontsize;
- break;
-
-
- // text positioning and showing operators
- case TP_TD:
- node->ts.line_spacing = txte->pos.ty;
- case TP_Td:
- if ( (*px == 0.0) && (*py == 0.0) ) { // initialize
- *px = txte->pos.tx;
- *py = txte->pos.ty;
- if ( (node->ts.curr_pos.ty != 0.0) &&
- (node->ts.curr_pos.ty != *py) ) {
- tstr[idx] = '\n'; idx += 1;
- }
- } else {
- if (txte->pos.ty != 0.0) {
- tstr[idx] = '\n'; idx += 1;
- *py -= txte->pos.ty; // should this be a +=??
- }
- if (txte->pos.tx) { // handle x -- when should we add a space
- // TODO:: handle x -- not sure .. for now, ignore
- *px += txte->pos.tx;
- }
- }
- break;
-
- case TP_Tstar:
- tstr[idx] = '\n'; idx += 1;
- *py -= node->ts.line_spacing;
- break;
-
-
- case TW_Tqq:
- node->ts.word_spacing = txte->aw;
- node->ts.char_spacing = txte->ac;
- case TW_Tq:
- *py -= node->ts.line_spacing;
- tstr[idx] = '\n'; idx += 1;
- case TW_Tj:
- memcpy(&tstr[idx], txte->tstr.text, txte->tstr.nchars);
- idx += txte->tstr.nchars;
- if (node->ts.font != NULL)
- *px += txte->tstr.nchars * node->ts.font->fref.fontsize; // TODO:: handle character width from font description
- break;
-
-
- case TW_TJ:
- memcpy(&tstr[idx], txte->tarray.flattened.text, txte->tarray.flattened.nchars);
- idx += txte->tarray.flattened.nchars;
- if (node->ts.font != NULL)
- *px += txte->tarray.flattened.nchars * node->ts.font->fref.fontsize; // TODO:: handle character width from font description
- break;
-
- default:
- ; // ignore
- }
- }
- assert(idx == textlen);
-
// update the position on the page
node->ts.curr_pos.tx = *px;
node->ts.curr_pos.ty = *py;
txtobj->type = TW_Tj;
txtobj->tstr.text = tstr;
- txtobj->tstr.nchars = textlen;
- if (textlen)
+ txtobj->tstr.nchars = len;
+ if (len)
txtobj->node = node;
else
txtobj->node = NULL;