commit - 3b3bcd173d9adec4f238ec015acf9cd88c05845d
commit + 1d031337e6b891688dbcdacf72bb17e51487f5f6
blob - 23cf121c93cbf2f07d6ddd74150de47f6180baf8
blob + 3c3a30c92198cfb34695c84edb65d1da7da676ea
--- pdf.c
+++ pdf.c
return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2));
}
-#define act_schars h_act_flatten
+#define act_str_ h_act_flatten
#define act_string act_token
HParsedToken *
return H_MAKE_UINT(x);
}
+#define act_oct3 act_octal
+#define act_oct2 act_octal
+#define act_oct1 act_octal
HParsedToken *
act_xrent(const HParseResult *p, void *u)
//H_RULE(dchar, IN(DCHARS)); /* delimiter */
H_RULE(rchar, NOT_IN(WCHARS DCHARS)); /* regular */
H_RULE(nchar, NOT_IN(WCHARS DCHARS "#")); /* name */
- H_RULE(schar, NOT_IN("()\n\r\\")); /* string literal */
H_ARULE(digit, h_ch_range('0', '9'));
H_ARULE(pdigit, h_ch_range('1', '9'));
H_ARULE(hlower, h_ch_range('a', 'f'));
/* numbers */
H_ARULE(sign, CHX(minus, IGN(plus)));
H_VRULE(intnn, nat);
+ #if 1
H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
SEQ(digits, period, empty), /* 123. */
SEQ(empty, period, digits))); /* .123 */
// XXX ^ we _could_ move the "123." case into intnn...
+ #else
+ // XXX the .123 case above somehow leads to a conflict with litstr...
+ H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */
+ SEQ(digits, period, empty))); /* 123. */
+ #endif
H_RULE(numbnn, CHX(realnn, intnn));
H_RULE(snumb, SEQ(sign, numbnn));
H_ARULE(numb, CHX(snumb, numbnn));
H_ARULE(nstr, h_many(CHX(nchar, nesc))); /* '/' is valid */
H_RULE(name, h_right(slash, nstr));
- /* strings */
- H_RULE(snest, h_indirect());
+ /* strings
+ *
+ * this is so convoluted in order to make it LALR while keeping the
+ * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23")
+ * and end-of-line ("CRLF" vs "CR LF").
+ *
+ * we have to split the base rule 'str' into the variants 'str_o',
+ * 'str_l' and 'str_ol', which must not start with an octal digit, a
+ * linefeed, or either of the two, respectively.
+ */
+ H_RULE(str_ol, h_indirect());
+ H_RULE(str_o, h_indirect());
+ H_RULE(str_l, h_indirect());
+ H_RULE(str, h_indirect());
H_RULE(bsn, p_mapch('n', 0x0a)); /* LF */
H_RULE(bsr, p_mapch('r', 0x0d)); /* CR */
H_RULE(bst, p_mapch('t', 0x09)); /* HT */
H_RULE(bsb, p_mapch('b', 0x08)); /* BS (backspace) */
H_RULE(bsf, p_mapch('f', 0x0c)); /* FF */
H_RULE(escape, CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
- H_ARULE(octal, CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
- H_RULE(wrap, IGN(eol));
- H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon)));
- /* NB: lone backslashes and escaped newlines are ignored */
- H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol)));
- H_RULE(snest_, SEQ(lparen, schars, rparen));
- H_RULE(litstr, h_middle(lparen, schars, rparen));
+ H_ARULE(oct3, REP(odigit,3));
+ H_ARULE(oct2, REP(odigit,2));
+ H_ARULE(oct1, REP(odigit,1));
+ H_RULE(octesc, CHX(SEQ(oct3, str),
+ SEQ(oct2, str_o),
+ SEQ(oct1, str_o)));
+ H_RULE(eolesc, CHX(SEQ(IGN(crlf), str),
+ SEQ(IGN(cr), str_l),
+ SEQ(IGN(lf), str)));
+ H_RULE(schar_o, NOT_IN("()\n\r\\" "01234567"));
+ H_RULE(schar_e, NOT_IN("()\n\r\\" "01234567" "nrtbf"));
+ H_RULE(str_o_, CHX(SEQ(lf, str), str_ol)); /* str "but not" odigit */
+ H_RULE(str_l_, CHX(SEQ(odigit, str), str_ol)); /* str "but not" lf */
+ H_RULE(str_ol_, CHX(SEQ(cr, str_l), /* str "but neither" */
+ SEQ(crlf, str),
+ SEQ(schar_o, str),
+ SEQ(lparen, str, rparen, str),
+ SEQ(IGN(bslash), escape, str),
+ SEQ(IGN(bslash), schar_e, str), /* "lone" bs */
+ /* NB: ^ lone backslashes are to be ignored per spec; we parse
+ * them as "escaping" the following character and drop only the
+ * backslash. this works because they are never truly alone. */
+ SEQ(IGN(bslash), octesc),
+ SEQ(IGN(bslash), eolesc), /* line split */
+ epsilon));
+ H_ARULE(str_, CHX(SEQ(lf, str), SEQ(odigit, str), str_ol));
+ H_RULE(litstr, h_middle(lparen, str, rparen));
H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle));
H_ARULE(string, CHX(litstr, hexstr));
- h_bind_indirect(snest, snest_);
+ h_bind_indirect(str_ol, str_ol_);
+ h_bind_indirect(str_o, str_o_);
+ h_bind_indirect(str_l, str_l_);
+ h_bind_indirect(str, str_);
H_RULE(array, h_indirect());
H_RULE(dict, h_indirect());
p_epsilon = epsilon;
p_return_0 = h_action(epsilon, act_return_uint, (void *)0);
p_return_1 = h_action(epsilon, act_return_uint, (void *)1);
+
+#if 0
+ // XXX testing
+ int r;
+ void errx(int, const char *, ...);
+ HParser *p = obj;
+ if ((r = h_compile(p, PB_LALR, NULL)) != 0)
+ errx(1, "h_compile() failed: %d", r);
+ errx(0, "OK");
+#endif
}