commit 1d031337e6b891688dbcdacf72bb17e51487f5f6 from: Sven M. Hallberg date: Thu Feb 20 12:50:02 2020 UTC intermediate commit: work towards making all of 'obj' LALR commit - 3b3bcd173d9adec4f238ec015acf9cd88c05845d commit + 1d031337e6b891688dbcdacf72bb17e51487f5f6 blob - 23cf121c93cbf2f07d6ddd74150de47f6180baf8 blob + 3c3a30c92198cfb34695c84edb65d1da7da676ea --- pdf.c +++ pdf.c @@ -327,7 +327,7 @@ act_nesc(const HParseResult *p, void *u) return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2)); } -#define act_schars h_act_flatten +#define act_str_ h_act_flatten #define act_string act_token HParsedToken * @@ -341,6 +341,9 @@ act_octal(const HParseResult *p, void *u) return H_MAKE_UINT(x); } +#define act_oct3 act_octal +#define act_oct2 act_octal +#define act_oct1 act_octal HParsedToken * act_xrent(const HParseResult *p, void *u) @@ -494,7 +497,6 @@ init_parser(struct Env *aux) //H_RULE(dchar, IN(DCHARS)); /* delimiter */ H_RULE(rchar, NOT_IN(WCHARS DCHARS)); /* regular */ H_RULE(nchar, NOT_IN(WCHARS DCHARS "#")); /* name */ - H_RULE(schar, NOT_IN("()\n\r\\")); /* string literal */ H_ARULE(digit, h_ch_range('0', '9')); H_ARULE(pdigit, h_ch_range('1', '9')); H_ARULE(hlower, h_ch_range('a', 'f')); @@ -549,10 +551,16 @@ init_parser(struct Env *aux) /* numbers */ H_ARULE(sign, CHX(minus, IGN(plus))); H_VRULE(intnn, nat); + #if 1 H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ SEQ(digits, period, empty), /* 123. */ SEQ(empty, period, digits))); /* .123 */ // XXX ^ we _could_ move the "123." case into intnn... + #else + // XXX the .123 case above somehow leads to a conflict with litstr... + H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ + SEQ(digits, period, empty))); /* 123. */ + #endif H_RULE(numbnn, CHX(realnn, intnn)); H_RULE(snumb, SEQ(sign, numbnn)); H_ARULE(numb, CHX(snumb, numbnn)); @@ -562,24 +570,59 @@ init_parser(struct Env *aux) H_ARULE(nstr, h_many(CHX(nchar, nesc))); /* '/' is valid */ H_RULE(name, h_right(slash, nstr)); - /* strings */ - H_RULE(snest, h_indirect()); + /* strings + * + * this is so convoluted in order to make it LALR including the + * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23") + * and end-of-line ("CRLF" vs "CR LF"). + * + * we have to split the base rule 'str' into variants 'str_o' and + * 'str_l' depending on whether they may start with an octal digit or + * linefeed, respectively. + */ + H_RULE(str_ol, h_indirect()); + H_RULE(str_o, h_indirect()); + H_RULE(str_l, h_indirect()); + H_RULE(str, h_indirect()); H_RULE(bsn, p_mapch('n', 0x0a)); /* LF */ H_RULE(bsr, p_mapch('r', 0x0d)); /* CR */ H_RULE(bst, p_mapch('t', 0x09)); /* HT */ H_RULE(bsb, p_mapch('b', 0x08)); /* BS (backspace) */ H_RULE(bsf, p_mapch('f', 0x0c)); /* FF */ H_RULE(escape, CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash)); - H_ARULE(octal, CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1))); - H_RULE(wrap, IGN(eol)); - H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon))); - /* NB: lone backslashes and escaped newlines are ignored */ - H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol))); - H_RULE(snest_, SEQ(lparen, schars, rparen)); - H_RULE(litstr, h_middle(lparen, schars, rparen)); + H_ARULE(oct3, REP(odigit,3)); + H_ARULE(oct2, REP(odigit,2)); + H_ARULE(oct1, REP(odigit,1)); + H_RULE(octesc, CHX(SEQ(oct3, str), + SEQ(oct2, str_o), + SEQ(oct1, str_o))); + H_RULE(eolesc, CHX(SEQ(IGN(crlf), str), + SEQ(IGN(cr), str_l), + SEQ(IGN(lf), str))); + H_RULE(schar_o, NOT_IN("()\n\r\\" "01234567")); + H_RULE(schar_e, NOT_IN("()\n\r\\" "01234567" "nrtbf")); + H_RULE(str_o_, CHX(SEQ(lf, str), str_ol)); /* str "but not" odigit */ + H_RULE(str_l_, CHX(SEQ(odigit, str), str_ol)); /* str "but not" lf */ + H_RULE(str_ol_, CHX(SEQ(cr, str_l), /* str "but neither" */ + SEQ(crlf, str), + SEQ(schar_o, str), + SEQ(lparen, str, rparen, str), + SEQ(IGN(bslash), escape, str), + SEQ(IGN(bslash), schar_e, str), /* "lone" bs */ + /* NB: ^ lone backslashes are to be ignored per spec, but we + * let them "escape" with the following character. this works + * because they are never truly alone. */ + SEQ(IGN(bslash), octesc), + SEQ(IGN(bslash), eolesc), /* line split */ + epsilon)); + H_ARULE(str_, CHX(SEQ(lf, str), SEQ(odigit, str), str_ol)); + H_RULE(litstr, h_middle(lparen, str, rparen)); H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle)); H_ARULE(string, CHX(litstr, hexstr)); - h_bind_indirect(snest, snest_); + h_bind_indirect(str_ol, str_ol_); + h_bind_indirect(str_o, str_o_); + h_bind_indirect(str_l, str_l_); + h_bind_indirect(str, str_); H_RULE(array, h_indirect()); H_RULE(dict, h_indirect()); @@ -687,6 +730,16 @@ init_parser(struct Env *aux) p_epsilon = epsilon; p_return_0 = h_action(epsilon, act_return_uint, (void *)0); p_return_1 = h_action(epsilon, act_return_uint, (void *)1); + +#if 0 + // XXX testing + int r; + void errx(int, const char *, ...); + HParser *p = obj; + if ((r = h_compile(p, PB_LALR, NULL)) != 0) + errx(1, "h_compile() failed: %d", r); + errx(0, "OK"); +#endif }