commit d632b3b70d9b6e226557e450243d0efd5e39105a from: Sven M. Hallberg date: Mon Dec 05 03:48:56 2022 UTC negative memoization fixes commit - e10003ea19a486f1c252b5d5a9aec9e5ca8d683e commit + d632b3b70d9b6e226557e450243d0efd5e39105a blob - 3c0e10b3e2b91d31bda7df666cbfbbfb056092eb blob + 56420c89d5fdd54c9f1cbb6811d56afea7b8674a --- ini_n.c +++ ini_n.c @@ -103,24 +103,22 @@ DEF_(leftbr, CHAR('[')) DEF_(rightbr, CHAR(']')) DEF_(eol, CHOICE(nl, end)) -DEF_(bra, MEMO(wss, leftbr)) +DEF_(bra, SEQ(wss, leftbr)) DEF_(ket, SEQ(rightbr, wss)) DEF_(comment, OPT(semi, lchars)) DEF_(sname, MANY1(schar)) DEF_(key, MANY1(kchar)) DEF_(value, MANY1(lchar)) -DEF_(empty, MEMO(wss, comment, nl)) +DEF_(empty, SEQ(wss, comment, nl)) DEF_(empties, MANY(empty)) -DEF (header_, TRY(empties, bra, sname, ket, eol)) -DEF_(header, MEMO(header_)) -DEF (entry_, TRY(empties, key, eq, value, eol)) -DEF_(entry, MEMO(entry_)) +DEF_(header, SEQ(empties, bra, sname, ket, eol)) +DEF_(entry, SEQ(empties, key, eq, value, eol)) DEF_(entries, MANY(entry)) -DEF_(tail, MEMO(eof, wss, eol, anys) || EPSILON) +DEF_(tail, OPT(eof, wss, eol, anys)) DEF_(sects, MANY(header, entries)) -DEF_(inifile, SEQ(sects, empties, wss, tail)) +DEF_(inifile, TRY(sects, empties, wss, tail)) bool trace(void *ctx, void *env, const char *s, size_t len) @@ -129,9 +127,6 @@ trace(void *ctx, void *env, const char *s, size_t len) const char *begin = env; size_t pos; - if (env == NULL) - return true; - pos = s - begin; printf("%4zx: %4zu byte %s\n", pos, len, nt); return true; @@ -181,9 +176,6 @@ main(int argc, char *argv[]) assert(cache.table == NULL); assert(cache.capacity == 0); assert(cache.nused == 0); - //cache.capacity = 128; - //cache.table = calloc(cache.capacity, sizeof(cache.table[0])); - //assert(cache.table != NULL); /* run parser */ p = inifile((struct stream){input, input + sz}, &cache, (void *)input); blob - c0944d6aa7cafe1ad2f542aa564623476f45d467 blob + 36c111a0ae1ca28ace0c6759f83458c8ce5dd28c --- minip_n.h +++ minip_n.h @@ -15,12 +15,23 @@ struct cache; typedef const char *parser(struct stream, struct cache *, void *); /* define a parser (nonterminal) */ -#define DEF(NT, EXPR) \ - static const char * \ - NT(struct stream inp_, struct cache *cac_, void *env_) \ - { \ - const char *p_ = inp_.p; \ - return (EXPR) ? p_ : NULL; \ +#define DEF(NT, EXPR) \ + static const char * \ + NT(struct stream inp_, struct cache *cac_, void *env_) \ + { \ + const char *p_ = inp_.p; \ + struct memento *mem_; \ + \ + /* look up (pos,sym) in cache and return if found */ \ + if ((mem_ = lookup(cac_, inp_.p, __func__)) != NULL) \ + return mem_->value; \ + \ + /* else run the expression and return if it matched */ \ + if (EXPR) \ + return p_; \ + \ + /* else, return and memoize a negative result */ \ + return store(cac_, inp_.p, __func__, NULL); \ } /* primitive expressions */ @@ -33,11 +44,11 @@ typedef const char *parser(struct stream, struct cache #define OMEGA (p_ = inp_.end) /* - * logical operators allowed on expressions: + * logical operators allowed on expressions *sans* actions: * - * || (ordered) choice; first match wins * && restriction (lookahead); rhs is consumed - * ! negation; only allowed to the left of && + * || ordered choice + * ! negation; only allowed to the left of && (to avoid consumption) */ /* variable-length argument lists */ @@ -49,25 +60,38 @@ typedef const char *parser(struct stream, struct cache #define CHOICE(...) (p_ = choice_(inp_, cac_, env_, PARGS(__VA_ARGS__))) #define SEQ(...) (p_ = seq_(inp_, cac_, env_, PARGS(__VA_ARGS__))) -/* derived combinators; arguments form an implicit SEQ */ -#define MANY(...) (p_ = many_(inp_, cac_, env_, PARGS(__VA_ARGS__))) -#define MANY1(...) (p_ = many1_(inp_, cac_, env_, PARGS(__VA_ARGS__))) -#define OPT(...) (SEQ(__VA_ARGS__) || EPSILON) +/* secondary combinators -- could be derived from the basics */ +#define MANY(X) (p_ = many_(inp_, cac_, env_, X)) +#define MANY1(X) (p_ = many1_(inp_, cac_, env_, X)) +#define OPT(X) ((p_ = (X)(inp_, cac_, env_)) || EPSILON) +/* NB: + * because memoization only applies to nonterminals and the above combinators + * (MANY, MANY1, OPT) form implicit choices with them, we can't use variable + * argument lists that form an implicit SEQ here. otherwise, since those + * anonymous argument sequences would not be memoized (memoization only happens + * on actual nonterminals), actions could be executed in them, even if the + * sequence failed to match in a later element. + */ #include /* * semantic actions: * - * - attach to an expression with &&: (EXPR) && ACTION(f, ctx) + * - attach to an expression with &&: EXPR && ACTION(f, ctx) * - receive an arbitrary context parameter via ACTION()'s second argument. * - receive an arbitrary environment pointer from the top-level parser call. * - execute (only) after their subordinate parser(s) matched. * - can also act as validations by returning the intended result. + * + * Thanks to prerecognition and negative memoization, it is also possible to + * attach actions before the expression. E.g.: + * + * ACTION(print, "[") && EXPR && ACTION(print, "]") */ typedef bool action(void *, void *, const char *, size_t); -#define ACTION(F, C) (F)((C), env_, inp_.p, p_ - inp_.p) +#define ACTION(F, C) (env_ == NULL || (F)((C), env_, inp_.p, p_ - inp_.p)) /* @@ -82,14 +106,21 @@ typedef bool action(void *, void *, const char *, size * in any case, beware of the overhead. since TRY executes the sequence twice, * it can easily lead to a substantial increase in recognition time. */ -#define MATCH(...) seq_(inp_, NULL, NULL, PARGS(__VA_ARGS__)) +#define MATCH(...) seq_(inp_, cac_, NULL, PARGS(__VA_ARGS__)) #define TRY(...) ((env_ == NULL || MATCH(__VA_ARGS__)) && SEQ(__VA_ARGS__)) /* - * memoization of parse results (~ packrat parsing) + * general memoization of parse results (~ packrat parsing) + * + * handles the positive cases, but only when actions are enabled, i.e. env + * pointer is non-null. otherwise, actions would be lost - cf. TRY(). + * + * NB: negative results are always cached - see DEF(). + * + * usage: DEF(nt, EXPR && MEMO) */ -#define MEMO(...) (p_ = memo_(inp_, cac_, env_, __func__, PARGS(__VA_ARGS__))) +#define MEMO (env_ == NULL || store(cac_, inp_.p, __func__, p_)) /* an entry in the memoization hash table */ struct memento { @@ -184,13 +215,13 @@ choice_(struct stream inp, struct cache *cac, void *en } static inline const char * -many_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n) +many_(struct stream inp, struct cache *cac, void *env, parser *pa) { const char *bak; for (;;) { bak = inp.p; - inp.p = seq_(inp, cac, env, ps, n); + inp.p = pa(inp, cac, env); if (inp.p == NULL) return bak; assert(inp.p > bak); /* progress */ @@ -198,14 +229,15 @@ many_(struct stream inp, struct cache *cac, void *env, } static inline const char * -many1_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n) +many1_(struct stream inp, struct cache *cac, void *env, parser *pa) { - inp.p = seq_(inp, cac, env, ps, n); + inp.p = pa(inp, cac, env); if (inp.p == NULL) return NULL; - return many_(inp, cac, env, ps, n); + return many_(inp, cac, env, pa); } + #include /* calloc */ #include /* UINT_MAX */ #include /* SIZE_MAX */ @@ -316,14 +348,14 @@ grow(struct cache *cache) cache->capacity = newcap; } -static inline void +static inline const void * store(struct cache *cache, const void *key, const void *key2, const void *val) { struct memento *mem; unsigned int h, s; if (cache == NULL) - return; + return val; /* grow hashtable if necessary */ if (cache->nused >= cache->capacity * 9 / 10) @@ -347,27 +379,8 @@ store(struct cache *cache, const void *key, const void mem->key2 = key2; mem->value = val; cache->nused++; -} -static inline const char * -memo_(struct stream inp, struct cache *cache, void *env, const char *sym, - parser *ps[], size_t n) -{ - struct memento *mem; - const char *res; - - /* look up (pos,sym) in cache and return if found */ - mem = lookup(cache, inp.p, sym); - if (mem != NULL) - return mem->value; - - /* else run the argument sequence */ - res = seq_(inp, cache, env, ps, n); - - /* and store the result */ - store(cache, inp.p, sym, res); - - return res; + return val; } #endif /* MINIP_N_H_ */