commit - e10003ea19a486f1c252b5d5a9aec9e5ca8d683e
commit + d632b3b70d9b6e226557e450243d0efd5e39105a
blob - 3c0e10b3e2b91d31bda7df666cbfbbfb056092eb
blob + 56420c89d5fdd54c9f1cbb6811d56afea7b8674a
--- ini_n.c
+++ ini_n.c
DEF_(rightbr, CHAR(']'))
DEF_(eol, CHOICE(nl, end))
-DEF_(bra, MEMO(wss, leftbr))
+DEF_(bra, SEQ(wss, leftbr))
DEF_(ket, SEQ(rightbr, wss))
DEF_(comment, OPT(semi, lchars))
DEF_(sname, MANY1(schar))
DEF_(key, MANY1(kchar))
DEF_(value, MANY1(lchar))
-DEF_(empty, MEMO(wss, comment, nl))
+DEF_(empty, SEQ(wss, comment, nl))
DEF_(empties, MANY(empty))
-DEF (header_, TRY(empties, bra, sname, ket, eol))
-DEF_(header, MEMO(header_))
-DEF (entry_, TRY(empties, key, eq, value, eol))
-DEF_(entry, MEMO(entry_))
+DEF_(header, SEQ(empties, bra, sname, ket, eol))
+DEF_(entry, SEQ(empties, key, eq, value, eol))
DEF_(entries, MANY(entry))
-DEF_(tail, MEMO(eof, wss, eol, anys) || EPSILON)
+DEF_(tail, OPT(eof, wss, eol, anys))
DEF_(sects, MANY(header, entries))
-DEF_(inifile, SEQ(sects, empties, wss, tail))
+DEF_(inifile, TRY(sects, empties, wss, tail))
bool
trace(void *ctx, void *env, const char *s, size_t len)
const char *begin = env;
size_t pos;
- if (env == NULL)
- return true;
-
pos = s - begin;
printf("%4zx: %4zu byte %s\n", pos, len, nt);
return true;
assert(cache.table == NULL);
assert(cache.capacity == 0);
assert(cache.nused == 0);
- //cache.capacity = 128;
- //cache.table = calloc(cache.capacity, sizeof(cache.table[0]));
- //assert(cache.table != NULL);
/* run parser */
p = inifile((struct stream){input, input + sz}, &cache, (void *)input);
blob - c0944d6aa7cafe1ad2f542aa564623476f45d467
blob + 36c111a0ae1ca28ace0c6759f83458c8ce5dd28c
--- minip_n.h
+++ minip_n.h
typedef const char *parser(struct stream, struct cache *, void *);
/* define a parser (nonterminal) */
-#define DEF(NT, EXPR) \
- static const char * \
- NT(struct stream inp_, struct cache *cac_, void *env_) \
- { \
- const char *p_ = inp_.p; \
- return (EXPR) ? p_ : NULL; \
+#define DEF(NT, EXPR) \
+ static const char * \
+ NT(struct stream inp_, struct cache *cac_, void *env_) \
+ { \
+ const char *p_ = inp_.p; \
+ struct memento *mem_; \
+ \
+ /* look up (pos,sym) in cache and return if found */ \
+ if ((mem_ = lookup(cac_, inp_.p, __func__)) != NULL) \
+ return mem_->value; \
+ \
+ /* else run the expression and return if it matched */ \
+ if (EXPR) \
+ return p_; \
+ \
+ /* else, return and memoize a negative result */ \
+ return store(cac_, inp_.p, __func__, NULL); \
}
/* primitive expressions */
#define OMEGA (p_ = inp_.end)
/*
- * logical operators allowed on expressions:
+ * logical operators allowed on expressions *sans* actions:
*
- * || (ordered) choice; first match wins
* && restriction (lookahead); rhs is consumed
- * ! negation; only allowed to the left of &&
+ * || ordered choice
+ * ! negation; only allowed to the left of && (to avoid consumption)
*/
/* variable-length argument lists */
#define CHOICE(...) (p_ = choice_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
#define SEQ(...) (p_ = seq_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
-/* derived combinators; arguments form an implicit SEQ */
-#define MANY(...) (p_ = many_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
-#define MANY1(...) (p_ = many1_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
-#define OPT(...) (SEQ(__VA_ARGS__) || EPSILON)
+/* secondary combinators -- could be derived from the basics */
+#define MANY(X) (p_ = many_(inp_, cac_, env_, X))
+#define MANY1(X) (p_ = many1_(inp_, cac_, env_, X))
+#define OPT(X) ((p_ = (X)(inp_, cac_, env_)) || EPSILON)
+/* NB:
+ * because memoization only applies to nonterminals and the above combinators
+ * (MANY, MANY1, OPT) form implicit choices with them, we can't use variable
+ * argument lists that form an implicit SEQ here. otherwise, since those
+ * anonymous argument sequences would not be memoized (memoization only happens
+ * on actual nonterminals), actions could be executed in them, even if the
+ * sequence failed to match in a later element.
+ */
#include <stdbool.h>
/*
* semantic actions:
*
- * - attach to an expression with &&: (EXPR) && ACTION(f, ctx)
+ * - attach to an expression with &&: EXPR && ACTION(f, ctx)
* - receive an arbitrary context parameter via ACTION()'s second argument.
* - receive an arbitrary environment pointer from the top-level parser call.
* - execute (only) after their subordinate parser(s) matched.
* - can also act as validations by returning the intended result.
+ *
+ * Thanks to prerecognition and negative memoization, it is also possible to
+ * attach actions before the expression. E.g.:
+ *
+ * ACTION(print, "[") && EXPR && ACTION(print, "]")
*/
typedef bool action(void *, void *, const char *, size_t);
-#define ACTION(F, C) (F)((C), env_, inp_.p, p_ - inp_.p)
+#define ACTION(F, C) (env_ == NULL || (F)((C), env_, inp_.p, p_ - inp_.p))
/*
* in any case, beware of the overhead. since TRY executes the sequence twice,
* it can easily lead to a substantial increase in recognition time.
*/
-#define MATCH(...) seq_(inp_, NULL, NULL, PARGS(__VA_ARGS__))
+#define MATCH(...) seq_(inp_, cac_, NULL, PARGS(__VA_ARGS__))
#define TRY(...) ((env_ == NULL || MATCH(__VA_ARGS__)) && SEQ(__VA_ARGS__))
/*
- * memoization of parse results (~ packrat parsing)
+ * general memoization of parse results (~ packrat parsing)
+ *
+ * handles the positive cases, but only when actions are enabled, i.e. env
+ * pointer is non-null. otherwise, actions would be lost - cf. TRY().
+ *
+ * NB: negative results are always cached - see DEF().
+ *
+ * usage: DEF(nt, EXPR && MEMO)
*/
-#define MEMO(...) (p_ = memo_(inp_, cac_, env_, __func__, PARGS(__VA_ARGS__)))
+#define MEMO (env_ == NULL || store(cac_, inp_.p, __func__, p_))
/* an entry in the memoization hash table */
struct memento {
}
static inline const char *
-many_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n)
+many_(struct stream inp, struct cache *cac, void *env, parser *pa)
{
const char *bak;
for (;;) {
bak = inp.p;
- inp.p = seq_(inp, cac, env, ps, n);
+ inp.p = pa(inp, cac, env);
if (inp.p == NULL)
return bak;
assert(inp.p > bak); /* progress */
}
static inline const char *
-many1_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n)
+many1_(struct stream inp, struct cache *cac, void *env, parser *pa)
{
- inp.p = seq_(inp, cac, env, ps, n);
+ inp.p = pa(inp, cac, env);
if (inp.p == NULL)
return NULL;
- return many_(inp, cac, env, ps, n);
+ return many_(inp, cac, env, pa);
}
+
#include <stdlib.h> /* calloc */
#include <limits.h> /* UINT_MAX */
#include <stdint.h> /* SIZE_MAX */
cache->capacity = newcap;
}
-static inline void
+static inline const void *
store(struct cache *cache, const void *key, const void *key2, const void *val)
{
struct memento *mem;
unsigned int h, s;
if (cache == NULL)
- return;
+ return val;
/* grow hashtable if necessary */
if (cache->nused >= cache->capacity * 9 / 10)
mem->key2 = key2;
mem->value = val;
cache->nused++;
-}
-static inline const char *
-memo_(struct stream inp, struct cache *cache, void *env, const char *sym,
- parser *ps[], size_t n)
-{
- struct memento *mem;
- const char *res;
-
- /* look up (pos,sym) in cache and return if found */
- mem = lookup(cache, inp.p, sym);
- if (mem != NULL)
- return mem->value;
-
- /* else run the argument sequence */
- res = seq_(inp, cache, env, ps, n);
-
- /* and store the result */
- store(cache, inp.p, sym, res);
-
- return res;
+ return val;
}
#endif /* MINIP_N_H_ */