Commit Diff


commit - e10003ea19a486f1c252b5d5a9aec9e5ca8d683e
commit + d632b3b70d9b6e226557e450243d0efd5e39105a
blob - 3c0e10b3e2b91d31bda7df666cbfbbfb056092eb
blob + 56420c89d5fdd54c9f1cbb6811d56afea7b8674a
--- ini_n.c
+++ ini_n.c
@@ -103,24 +103,22 @@ DEF_(leftbr,	CHAR('['))
 DEF_(rightbr,	CHAR(']'))
 
 DEF_(eol,	CHOICE(nl, end))
-DEF_(bra,	MEMO(wss, leftbr))
+DEF_(bra,	SEQ(wss, leftbr))
 DEF_(ket,	SEQ(rightbr, wss))
 DEF_(comment,	OPT(semi, lchars))
 DEF_(sname,	MANY1(schar))
 DEF_(key,	MANY1(kchar))
 DEF_(value,	MANY1(lchar))
 
-DEF_(empty,	MEMO(wss, comment, nl))
+DEF_(empty,	SEQ(wss, comment, nl))
 DEF_(empties,	MANY(empty))
-DEF (header_,	TRY(empties, bra, sname, ket, eol))
-DEF_(header,	MEMO(header_))
-DEF (entry_,	TRY(empties, key, eq, value, eol))
-DEF_(entry,	MEMO(entry_))
+DEF_(header,	SEQ(empties, bra, sname, ket, eol))
+DEF_(entry,	SEQ(empties, key, eq, value, eol))
 DEF_(entries,	MANY(entry))
 
-DEF_(tail,	MEMO(eof, wss, eol, anys) || EPSILON)
+DEF_(tail,	OPT(eof, wss, eol, anys))
 DEF_(sects,	MANY(header, entries))
-DEF_(inifile,	SEQ(sects, empties, wss, tail))
+DEF_(inifile,	TRY(sects, empties, wss, tail))
 
 bool
 trace(void *ctx, void *env, const char *s, size_t len)
@@ -129,9 +127,6 @@ trace(void *ctx, void *env, const char *s, size_t len)
 	const char *begin = env;
 	size_t pos;
 
-	if (env == NULL)
-		return true;
-
 	pos = s - begin;
 	printf("%4zx: %4zu byte %s\n", pos, len, nt);
 	return true;
@@ -181,9 +176,6 @@ main(int argc, char *argv[])
 	assert(cache.table == NULL);
 	assert(cache.capacity == 0);
 	assert(cache.nused == 0);
-	//cache.capacity = 128;
-	//cache.table = calloc(cache.capacity, sizeof(cache.table[0]));
-	//assert(cache.table != NULL);
 
 	/* run parser */
 	p = inifile((struct stream){input, input + sz}, &cache, (void *)input);
blob - c0944d6aa7cafe1ad2f542aa564623476f45d467
blob + 36c111a0ae1ca28ace0c6759f83458c8ce5dd28c
--- minip_n.h
+++ minip_n.h
@@ -15,12 +15,23 @@ struct cache;
 typedef const char *parser(struct stream, struct cache *, void *);
 
 /* define a parser (nonterminal) */
-#define DEF(NT, EXPR)						\
-	static const char *					\
-	NT(struct stream inp_, struct cache *cac_, void *env_)	\
-	{							\
-		const char *p_ = inp_.p;			\
-		return (EXPR) ? p_ : NULL;			\
+#define DEF(NT, EXPR)							\
+	static const char *						\
+	NT(struct stream inp_, struct cache *cac_, void *env_)		\
+	{								\
+		const char *p_ = inp_.p;				\
+		struct memento *mem_;					\
+									\
+		/* look up (pos,sym) in cache and return if found */	\
+		if ((mem_ = lookup(cac_, inp_.p, __func__)) != NULL)	\
+			return mem_->value;				\
+									\
+		/* else run the expression and return if it matched */	\
+		if (EXPR)						\
+			return p_;					\
+									\
+		/* else, return and memoize a negative result */	\
+		return store(cac_, inp_.p, __func__, NULL);		\
 	}
 
 /* primitive expressions */
@@ -33,11 +44,11 @@ typedef const char *parser(struct stream, struct cache
 #define OMEGA		(p_ = inp_.end)
 
 /*
- * logical operators allowed on expressions:
+ * logical operators allowed on expressions *sans* actions:
  *
- *  ||	(ordered) choice; first match wins
  *  &&	restriction (lookahead); rhs is consumed
- *  !	negation; only allowed to the left of &&
+ *  ||	ordered choice
+ *  !	negation; only allowed to the left of && (to avoid consumption)
  */
 
 /* variable-length argument lists */
@@ -49,25 +60,38 @@ typedef const char *parser(struct stream, struct cache
 #define CHOICE(...)	(p_ = choice_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
 #define SEQ(...)	(p_ = seq_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
 
-/* derived combinators; arguments form an implicit SEQ */
-#define MANY(...)	(p_ = many_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
-#define MANY1(...)	(p_ = many1_(inp_, cac_, env_, PARGS(__VA_ARGS__)))
-#define OPT(...)	(SEQ(__VA_ARGS__) || EPSILON)
+/* secondary combinators -- could be derived from the basics */
+#define MANY(X)		(p_ = many_(inp_, cac_, env_, X))
+#define MANY1(X)	(p_ = many1_(inp_, cac_, env_, X))
+#define OPT(X)		((p_ = (X)(inp_, cac_, env_)) || EPSILON)
 
+/* NB:
+ * because memoization only applies to nonterminals and the above combinators
+ * (MANY, MANY1, OPT) form implicit choices with them, we can't use variable
+ * argument lists that form an implicit SEQ here. otherwise, since those
+ * anonymous argument sequences would not be memoized (memoization only happens
+ * on actual nonterminals), actions could be executed in them, even if the
+ * sequence failed to match in a later element.
+ */
 
 #include <stdbool.h>
 
 /*
  * semantic actions:
  *
- * - attach to an expression with &&: (EXPR) && ACTION(f, ctx)
+ * - attach to an expression with &&: EXPR && ACTION(f, ctx)
  * - receive an arbitrary context parameter via ACTION()'s second argument.
  * - receive an arbitrary environment pointer from the top-level parser call.
  * - execute (only) after their subordinate parser(s) matched.
  * - can also act as validations by returning the intended result.
+ *
+ * Thanks to prerecognition and negative memoization, it is also possible to
+ * attach actions before the expression. E.g.:
+ *
+ *     ACTION(print, "[") && EXPR && ACTION(print, "]")
  */
 typedef bool action(void *, void *, const char *, size_t);
-#define ACTION(F, C)	(F)((C), env_, inp_.p, p_ - inp_.p)
+#define ACTION(F, C)	(env_ == NULL || (F)((C), env_, inp_.p, p_ - inp_.p))
 
 
 /*
@@ -82,14 +106,21 @@ typedef bool action(void *, void *, const char *, size
  * in any case, beware of the overhead. since TRY executes the sequence twice,
  * it can easily lead to a substantial increase in recognition time.
  */
-#define MATCH(...)	seq_(inp_, NULL, NULL, PARGS(__VA_ARGS__))
+#define MATCH(...)	seq_(inp_, cac_, NULL, PARGS(__VA_ARGS__))
 #define TRY(...)	((env_ == NULL || MATCH(__VA_ARGS__)) && SEQ(__VA_ARGS__))
 
 
 /*
- * memoization of parse results (~ packrat parsing)
+ * general memoization of parse results (~ packrat parsing)
+ *
+ * handles the positive cases, but only when actions are enabled, i.e. env
+ * pointer is non-null. otherwise, actions would be lost - cf. TRY().
+ *
+ * NB: negative results are always cached - see DEF().
+ *
+ * usage: DEF(nt, EXPR && MEMO)
  */
-#define MEMO(...)	(p_ = memo_(inp_, cac_, env_, __func__, PARGS(__VA_ARGS__)))
+#define MEMO	(env_ == NULL || store(cac_, inp_.p, __func__, p_))
 
 /* an entry in the memoization hash table */
 struct memento {
@@ -184,13 +215,13 @@ choice_(struct stream inp, struct cache *cac, void *en
 }
 
 static inline const char *
-many_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n)
+many_(struct stream inp, struct cache *cac, void *env, parser *pa)
 {
 	const char *bak;
 
 	for (;;) {
 		bak = inp.p;
-		inp.p = seq_(inp, cac, env, ps, n);
+		inp.p = pa(inp, cac, env);
 		if (inp.p == NULL)
 			return bak;
 		assert(inp.p > bak);	/* progress */
@@ -198,14 +229,15 @@ many_(struct stream inp, struct cache *cac, void *env,
 }
 
 static inline const char *
-many1_(struct stream inp, struct cache *cac, void *env, parser *ps[], size_t n)
+many1_(struct stream inp, struct cache *cac, void *env, parser *pa)
 {
-	inp.p = seq_(inp, cac, env, ps, n);
+	inp.p = pa(inp, cac, env);
 	if (inp.p == NULL)
 		return NULL;
-	return many_(inp, cac, env, ps, n);
+	return many_(inp, cac, env, pa);
 }
 
+
 #include <stdlib.h>	/* calloc */
 #include <limits.h>	/* UINT_MAX */
 #include <stdint.h>	/* SIZE_MAX */
@@ -316,14 +348,14 @@ grow(struct cache *cache)
 	cache->capacity = newcap;
 }
 
-static inline void
+static inline const void *
 store(struct cache *cache, const void *key, const void *key2, const void *val)
 {
 	struct memento *mem;
 	unsigned int h, s;
 
 	if (cache == NULL)
-		return;
+		return val;
 
 	/* grow hashtable if necessary */
 	if (cache->nused >= cache->capacity * 9 / 10)
@@ -347,27 +379,8 @@ store(struct cache *cache, const void *key, const void
 	mem->key2 = key2;
 	mem->value = val;
 	cache->nused++;
-}
 
-static inline const char *
-memo_(struct stream inp, struct cache *cache, void *env, const char *sym,
-    parser *ps[], size_t n)
-{
-	struct memento *mem;
-	const char *res;
-
-	/* look up (pos,sym) in cache and return if found */
-	mem = lookup(cache, inp.p, sym);
-	if (mem != NULL)
-		return mem->value;
-
-	/* else run the argument sequence */
-	res = seq_(inp, cache, env, ps, n);
-
-	/* and store the result */
-	store(cache, inp.p, sym, res);
-
-	return res;
+	return val;
 }
 
 #endif /* MINIP_N_H_ */