commit 30d8c294a20149ef5d6c21f9727e5a85c8a814d3
from: Sven M. Hallberg <pesco@khjk.org>
date: Fri Feb 11 16:25:10 2022 UTC

produce output while no input is possible

commit - 43cc6062210e39cc517683aa2b419b2dc1a4d944
commit + 30d8c294a20149ef5d6c21f9727e5a85c8a814d3
blob - c7323c7d2b5a930b765a19eda9edbed6106bdd7e
blob + a59e1746b69c19fb9b3313c1c3052b11fce2b306
--- nfa.c
+++ nfa.c
@@ -622,19 +622,24 @@ matchchr(NFA a, const struct set *act, struct set *nex
 
 /*
  * try to produce an output symbol given the current active set.
+ * also activate the corresponding target state in next.
  *
  * output is predicative, i.e. out() may reject a given range (return -1).
  * in that case we continue trying other possibilities. the first to succeed
  * wins.
  *
- * returns the symbol produced or -1 if no output is possible.
+ * returns 0 on success or -1 if no output is possible.
  */
 static int
-generate(struct prep *pr, const struct set *act)
+generate(struct prep *pr, const struct set *act, struct set *next)
 {
+	NFA a = pr->a;
 	struct range r;
 	size_t k;
-	int x;
+	int x, b;
+
+	if (pr->out == NULL)
+		return -1;				/* no output possible */
 
 	FOREACHSTATE(pr->a, k, act) {
 		r = pr->a.state[k].chr;
@@ -647,11 +652,11 @@ generate(struct prep *pr, const struct set *act)
 		assert(r.n < INT_MAX - r.base);		/* no overflow */
 		assert(r.base + r.n >= OUTPUT);		/* all output */
 
-		if (pr->out != NULL) {
-			/* attempt output */
-			x = pr->out(r, k, pr->pos, pr->env);
-			if (x != -1)			/* success? */
-				return x;
+		x = pr->out(r, k, pr->pos, pr->env);	/* attempt output */
+		if (x != -1) {
+			b = matchchr(a, act, next, x);	/* fill next */
+			assert(b == 1);			/* x can only match */
+			return 0;			/* success */
 		}
 	}
 
@@ -663,9 +668,9 @@ nfastep(struct prep *pr, int x, struct set *act, struc
 {
 	NFA a = pr->a;
 	bunch_t *tmp;
-	int r = 0;				/* consume input? */
+	int r = 0;					/* consume input? */
 
-	printstates(act, x);			/* diagnostic output */
+	printstates(act, x);				/* diagnostic output */
 
 	/*
 	 * form epsilon closure of act - also handles inner input.
@@ -673,18 +678,13 @@ nfastep(struct prep *pr, int x, struct set *act, struc
 	 */
 	epsilon_closure(pr, act, next);
 
-	if (inset(act, a.size))			/* final state? */
-		pr->match = pr->pos;		/* save position */
+	if (inset(act, a.size))				/* final state? */
+		pr->match = pr->pos;			/* save position */
 
-	if (matchchr(a, act, next, x))		/* consume input? */
+	if (x < OUTPUT && matchchr(a, act, next, x))	/* match input? */
 		r = 1;
-	else {
-		x = generate(pr, act);		/* generate output? */
-		if (x == -1)
-			r = -1;			/* no output edges here */
-		if (!matchchr(a, act, next, x))	/* fill next */
-			r = -1;			/* should not happen */
-	}
+	else
+		r = generate(pr, act, next);		/* attempt output */
 
 	/* swap sets */
 	tmp = act->bits;
@@ -694,6 +694,23 @@ nfastep(struct prep *pr, int x, struct set *act, struc
 	return r;
 }
 
+static int
+inputactive(const struct prep *pr)
+{
+	struct state s;
+	size_t k;
+
+	FOREACHSTATE(pr->a, k, &pr->act) {
+		s = pr->a.state[k];
+
+		/* NB: assume that chr does not cross range bounds */
+		if (s.chr.n >= CHAR_MIN && s.chr.n <= END)
+			return 1;
+	}
+
+	return 0;
+}
+
 int
 nfacont(struct prep *pr, const char *input, size_t sz)
 {
@@ -706,13 +723,18 @@ nfacont(struct prep *pr, const char *input, size_t sz)
 		pr->pos += r;
 	}
 
+	/* run until output ceases */
+	while(!inputactive(pr))
+		if (nfastep(pr, OUTPUT, &pr->act, &pr->next) == -1)
+			break;
+
 	return pr->match;
 }
 
 int
 nfaend(struct prep *pr)
 {
-	/* run until output ceases */
+	/* run until output ceases or END matches */
 	while(nfastep(pr, END, &pr->act, &pr->next) == 0);
 	return pr->match;
 }