Commit Diff


commit - ed4d5d022fc7fef0a97e5bcd973f93d3f5eaa331
commit + 43cc6062210e39cc517683aa2b419b2dc1a4d944
blob - 1722e03753bff277083f0fa53e6b78aab8d46692
blob + c7323c7d2b5a930b765a19eda9edbed6106bdd7e
--- nfa.c
+++ nfa.c
@@ -421,6 +421,9 @@ struct prep {
 	struct set next;	/* set of "next active" states */
 	struct set eps;		/* which states carry epsilon edges */
 
+	int pos;		/* current input position */
+	int match;		/* length of longest match so far */
+
 	int sz;
 	bunch_t buf[];
 };
@@ -432,6 +435,8 @@ nfareset(struct prep *pr)
 	pr->act.sz = pr->next.sz = pr->sz;
 	pr->act.bits = pr->buf;
 	pr->next.bits = pr->buf + pr->sz;
+	pr->pos = 0;
+	pr->match = -1;		/* no match */
 
 	memset(pr->act.bits, 0, pr->act.sz * sizeof(bunch_t));
 	setinsert(&pr->act, 0);
@@ -482,7 +487,7 @@ nfaprep(NFA a, sub_t *sub, out_t *out, void *env)
  * returns 1 iff any states remain in the 'work' set, 0 otherwise.
  */
 static int
-epsilon_step(struct prep *pr, struct set *act, struct set *work, const char *p)
+epsilon_step(struct prep *pr, struct set *act, struct set *work)
 {
 	NFA a = pr->a;
 	struct state s;
@@ -519,7 +524,7 @@ epsilon_step(struct prep *pr, struct set *act, struct 
 			 */
 			if (s.chr.base == 0 ||
 			    (pr->sub != NULL &&
-			     pr->sub(s.chr.base, k, p, pr->env))) {
+			     pr->sub(s.chr.base, k, pr->pos, pr->env))) {
 				x = k + s.edge_chr;
 				setinsert(work, x);
 				setinsert(act, x);
@@ -552,13 +557,13 @@ epsilon_step(struct prep *pr, struct set *act, struct 
 }
 
 static void
-epsilon_closure(struct prep *pr, struct set *act, struct set *wrk, const char *p)
+epsilon_closure(struct prep *pr, struct set *act, struct set *wrk)
 {
 	/* initial workset = initial active set */
 	assert(wrk->sz == act->sz);
 	memcpy(wrk->bits, act->bits, act->sz * sizeof(bunch_t));
 
-	while (epsilon_step(pr, act, wrk, p));
+	while (epsilon_step(pr, act, wrk));
 	/* NB: wrk is now empty */
 }
 
@@ -625,7 +630,7 @@ matchchr(NFA a, const struct set *act, struct set *nex
  * returns the symbol produced or -1 if no output is possible.
  */
 static int
-generate(struct prep *pr, const struct set *act, const char *p)
+generate(struct prep *pr, const struct set *act)
 {
 	struct range r;
 	size_t k;
@@ -643,7 +648,8 @@ generate(struct prep *pr, const struct set *act, const
 		assert(r.base + r.n >= OUTPUT);		/* all output */
 
 		if (pr->out != NULL) {
-			x = pr->out(r, k, p, pr->env);	/* attempt output */
+			/* attempt output */
+			x = pr->out(r, k, pr->pos, pr->env);
 			if (x != -1)			/* success? */
 				return x;
 		}
@@ -652,66 +658,72 @@ generate(struct prep *pr, const struct set *act, const
 	return -1;					/* no output possible */
 }
 
-int
-nfacont(struct prep *pr, const char *input, size_t sz)
+static int
+nfastep(struct prep *pr, int x, struct set *act, struct set *next)
 {
 	NFA a = pr->a;
-	struct set act, next;
 	bunch_t *tmp;
-	int i, x;
-	int result = -1;				/* no match */
-	const char *p;
+	int r = 0;				/* consume input? */
 
-	/* get state sets from prep */
-	act = pr->act;
-	next = pr->next;
+	printstates(act, x);			/* diagnostic output */
 
-	/* iterate over the input */
-	for(i = 0; i <= sz; ) {
-		p = input + i;				/* input pointer */
-		x = (i < sz ? *p : END);		/* input symbol */
-
-		printstates(&act, x);			/* diagnostic output */
-
-		/*
-		 * form epsilon closure of act - also handles inner input.
-		 * uses, and incidentally clears, our 'next' set for its work.
-		 */
-		// XXX inner input not possible after last outer input
-		//     - but since last input symbol is END, this is okay
-		epsilon_closure(pr, &act, &next, p);
-
-		if (inset(&act, a.size))		/* final state? */
-			result = i;			/* save position */
-
-		if (matchchr(a, &act, &next, x))	/* consume input? */
-			i++;
-		else {
-			x = generate(pr, &act, p);	/* generate output? */
-			if (x == -1)
-				break;	/* no output edges here */
-			if (!matchchr(a, &act, &next, x))	/* fill next */
-				break;	/* should not happen */
-		}
+	/*
+	 * form epsilon closure of act - also handles inner input.
+	 * uses, and incidentally clears, our 'next' set for its work.
+	 */
+	epsilon_closure(pr, act, next);
 
-		/* swap sets */
-		tmp = act.bits;
-		act.bits = next.bits;
-		next.bits = tmp;
+	if (inset(act, a.size))			/* final state? */
+		pr->match = pr->pos;		/* save position */
+
+	if (matchchr(a, act, next, x))		/* consume input? */
+		r = 1;
+	else {
+		x = generate(pr, act);		/* generate output? */
+		if (x == -1)
+			r = -1;			/* no output edges here */
+		if (!matchchr(a, act, next, x))	/* fill next */
+			r = -1;			/* should not happen */
 	}
 
-	/* store state sets back into prep for future use */
-	pr->act = act;
-	pr->next = next;
+	/* swap sets */
+	tmp = act->bits;
+	act->bits = next->bits;
+	next->bits = tmp;
 
-	return result;
+	return r;
 }
 
 int
+nfacont(struct prep *pr, const char *input, size_t sz)
+{
+	int i, r;
+
+	for(i = 0; i < sz; i += r) {
+		r = nfastep(pr, input[i], &pr->act, &pr->next);
+		if (r == -1)			/* no match */
+			break;
+		pr->pos += r;
+	}
+
+	return pr->match;
+}
+
+int
+nfaend(struct prep *pr)
+{
+	/* run until output ceases */
+	while(nfastep(pr, END, &pr->act, &pr->next) == 0);
+	return pr->match;
+}
+
+int
 nfarun(struct prep *pr, const char *input, size_t sz)
 {
 	nfareset(pr);
-	return nfacont(pr, input, sz);
+	nfacont(pr, input, sz);
+	nfaend(pr);
+	return pr->match;
 }
 
 int
blob - 22a469b798cb78f7b90b30d82f3a274e4ddf577b
blob + 32fe3ed0306698b10a5c55d7fc0a7f170c32983d
--- nfa.h
+++ nfa.h
@@ -63,13 +63,14 @@ NFA star(NFA);
 
 /* running NFAs */
 
-typedef int sub_t(int symbol, size_t state, const char *pos, void *env);
-typedef int out_t(struct range r, size_t state, const char *pos, void *env);
+typedef int sub_t(int symbol, size_t state, int, void *env);
+typedef int out_t(struct range r, size_t state, int, void *env);
 
 struct prep;
 struct prep *nfaprep(NFA, sub_t *, out_t *, void *);
 void nfareset(struct prep *);
 int nfarun(struct prep *, const char *, size_t);
 int nfacont(struct prep *, const char *, size_t);
+int nfaend(struct prep *);
 
 int nfamatch(NFA, sub_t *, out_t *, void *, const char *, size_t);
blob - 109812125ad9e36833c3be6cb874930d70a822b6
blob + 5a9bec5e272c9ba8094181042bf6d785a5e6da23
--- test_nfa.c
+++ test_nfa.c
@@ -92,21 +92,16 @@ struct mind {
 };
 
 int
-sub(int symbol, size_t state, const char *pos, void *env_)
+sub(int symbol, size_t state, int i, void *env_)
 {
 	struct mind *env = env_;
 
-#if 0	/* diagnostic output */
-	size_t i;
 	assert(env->input != NULL);
-	assert(pos >= env->input);
-	i = pos - env->input;
-#endif
 
 	switch (symbol) {
 	case BEGWORD:
-		//printf("%8s[%3zu] pos %zu\n", "BEGWORD", state, i);
-		env->begword = pos;
+		//printf("%8s[%3zu] pos %d\n", "BEGWORD", state, i);
+		env->begword = env->input + i;
 		return 1;
 	}
 
@@ -114,29 +109,27 @@ sub(int symbol, size_t state, const char *pos, void *e
 }
 
 int
-out_print(struct range r, size_t state, const char *pos, void *env_)
+out_print(struct range r, size_t state, int i, void *env_)
 {
-	size_t i, l;
+	int l;
 	struct mind *env = env_;
 
 	assert(env->input != NULL);
-	assert(pos >= env->input);
-	i = pos - env->input;
 
 	/* inward output - actions */
 	assert(r.n == 1);
 	switch (r.base) {
 	case ATSIGN:
-		env->atsign = pos;
-		printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
+		env->atsign = env->input + i;
+		printf("%8s[%3zu] pos %d\n", "ATSIGN", state, i - 1);
 		return r.base;
 	case WORD:
+		env->word = env->input + i;
 		assert(env->begword != NULL);
-		assert(env->begword < pos);
-		env->word = pos;
+		assert(env->begword < env->word);
 		l = env->word - env->begword;
-		printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
-		    (int)l, env->begword);
+		printf("%8s[%3zu] pos %d\t%.*s\n", "WORD", state, i - l,
+		    l, env->begword);
 		return r.base;
 	}
 
@@ -144,23 +137,22 @@ out_print(struct range r, size_t state, const char *po
 }
 
 int
-out_bench(struct range r, size_t state, const char *pos, void *env_)
+out_bench(struct range r, size_t state, int i, void *env_)
 {
 	struct mind *env = env_;
 
 	assert(env->input != NULL);
-	assert(pos >= env->input);
 
 	/* inward output - actions */
 	assert(r.n == 1);
 	switch (r.base) {
 	case ATSIGN:
-		env->atsign = pos;
+		env->atsign = env->input + i;
 		return r.base;
 	case WORD:
+		env->word = env->input + i;
 		assert(env->begword != NULL);
-		assert(env->begword < pos);
-		env->word = pos;
+		assert(env->begword < env->word);
 		return r.base;
 	}