Commit Diff


commit - de66aa24036fc3e84c90ac19bd56bef38e903358
commit + eb08a8335cd7f30115fa269134f3ca646fe30a31
blob - 8e6144e2fb5739dda4c25c3ec02fea82e713db47
blob + b3dafac618072d85714de9d1caca101445ee6f14
--- nfa.c
+++ nfa.c
@@ -445,21 +445,22 @@ printstates(const struct set *set, int x)
 }
 
 
-typedef int sub_t(struct range r, size_t state, const char *pos, void *env);
+typedef int sub_t(int symbol, size_t state, const char *pos, void *env);
+typedef int out_t(struct range r, size_t state, const char *pos, void *env);
 
 struct prep {
 	NFA a;
 	sub_t *sub;		/* the "subconscious" mind of the automaton */
-	void *env;		/* user-supplied environment for sub() */
+	out_t *out;		/* output subroutine */
+	void *env;		/* user-supplied environment for sub and out */
 	struct set eps;		/* which states carry epsilon edges */
-	int out[128];		/* for collecting possible output symbols */
 
 	int sz;
 	bunch_t buf[];
 };
 
 struct prep *
-nfaprep(NFA a, sub_t *sub, void *env)
+nfaprep(NFA a, sub_t *sub, out_t *out, void *env)
 {
 	struct prep *pr;
 	int setsz;
@@ -473,6 +474,7 @@ nfaprep(NFA a, sub_t *sub, void *env)
 	pr->a = a;
 	pr->sz = setsz;
 	pr->sub = sub;
+	pr->out = out;
 	pr->env = env;
 
 #if BITSET
@@ -537,7 +539,7 @@ epsilon_step(struct prep *pr, struct set *act, struct 
 			 */
 			if (s.chr.base == 0 ||
 			    (pr->sub != NULL &&
-			     pr->sub(s.chr, k, p, pr->env))) {
+			     pr->sub(s.chr.base, k, p, pr->env))) {
 				x = k + s.edge_chr;
 				setinsert(work, x);
 				setinsert(act, x);
@@ -633,14 +635,22 @@ matchchr(NFA a, const struct set *act, struct set *nex
 	return match;	/* i.e. "is next non-empty" */
 }
 
+/*
+ * try to produce an output symbol given the current active set.
+ *
+ * output is predicative, i.e. out() may reject a given range (return -1).
+ * in that case we continue trying other possibilities. the first to succeed
+ * wins.
+ *
+ * returns the symbol produced or -1 if no output is possible.
+ */
 int
 generate(struct prep *pr, const struct set *act, const char *p)
 {
 	struct range r;
-	size_t n, k;
+	size_t k;
 	int x;
 
-	n = 0;
 	FOREACHSTATE(pr->a, k, act) {
 		r = pr->a.state[k].chr;
 
@@ -652,26 +662,14 @@ generate(struct prep *pr, const struct set *act, const
 		assert(r.n < INT_MAX - r.base);		/* no overflow */
 		assert(r.base + r.n >= OUTPUT);		/* all output */
 
-		/* call sub() to reduce the range to one symbol (or none) */
-		if (pr->sub != NULL)
-			x = pr->sub(r, k, p, pr->env);
-		else
-			x = -1;
-
-		/*
-		 * collect the possible output symbols in pr->out.
-		 * NB: silently ignore any that would overflow the buffer.
-		 */
-		if (x >= r.base && x < r.base + r.n && n < sizeof pr->out)
-			pr->out[n++] = x;
+		if (pr->out != NULL) {
+			x = pr->out(r, k, p, pr->env);	/* attempt output */
+			if (x != -1)			/* success? */
+				return x;
+		}
 	}
-	if (n > 0) {
-		/* pick a random output symbol and return it */
-		x = pr->out[n == 1 ? 0 : arc4random() % n];
-		// XXX call out() on x - here or in match_()?
-		return x;
-	} else
-		return -1;
+
+	return -1;					/* no output possible */
 }
 
 int
@@ -716,7 +714,7 @@ match_(struct prep *pr, const char *input, size_t sz)
 			x = generate(pr, &act, p);	/* generate output? */
 			if (x == -1)
 				break;	/* no output edges here */
-			if (!matchchr(a, &act, &next, x))
+			if (!matchchr(a, &act, &next, x))	/* fill next */
 				break;	/* should not happen */
 		}
 
@@ -730,12 +728,12 @@ match_(struct prep *pr, const char *input, size_t sz)
 }
 
 int
-match(NFA a, sub_t *sub, void *env, const char *input, size_t sz)
+match(NFA a, sub_t *sub, out_t *out, void *env, const char *input, size_t sz)
 {
 	struct prep *pr;
 	int result;
 
-	pr = nfaprep(a, sub, env);
+	pr = nfaprep(a, sub, out, env);
 	result = match_(pr, input, sz);
 	free(pr);
 
@@ -823,44 +821,59 @@ enum ausgaben {
 };
 
 struct mind {
-	const char *input, *word, *wordend;
+	const char *input, *begword, *word, *atsign;
 };
 
 int
-sub(struct range r, size_t state, const char *pos, void *env_)
+sub(int symbol, size_t state, const char *pos, void *env_)
 {
-#if 1	/* diagnostic output */
-	size_t i, l;
 	struct mind *env = env_;
 
+#if 0	/* diagnostic output */
+	size_t i;
 	assert(env->input != NULL);
 	assert(pos >= env->input);
 	i = pos - env->input;
+#endif
 
-	switch (r.base) {
-	/* inner input */
+	switch (symbol) {
 	case BEGWORD:
 		//printf("%8s[%3zu] pos %zu\n", "BEGWORD", state, i);
-		env->word = pos;
-		return r.base;
+		env->begword = pos;
+		return 1;
+	}
+
+	return 0;	/* my mind is blank */
+}
+
+int
+out(struct range r, size_t state, const char *pos, void *env_)
+{
+	size_t i, l;
+	struct mind *env = env_;
+
+	assert(env->input != NULL);
+	assert(pos >= env->input);
+	i = pos - env->input;
+
 	/* inward output - actions */
+	assert(r.n == 1);
+	switch (r.base) {
 	case ATSIGN:
-		//printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
+		env->atsign = pos;
+		printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
 		return r.base;
 	case WORD:
-		assert(env->word != NULL);
-		assert(env->word < pos);
-		l = pos - env->word;
-		//printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
-		//    (int)l, env->word);
-		env->wordend = pos;
+		assert(env->begword != NULL);
+		assert(env->begword < pos);
+		env->word = pos;
+		l = env->word - env->begword;
+		printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
+		    (int)l, env->begword);
 		return r.base;
 	}
 
-	return 0;	/* my mind is blank */
-#else
-	return r.base;	/* anything goes */
-#endif
+	return -1;	/* what can i do? */
 }
 
 #include <time.h>	/* clock_gettime */
@@ -895,7 +908,7 @@ benchmark(NFA a)
 	size_t sz;
 
 	env.input = inputs[0];
-	pr = nfaprep(a, sub, &env);
+	pr = nfaprep(a, sub, out, &env);
 
 	/* warmup */
 	sz = strlen(inputs[0]);
@@ -972,7 +985,7 @@ main(int argc, char **argv)
 		inp = argv[i];
 		sz = strlen(inp);
 		env.input = inp;
-		n = match(nfa, sub, &env, inp, sz);
+		n = match(nfa, sub, out, &env, inp, sz);
 		if (n == -1)
 			printf("no ");
 		else if (n < sz)