commit - de66aa24036fc3e84c90ac19bd56bef38e903358
commit + eb08a8335cd7f30115fa269134f3ca646fe30a31
blob - 8e6144e2fb5739dda4c25c3ec02fea82e713db47
blob + b3dafac618072d85714de9d1caca101445ee6f14
--- nfa.c
+++ nfa.c
}
-typedef int sub_t(struct range r, size_t state, const char *pos, void *env);
+typedef int sub_t(int symbol, size_t state, const char *pos, void *env);
+typedef int out_t(struct range r, size_t state, const char *pos, void *env);
struct prep {
NFA a;
sub_t *sub; /* the "subconscious" mind of the automaton */
- void *env; /* user-supplied environment for sub() */
+ out_t *out; /* output subroutine */
+ void *env; /* user-supplied environment for sub and out */
struct set eps; /* which states carry epsilon edges */
- int out[128]; /* for collecting possible output symbols */
int sz;
bunch_t buf[];
};
struct prep *
-nfaprep(NFA a, sub_t *sub, void *env)
+nfaprep(NFA a, sub_t *sub, out_t *out, void *env)
{
struct prep *pr;
int setsz;
pr->a = a;
pr->sz = setsz;
pr->sub = sub;
+ pr->out = out;
pr->env = env;
#if BITSET
*/
if (s.chr.base == 0 ||
(pr->sub != NULL &&
- pr->sub(s.chr, k, p, pr->env))) {
+ pr->sub(s.chr.base, k, p, pr->env))) {
x = k + s.edge_chr;
setinsert(work, x);
setinsert(act, x);
return match; /* i.e. "is next non-empty" */
}
+/*
+ * try to produce an output symbol given the current active set.
+ *
+ * output is predicative, i.e. out() may reject a given range (return -1).
+ * in that case we continue trying other possibilities. the first to succeed
+ * wins.
+ *
+ * returns the symbol produced or -1 if no output is possible.
+ */
int
generate(struct prep *pr, const struct set *act, const char *p)
{
struct range r;
- size_t n, k;
+ size_t k;
int x;
- n = 0;
FOREACHSTATE(pr->a, k, act) {
r = pr->a.state[k].chr;
assert(r.n < INT_MAX - r.base); /* no overflow */
assert(r.base + r.n >= OUTPUT); /* all output */
- /* call sub() to reduce the range to one symbol (or none) */
- if (pr->sub != NULL)
- x = pr->sub(r, k, p, pr->env);
- else
- x = -1;
-
- /*
- * collect the possible output symbols in pr->out.
- * NB: silently ignore any that would overflow the buffer.
- */
- if (x >= r.base && x < r.base + r.n && n < sizeof pr->out)
- pr->out[n++] = x;
+ if (pr->out != NULL) {
+ x = pr->out(r, k, p, pr->env); /* attempt output */
+ if (x != -1) /* success? */
+ return x;
+ }
}
- if (n > 0) {
- /* pick a random output symbol and return it */
- x = pr->out[n == 1 ? 0 : arc4random() % n];
- // XXX call out() on x - here or in match_()?
- return x;
- } else
- return -1;
+
+ return -1; /* no output possible */
}
int
x = generate(pr, &act, p); /* generate output? */
if (x == -1)
break; /* no output edges here */
- if (!matchchr(a, &act, &next, x))
+ if (!matchchr(a, &act, &next, x)) /* fill next */
break; /* should not happen */
}
}
int
-match(NFA a, sub_t *sub, void *env, const char *input, size_t sz)
+match(NFA a, sub_t *sub, out_t *out, void *env, const char *input, size_t sz)
{
struct prep *pr;
int result;
- pr = nfaprep(a, sub, env);
+ pr = nfaprep(a, sub, out, env);
result = match_(pr, input, sz);
free(pr);
};
struct mind {
- const char *input, *word, *wordend;
+ const char *input, *begword, *word, *atsign;
};
int
-sub(struct range r, size_t state, const char *pos, void *env_)
+sub(int symbol, size_t state, const char *pos, void *env_)
{
-#if 1 /* diagnostic output */
- size_t i, l;
struct mind *env = env_;
+#if 0 /* diagnostic output */
+ size_t i;
assert(env->input != NULL);
assert(pos >= env->input);
i = pos - env->input;
+#endif
- switch (r.base) {
- /* inner input */
+ switch (symbol) {
case BEGWORD:
//printf("%8s[%3zu] pos %zu\n", "BEGWORD", state, i);
- env->word = pos;
- return r.base;
+ env->begword = pos;
+ return 1;
+ }
+
+ return 0; /* my mind is blank */
+}
+
+int
+out(struct range r, size_t state, const char *pos, void *env_)
+{
+ size_t i, l;
+ struct mind *env = env_;
+
+ assert(env->input != NULL);
+ assert(pos >= env->input);
+ i = pos - env->input;
+
/* inward output - actions */
+ assert(r.n == 1);
+ switch (r.base) {
case ATSIGN:
- //printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
+ env->atsign = pos;
+ printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
return r.base;
case WORD:
- assert(env->word != NULL);
- assert(env->word < pos);
- l = pos - env->word;
- //printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
- // (int)l, env->word);
- env->wordend = pos;
+ assert(env->begword != NULL);
+ assert(env->begword < pos);
+ env->word = pos;
+ l = env->word - env->begword;
+ printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
+ (int)l, env->begword);
return r.base;
}
- return 0; /* my mind is blank */
-#else
- return r.base; /* anything goes */
-#endif
+ return -1; /* what can i do? */
}
#include <time.h> /* clock_gettime */
size_t sz;
env.input = inputs[0];
- pr = nfaprep(a, sub, &env);
+ pr = nfaprep(a, sub, out, &env);
/* warmup */
sz = strlen(inputs[0]);
inp = argv[i];
sz = strlen(inp);
env.input = inp;
- n = match(nfa, sub, &env, inp, sz);
+ n = match(nfa, sub, out, &env, inp, sz);
if (n == -1)
printf("no ");
else if (n < sz)