commit - ed4d5d022fc7fef0a97e5bcd973f93d3f5eaa331
commit + 43cc6062210e39cc517683aa2b419b2dc1a4d944
blob - 1722e03753bff277083f0fa53e6b78aab8d46692
blob + c7323c7d2b5a930b765a19eda9edbed6106bdd7e
--- nfa.c
+++ nfa.c
struct set next; /* set of "next active" states */
struct set eps; /* which states carry epsilon edges */
+ int pos; /* current input position */
+ int match; /* length of longest match so far */
+
int sz;
bunch_t buf[];
};
pr->act.sz = pr->next.sz = pr->sz;
pr->act.bits = pr->buf;
pr->next.bits = pr->buf + pr->sz;
+ pr->pos = 0;
+ pr->match = -1; /* no match */
memset(pr->act.bits, 0, pr->act.sz * sizeof(bunch_t));
setinsert(&pr->act, 0);
* returns 1 iff any states remain in the 'work' set, 0 otherwise.
*/
static int
-epsilon_step(struct prep *pr, struct set *act, struct set *work, const char *p)
+epsilon_step(struct prep *pr, struct set *act, struct set *work)
{
NFA a = pr->a;
struct state s;
*/
if (s.chr.base == 0 ||
(pr->sub != NULL &&
- pr->sub(s.chr.base, k, p, pr->env))) {
+ pr->sub(s.chr.base, k, pr->pos, pr->env))) {
x = k + s.edge_chr;
setinsert(work, x);
setinsert(act, x);
}
static void
-epsilon_closure(struct prep *pr, struct set *act, struct set *wrk, const char *p)
+epsilon_closure(struct prep *pr, struct set *act, struct set *wrk)
{
/* initial workset = initial active set */
assert(wrk->sz == act->sz);
memcpy(wrk->bits, act->bits, act->sz * sizeof(bunch_t));
- while (epsilon_step(pr, act, wrk, p));
+ while (epsilon_step(pr, act, wrk));
/* NB: wrk is now empty */
}
* returns the symbol produced or -1 if no output is possible.
*/
static int
-generate(struct prep *pr, const struct set *act, const char *p)
+generate(struct prep *pr, const struct set *act)
{
struct range r;
size_t k;
assert(r.base + r.n >= OUTPUT); /* all output */
if (pr->out != NULL) {
- x = pr->out(r, k, p, pr->env); /* attempt output */
+ /* attempt output */
+ x = pr->out(r, k, pr->pos, pr->env);
if (x != -1) /* success? */
return x;
}
return -1; /* no output possible */
}
-int
-nfacont(struct prep *pr, const char *input, size_t sz)
+static int
+nfastep(struct prep *pr, int x, struct set *act, struct set *next)
{
NFA a = pr->a;
- struct set act, next;
bunch_t *tmp;
- int i, x;
- int result = -1; /* no match */
- const char *p;
+ int r = 0; /* consume input? */
- /* get state sets from prep */
- act = pr->act;
- next = pr->next;
+ printstates(act, x); /* diagnostic output */
- /* iterate over the input */
- for(i = 0; i <= sz; ) {
- p = input + i; /* input pointer */
- x = (i < sz ? *p : END); /* input symbol */
-
- printstates(&act, x); /* diagnostic output */
-
- /*
- * form epsilon closure of act - also handles inner input.
- * uses, and incidentally clears, our 'next' set for its work.
- */
- // XXX inner input not possible after last outer input
- // - but since last input symbol is END, this is okay
- epsilon_closure(pr, &act, &next, p);
-
- if (inset(&act, a.size)) /* final state? */
- result = i; /* save position */
-
- if (matchchr(a, &act, &next, x)) /* consume input? */
- i++;
- else {
- x = generate(pr, &act, p); /* generate output? */
- if (x == -1)
- break; /* no output edges here */
- if (!matchchr(a, &act, &next, x)) /* fill next */
- break; /* should not happen */
- }
+ /*
+ * form epsilon closure of act - also handles inner input.
+ * uses, and incidentally clears, our 'next' set for its work.
+ */
+ epsilon_closure(pr, act, next);
- /* swap sets */
- tmp = act.bits;
- act.bits = next.bits;
- next.bits = tmp;
+ if (inset(act, a.size)) /* final state? */
+ pr->match = pr->pos; /* save position */
+
+ if (matchchr(a, act, next, x)) /* consume input? */
+ r = 1;
+ else {
+ x = generate(pr, act); /* generate output? */
+ if (x == -1)
+ r = -1; /* no output edges here */
+ if (!matchchr(a, act, next, x)) /* fill next */
+ r = -1; /* should not happen */
}
- /* store state sets back into prep for future use */
- pr->act = act;
- pr->next = next;
+ /* swap sets */
+ tmp = act->bits;
+ act->bits = next->bits;
+ next->bits = tmp;
- return result;
+ return r;
}
int
+nfacont(struct prep *pr, const char *input, size_t sz)
+{
+ int i, r;
+
+ for(i = 0; i < sz; i += r) {
+ r = nfastep(pr, input[i], &pr->act, &pr->next);
+ if (r == -1) /* no match */
+ break;
+ pr->pos += r;
+ }
+
+ return pr->match;
+}
+
+int
+nfaend(struct prep *pr)
+{
+ /* run until output ceases */
+ while(nfastep(pr, END, &pr->act, &pr->next) == 0);
+ return pr->match;
+}
+
+int
nfarun(struct prep *pr, const char *input, size_t sz)
{
nfareset(pr);
- return nfacont(pr, input, sz);
+ nfacont(pr, input, sz);
+ nfaend(pr);
+ return pr->match;
}
int
blob - 22a469b798cb78f7b90b30d82f3a274e4ddf577b
blob + 32fe3ed0306698b10a5c55d7fc0a7f170c32983d
--- nfa.h
+++ nfa.h
/* running NFAs */
-typedef int sub_t(int symbol, size_t state, const char *pos, void *env);
-typedef int out_t(struct range r, size_t state, const char *pos, void *env);
+typedef int sub_t(int symbol, size_t state, int, void *env);
+typedef int out_t(struct range r, size_t state, int, void *env);
struct prep;
struct prep *nfaprep(NFA, sub_t *, out_t *, void *);
void nfareset(struct prep *);
int nfarun(struct prep *, const char *, size_t);
int nfacont(struct prep *, const char *, size_t);
+int nfaend(struct prep *);
int nfamatch(NFA, sub_t *, out_t *, void *, const char *, size_t);
blob - 109812125ad9e36833c3be6cb874930d70a822b6
blob + 5a9bec5e272c9ba8094181042bf6d785a5e6da23
--- test_nfa.c
+++ test_nfa.c
};
int
-sub(int symbol, size_t state, const char *pos, void *env_)
+sub(int symbol, size_t state, int i, void *env_)
{
struct mind *env = env_;
-#if 0 /* diagnostic output */
- size_t i;
assert(env->input != NULL);
- assert(pos >= env->input);
- i = pos - env->input;
-#endif
switch (symbol) {
case BEGWORD:
- //printf("%8s[%3zu] pos %zu\n", "BEGWORD", state, i);
- env->begword = pos;
+ //printf("%8s[%3zu] pos %d\n", "BEGWORD", state, i);
+ env->begword = env->input + i;
return 1;
}
}
int
-out_print(struct range r, size_t state, const char *pos, void *env_)
+out_print(struct range r, size_t state, int i, void *env_)
{
- size_t i, l;
+ int l;
struct mind *env = env_;
assert(env->input != NULL);
- assert(pos >= env->input);
- i = pos - env->input;
/* inward output - actions */
assert(r.n == 1);
switch (r.base) {
case ATSIGN:
- env->atsign = pos;
- printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1);
+ env->atsign = env->input + i;
+ printf("%8s[%3zu] pos %d\n", "ATSIGN", state, i - 1);
return r.base;
case WORD:
+ env->word = env->input + i;
assert(env->begword != NULL);
- assert(env->begword < pos);
- env->word = pos;
+ assert(env->begword < env->word);
l = env->word - env->begword;
- printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l,
- (int)l, env->begword);
+ printf("%8s[%3zu] pos %d\t%.*s\n", "WORD", state, i - l,
+ l, env->begword);
return r.base;
}
}
int
-out_bench(struct range r, size_t state, const char *pos, void *env_)
+out_bench(struct range r, size_t state, int i, void *env_)
{
struct mind *env = env_;
assert(env->input != NULL);
- assert(pos >= env->input);
/* inward output - actions */
assert(r.n == 1);
switch (r.base) {
case ATSIGN:
- env->atsign = pos;
+ env->atsign = env->input + i;
return r.base;
case WORD:
+ env->word = env->input + i;
assert(env->begword != NULL);
- assert(env->begword < pos);
- env->word = pos;
+ assert(env->begword < env->word);
return r.base;
}