commit eb08a8335cd7f30115fa269134f3ca646fe30a31 from: Sven M. Hallberg date: Fri Jan 28 17:02:11 2022 UTC separate sub() and out() commit - de66aa24036fc3e84c90ac19bd56bef38e903358 commit + eb08a8335cd7f30115fa269134f3ca646fe30a31 blob - 8e6144e2fb5739dda4c25c3ec02fea82e713db47 blob + b3dafac618072d85714de9d1caca101445ee6f14 --- nfa.c +++ nfa.c @@ -445,21 +445,22 @@ printstates(const struct set *set, int x) } -typedef int sub_t(struct range r, size_t state, const char *pos, void *env); +typedef int sub_t(int symbol, size_t state, const char *pos, void *env); +typedef int out_t(struct range r, size_t state, const char *pos, void *env); struct prep { NFA a; sub_t *sub; /* the "subconscious" mind of the automaton */ - void *env; /* user-supplied environment for sub() */ + out_t *out; /* output subroutine */ + void *env; /* user-supplied environment for sub and out */ struct set eps; /* which states carry epsilon edges */ - int out[128]; /* for collecting possible output symbols */ int sz; bunch_t buf[]; }; struct prep * -nfaprep(NFA a, sub_t *sub, void *env) +nfaprep(NFA a, sub_t *sub, out_t *out, void *env) { struct prep *pr; int setsz; @@ -473,6 +474,7 @@ nfaprep(NFA a, sub_t *sub, void *env) pr->a = a; pr->sz = setsz; pr->sub = sub; + pr->out = out; pr->env = env; #if BITSET @@ -537,7 +539,7 @@ epsilon_step(struct prep *pr, struct set *act, struct */ if (s.chr.base == 0 || (pr->sub != NULL && - pr->sub(s.chr, k, p, pr->env))) { + pr->sub(s.chr.base, k, p, pr->env))) { x = k + s.edge_chr; setinsert(work, x); setinsert(act, x); @@ -633,14 +635,22 @@ matchchr(NFA a, const struct set *act, struct set *nex return match; /* i.e. "is next non-empty" */ } +/* + * try to produce an output symbol given the current active set. + * + * output is predicative, i.e. out() may reject a given range (return -1). + * in that case we continue trying other possibilities. the first to succeed + * wins. + * + * returns the symbol produced or -1 if no output is possible. + */ int generate(struct prep *pr, const struct set *act, const char *p) { struct range r; - size_t n, k; + size_t k; int x; - n = 0; FOREACHSTATE(pr->a, k, act) { r = pr->a.state[k].chr; @@ -652,26 +662,14 @@ generate(struct prep *pr, const struct set *act, const assert(r.n < INT_MAX - r.base); /* no overflow */ assert(r.base + r.n >= OUTPUT); /* all output */ - /* call sub() to reduce the range to one symbol (or none) */ - if (pr->sub != NULL) - x = pr->sub(r, k, p, pr->env); - else - x = -1; - - /* - * collect the possible output symbols in pr->out. - * NB: silently ignore any that would overflow the buffer. - */ - if (x >= r.base && x < r.base + r.n && n < sizeof pr->out) - pr->out[n++] = x; + if (pr->out != NULL) { + x = pr->out(r, k, p, pr->env); /* attempt output */ + if (x != -1) /* success? */ + return x; + } } - if (n > 0) { - /* pick a random output symbol and return it */ - x = pr->out[n == 1 ? 0 : arc4random() % n]; - // XXX call out() on x - here or in match_()? - return x; - } else - return -1; + + return -1; /* no output possible */ } int @@ -716,7 +714,7 @@ match_(struct prep *pr, const char *input, size_t sz) x = generate(pr, &act, p); /* generate output? */ if (x == -1) break; /* no output edges here */ - if (!matchchr(a, &act, &next, x)) + if (!matchchr(a, &act, &next, x)) /* fill next */ break; /* should not happen */ } @@ -730,12 +728,12 @@ match_(struct prep *pr, const char *input, size_t sz) } int -match(NFA a, sub_t *sub, void *env, const char *input, size_t sz) +match(NFA a, sub_t *sub, out_t *out, void *env, const char *input, size_t sz) { struct prep *pr; int result; - pr = nfaprep(a, sub, env); + pr = nfaprep(a, sub, out, env); result = match_(pr, input, sz); free(pr); @@ -823,44 +821,59 @@ enum ausgaben { }; struct mind { - const char *input, *word, *wordend; + const char *input, *begword, *word, *atsign; }; int -sub(struct range r, size_t state, const char *pos, void *env_) +sub(int symbol, size_t state, const char *pos, void *env_) { -#if 1 /* diagnostic output */ - size_t i, l; struct mind *env = env_; +#if 0 /* diagnostic output */ + size_t i; assert(env->input != NULL); assert(pos >= env->input); i = pos - env->input; +#endif - switch (r.base) { - /* inner input */ + switch (symbol) { case BEGWORD: //printf("%8s[%3zu] pos %zu\n", "BEGWORD", state, i); - env->word = pos; - return r.base; + env->begword = pos; + return 1; + } + + return 0; /* my mind is blank */ +} + +int +out(struct range r, size_t state, const char *pos, void *env_) +{ + size_t i, l; + struct mind *env = env_; + + assert(env->input != NULL); + assert(pos >= env->input); + i = pos - env->input; + /* inward output - actions */ + assert(r.n == 1); + switch (r.base) { case ATSIGN: - //printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1); + env->atsign = pos; + printf("%8s[%3zu] pos %zu\n", "ATSIGN", state, i - 1); return r.base; case WORD: - assert(env->word != NULL); - assert(env->word < pos); - l = pos - env->word; - //printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l, - // (int)l, env->word); - env->wordend = pos; + assert(env->begword != NULL); + assert(env->begword < pos); + env->word = pos; + l = env->word - env->begword; + printf("%8s[%3zu] pos %zu\t%.*s\n", "WORD", state, i - l, + (int)l, env->begword); return r.base; } - return 0; /* my mind is blank */ -#else - return r.base; /* anything goes */ -#endif + return -1; /* what can i do? */ } #include /* clock_gettime */ @@ -895,7 +908,7 @@ benchmark(NFA a) size_t sz; env.input = inputs[0]; - pr = nfaprep(a, sub, &env); + pr = nfaprep(a, sub, out, &env); /* warmup */ sz = strlen(inputs[0]); @@ -972,7 +985,7 @@ main(int argc, char **argv) inp = argv[i]; sz = strlen(inp); env.input = inp; - n = match(nfa, sub, &env, inp, sz); + n = match(nfa, sub, out, &env, inp, sz); if (n == -1) printf("no "); else if (n < sz)