commit 30d8c294a20149ef5d6c21f9727e5a85c8a814d3 from: Sven M. Hallberg <pesco@khjk.org> date: Fri Feb 11 16:25:10 2022 UTC produce output while no input is possible commit - 43cc6062210e39cc517683aa2b419b2dc1a4d944 commit + 30d8c294a20149ef5d6c21f9727e5a85c8a814d3 blob - c7323c7d2b5a930b765a19eda9edbed6106bdd7e blob + a59e1746b69c19fb9b3313c1c3052b11fce2b306 --- nfa.c +++ nfa.c @@ -622,19 +622,24 @@ matchchr(NFA a, const struct set *act, struct set *nex /* * try to produce an output symbol given the current active set. + * also activate the corresponding target state in next. * * output is predicative, i.e. out() may reject a given range (return -1). * in that case we continue trying other possibilities. the first to succeed * wins. * - * returns the symbol produced or -1 if no output is possible. + * returns 0 on success or -1 if no output is possible. */ static int -generate(struct prep *pr, const struct set *act) +generate(struct prep *pr, const struct set *act, struct set *next) { + NFA a = pr->a; struct range r; size_t k; - int x; + int x, b; + + if (pr->out == NULL) + return -1; /* no output possible */ FOREACHSTATE(pr->a, k, act) { r = pr->a.state[k].chr; @@ -647,11 +652,11 @@ generate(struct prep *pr, const struct set *act) assert(r.n < INT_MAX - r.base); /* no overflow */ assert(r.base + r.n >= OUTPUT); /* all output */ - if (pr->out != NULL) { - /* attempt output */ - x = pr->out(r, k, pr->pos, pr->env); - if (x != -1) /* success? */ - return x; + x = pr->out(r, k, pr->pos, pr->env); /* attempt output */ + if (x != -1) { + b = matchchr(a, act, next, x); /* fill next */ + assert(b == 1); /* x can only match */ + return 0; /* success */ } } @@ -663,9 +668,9 @@ nfastep(struct prep *pr, int x, struct set *act, struc { NFA a = pr->a; bunch_t *tmp; - int r = 0; /* consume input? */ + int r = 0; /* consume input? */ - printstates(act, x); /* diagnostic output */ + printstates(act, x); /* diagnostic output */ /* * form epsilon closure of act - also handles inner input. @@ -673,18 +678,13 @@ nfastep(struct prep *pr, int x, struct set *act, struc */ epsilon_closure(pr, act, next); - if (inset(act, a.size)) /* final state? */ - pr->match = pr->pos; /* save position */ + if (inset(act, a.size)) /* final state? */ + pr->match = pr->pos; /* save position */ - if (matchchr(a, act, next, x)) /* consume input? */ + if (x < OUTPUT && matchchr(a, act, next, x)) /* match input? */ r = 1; - else { - x = generate(pr, act); /* generate output? */ - if (x == -1) - r = -1; /* no output edges here */ - if (!matchchr(a, act, next, x)) /* fill next */ - r = -1; /* should not happen */ - } + else + r = generate(pr, act, next); /* attempt output */ /* swap sets */ tmp = act->bits; @@ -694,6 +694,23 @@ nfastep(struct prep *pr, int x, struct set *act, struc return r; } +static int +inputactive(const struct prep *pr) +{ + struct state s; + size_t k; + + FOREACHSTATE(pr->a, k, &pr->act) { + s = pr->a.state[k]; + + /* NB: assume that chr does not cross range bounds */ + if (s.chr.n >= CHAR_MIN && s.chr.n <= END) + return 1; + } + + return 0; +} + int nfacont(struct prep *pr, const char *input, size_t sz) { @@ -706,13 +723,18 @@ nfacont(struct prep *pr, const char *input, size_t sz) pr->pos += r; } + /* run until output ceases */ + while(!inputactive(pr)) + if (nfastep(pr, OUTPUT, &pr->act, &pr->next) == -1) + break; + return pr->match; } int nfaend(struct prep *pr) { - /* run until output ceases */ + /* run until output ceases or END matches */ while(nfastep(pr, END, &pr->act, &pr->next) == 0); return pr->match; }