commit d06d6aeb4fa8728f00ec1c65372e083ef15bdc1e from: Sven M. Hallberg date: Thu Mar 24 19:13:59 2022 UTC switch to unsigned char note: char can be portably converted to unsigned char (modulo arithmetic) but the converse is not true (undefined behavior)! commit - 1ffbb49cc704cab9fe4226cc3e2ea0f95cb1b4e3 commit + d06d6aeb4fa8728f00ec1c65372e083ef15bdc1e blob - 271ffcea40c0d6daeceae6d7df3da694ff0a19d2 blob + d9f741cecdf57b87a0fe389aac6ebfa3c13c6d4b --- nfa.c +++ nfa.c @@ -62,21 +62,21 @@ range_(int x, int y) /* [x, y) */ } NFA -range(char min, char max) +range(unsigned char min, unsigned char max) { assert (min <= max); return range_(min, max + 1); } NFA -nrange(char min, char max) +nrange(unsigned char min, unsigned char max) { assert (min <= max); return range_(max + 1, min); /* negative range */ } NFA -chr(char x) +chr(unsigned char x) { return range(x, x); } @@ -84,14 +84,14 @@ chr(char x) NFA any(void) { - return range_(CHAR_MIN, OUTPUT); /* any input symbol */ + return range_(0, OUTPUT); /* any input symbol */ /* i.e. everything up to END */ } NFA symbol(int x) /* input only */ { - assert (x >= CHAR_MIN); + assert (x >= 0); assert (x < OUTPUT); return range_(x, x + 1); } @@ -136,7 +136,7 @@ think(int x) * iteration in epsilon_closure() below. 
*/ assert (x >= INNER); - assert (x < CHAR_MIN); + assert (x < 0); return range_(x, x); } @@ -772,10 +772,11 @@ nfastep(struct prep *pr, int x, struct set *act, struc int nfacont(struct prep *pr, const char *input, size_t sz) { - int i, r; + int i, r, x; for(i = 0; i < sz; i += r) { - r = nfastep(pr, input[i], &pr->act, &pr->next); + x = (unsigned char)input[i]; + r = nfastep(pr, x, &pr->act, &pr->next); if (r == -1) /* no match */ break; pr->pos += r; blob - 34a70ddea66684a948f180773f4061be2d0cf89c blob + 90937283569bbf8a3d2a6f72b575ecde791bd44b --- nfa.h +++ nfa.h @@ -2,19 +2,19 @@ * pesco 2021-2022, isc license */ -#include <limits.h> /* INT_MAX, INT_MIN */ +#include <limits.h> /* UCHAR_MAX, INT_MAX, INT_MIN */ #include <stddef.h> /* size_t */ -#if CHAR_MAX == INT_MAX /* it could happen */ -#error need INT_MAX > CHAR_MAX +#if UCHAR_MAX >= INT_MAX /* it could happen */ +#error need INT_MAX > UCHAR_MAX #endif /* - * Note: INT_MAX > CHAR_MAX implies sizeof(int) > sizeof(char), and since + * Note: INT_MAX > UCHAR_MAX implies sizeof(int) > sizeof(char), and since * sizeof(char) = 1, the number of bits in an int is n * CHAR_BIT with n > 1. * In other words, int has at least CHAR_BIT bits more than char. CHAR_BIT is * greater than 1 (actually, at least 8), so it follows that the number - * INT_MAX / 2 is still greater than CHAR_MAX (by at least 7 bits). + * INT_MAX / 2 is still greater than UCHAR_MAX (by at least 7 bits). */ #define END (INT_MAX / 2) /* special input symbol meaning end of input */ #define OUTPUT (END + 1) /* start of output range */ @@ -38,9 +38,9 @@ typedef struct nfa NFA; /* elementary NFAs */ NFA fail(void); -NFA range(char, char); -NFA nrange(char, char); -NFA chr(char); +NFA range(unsigned char, unsigned char); +NFA nrange(unsigned char, unsigned char); +NFA chr(unsigned char); NFA any(void); NFA symbol(int); /* input only */ NFA output(int);