commit 3ec4a81bdd81899ccab1606de9a343b576c775f1 from: Sven M. Hallberg date: Fri Aug 07 18:16:51 2020 UTC factor lookup(), store(), and grow() out of memo() commit - bebd7c2cb6957b68f5a5f2a124e95766dda2018c commit + 3ec4a81bdd81899ccab1606de9a343b576c775f1 blob - 9d2b286e385f476e21d96b592915d680646b7313 blob + c0944d6aa7cafe1ad2f542aa564623476f45d467 --- minip_n.h +++ minip_n.h @@ -93,13 +93,10 @@ typedef bool action(void *, void *, const char *, size /* an entry in the memoization hash table */ struct memento { - const void *mem_key; /* NULL: unused entry */ - const void *mem_key2; /* NULL: dummy entry -- may be reused */ - const void *mem_value; + const void *key; /* NULL: unused entry */ + const void *key2; /* NULL: dummy entry -- may be reused */ + const void *value; }; -#define mem_position mem_key -#define mem_symbol mem_key2 -#define mem_result mem_value struct cache { struct memento *table; @@ -215,128 +212,161 @@ many1_(struct stream inp, struct cache *cac, void *env #include // XXX debug -static inline const char * -memo_(struct stream inp, struct cache *cache, void *env, const char *sym, - parser *ps[], size_t n) +static inline unsigned int +hash(const void *key, const void *key2) { - struct memento *mem; - const char *res; + unsigned int h = 5831; /* djbhash; f(h,x) = 33h + x */ size_t i; - unsigned int h0 = 5831; /* djbhash; f(h,x) = 33h + x */ + + for (i=0; i < sizeof key; i++) + h = h * 33 + ((unsigned char *)&key)[i]; + for (i=0; i < sizeof key2; i++) + h = h * 33 + ((unsigned char *)&key2)[i]; + + return h; +} + +/* NB: + * the hash math is modulo cache->capacity, but when that is a power of + * two, we can run the intermediates modulo UINT_MAX+1. + */ + +static inline struct memento * +lookup(const struct cache *cache, const void *key, const void *key2) +{ + struct memento *mem; unsigned int h, s; + size_t i; if (cache == NULL) - return seq_(inp, cache, env, ps, n); + return NULL; - /* NB: - * the hash math is modulo cache->capacity, but when that is a power of - * two, we can run the intermediates modulo UINT_MAX+1. - */ + h = hash(key, key2); - /* hash (pos,sym) */ - for (i=0; i < sizeof inp.p; i++) - h0 = h0 * 33 + ((unsigned char *)&inp.p)[i]; - for (i=0; i < sizeof sym; i++) - h0 = h0 * 33 + ((unsigned char *)&sym)[i]; - - /* look up (pos,sym) in cache and return if found */ /* NB: count steps, since the table could be filled with dummies */ - for (i=0, h=h0, s=1; i < cache->capacity; i++, h+=s, s*=5) { + for (i=0, s=1; i < cache->capacity; i++, h+=s, s*=5) { mem = cache->table + (h % cache->capacity); - if (mem->mem_key == NULL) /* not in table */ + if (mem->key == NULL) /* not in table */ break; - if (mem->mem_key2 == NULL) /* dummy entry */ + if (mem->key2 == NULL) /* dummy entry */ continue; - if (mem->mem_position == inp.p && mem->mem_symbol == sym) - return mem->mem_result; /* match */ + if (mem->key == key && mem->key2 == key2) + return mem; /* match */ } - /* else run the argument sequence */ - res = seq_(inp, cache, env, ps, n); + return NULL; +} - /* grow hashtable if necessary */ - if (cache->nused >= cache->capacity * 9 / 10) { - struct memento *newp; - size_t newcap; - size_t j; +static inline void +grow(struct cache *cache) +{ + struct memento *mem, *newp; + unsigned int h, s; + size_t i, newcap; - /* realloc XXX nicer err handling */ - assert(cache->capacity <= UINT_MAX / 2); - assert(cache->capacity <= SIZE_MAX / sizeof *cache->table); - if (cache->capacity == 0) /* initial alloc */ - newcap = 4; - else - newcap = cache->capacity * 2; -#ifdef PROFGROW - size_t count=0, count2=0; - fprintf(stderr, "\tgrow at %zu/%zu -> newcap %zu\n", - cache->nused, cache->capacity, newcap); -#endif - newp = calloc(newcap, sizeof *cache->table); - assert(newp != NULL); - - /* relocate items */ - for (i=0; i < cache->capacity; i++) { - const char *pi = cache->table[i].mem_position; - const char *si = cache->table[i].mem_symbol; - - if (pi == NULL || si == NULL) /* unused or dummy */ - continue; - - /* hash the value (pi,si) in table cell i */ - h = 5831; - for (j=0; j < sizeof pi; j++) - h = h * 33 + ((unsigned char *)&pi)[j]; - for (j=0; si[j] != '\0'; j++) - h = h * 33 + (unsigned char)si[j]; - - /* find the correct place for it in the new table */ - for (s=1; ; h+=s, s*=5) { - h %= newcap; - mem = newp + h; - if (mem->mem_key == NULL) /* unused */ - break; - } + /* realloc XXX nicer err handling */ + assert(cache->capacity <= UINT_MAX / 2); + assert(cache->capacity <= SIZE_MAX / sizeof *cache->table); + if (cache->capacity == 0) /* initial alloc */ + newcap = 4; + else + newcap = cache->capacity * 2; #ifdef PROFGROW - if (s != 1) - count2++; - if (h != i) - count++; + size_t count=0, count2=0; + fprintf(stderr, "\tgrow at %zu/%zu -> newcap %zu\n", + cache->nused, cache->capacity, newcap); #endif + newp = calloc(newcap, sizeof *cache->table); + assert(newp != NULL); - /* copy the entry to the new array */ - *mem = cache->table[i]; + /* relocate items */ + for (i=0; i < cache->capacity; i++) { + const void *k = cache->table[i].key; + const char *k2 = cache->table[i].key2; + + if (k == NULL || k2 == NULL) /* unused or dummy */ + continue; + + /* find the correct place for (k,k2) in the new table */ + for (h = hash(k, k2), s=1; ; h+=s, s*=5) { + h %= newcap; + mem = newp + h; + if (mem->key == NULL) /* unused */ + break; } #ifdef PROFGROW - fprintf(stderr, "\tpos. changed on %zu/%zu entries = %.1f%%\n", - count, cache->nused, count * 100.0 / cache->nused); - fprintf(stderr, "\tnow %zu/%zu are off-base, i.e. %.1f%%\n", - count2, cache->nused, count2 * 100.0 / cache->nused); + if (s != 1) + count2++; + if (h != i) + count++; #endif - free(cache->table); - cache->table = newp; - cache->capacity = newcap; + /* copy the entry to the new array */ + *mem = cache->table[i]; } +#ifdef PROFGROW + fprintf(stderr, "\tpos. changed on %zu/%zu entries = %.1f%%\n", + count, cache->nused, count * 100.0 / cache->nused); + fprintf(stderr, "\tnow %zu/%zu are off-base, i.e. %.1f%%\n", + count2, cache->nused, count2 * 100.0 / cache->nused); +#endif + free(cache->table); + cache->table = newp; + cache->capacity = newcap; +} + +static inline void +store(struct cache *cache, const void *key, const void *key2, const void *val) +{ + struct memento *mem; + unsigned int h, s; + + if (cache == NULL) + return; + + /* grow hashtable if necessary */ + if (cache->nused >= cache->capacity * 9 / 10) + grow(cache); + /* find our place in the table */ - assert(cache->nused < cache->capacity); /* NB: loop will terminate because dummies count as free here */ - for (h=h0, s=1; ; h+=s, s*=5) { + h = hash(key, key2); + assert(cache->nused < cache->capacity); + for (s=1; ; h+=s, s*=5) { mem = cache->table + (h % cache->capacity); - if (mem->mem_key == NULL) /* unused entry */ + if (mem->key == NULL) /* unused entry */ break; - if (mem->mem_key2 == NULL) /* dummy entry */ - break; /* reuse! */ + if (mem->key2 == NULL) /* dummy entry */ + break; /* reuse! */ } /* save the result */ - assert(mem->mem_key == NULL || mem->mem_key2 == NULL); - mem->mem_position = inp.p; - mem->mem_symbol = sym; - mem->mem_result = res; + assert(mem->key == NULL || mem->key2 == NULL); + mem->key = key; + mem->key2 = key2; + mem->value = val; cache->nused++; +} +static inline const char * +memo_(struct stream inp, struct cache *cache, void *env, const char *sym, + parser *ps[], size_t n) +{ + struct memento *mem; + const char *res; + + /* look up (pos,sym) in cache and return if found */ + mem = lookup(cache, inp.p, sym); + if (mem != NULL) + return mem->value; + + /* else run the argument sequence */ + res = seq_(inp, cache, env, ps, n); + + /* and store the result */ + store(cache, inp.p, sym, res); + return res; }