commit e46cabf4dabf47bee184092e4cdb372e9076f3ca from: Sven M. Hallberg date: Mon Dec 05 04:15:37 2022 UTC add ini_j (json output variant) commit - 6fdc24670880d6e8e15741e7a8b212b9474d6b6d commit + e46cabf4dabf47bee184092e4cdb372e9076f3ca blob - 17edf08425ceb13ebd2f487bcfd80cdf4816b93d blob + bcb4b3f4cd14e31b2406f1b529508c88664fbbbc --- Makefile +++ Makefile @@ -1,6 +1,6 @@ CFLAGS += -std=c99 -Wall -TARGETS = ini_r ini_a ini_m ini_n +TARGETS = ini_r ini_a ini_m ini_n ini_j all: $(TARGETS) @@ -8,6 +8,7 @@ ini_r : ini_r.c minip_r.h ini_a : ini_a.c minip_a.h ini_m : ini_m.c minip_m.h ini_n : ini_n.c minip_n.h +ini_j : ini_j.c minip_n.h clean: @@ -18,5 +19,6 @@ test: all ./ini_a test.ini | grep -v ' [01] byte ' ./ini_m test.ini | grep -v ' [01] byte ' ./ini_n test.ini | grep -v ' [01] byte ' + ./ini_j test.ini 2>/dev/null .PHONY: all test clean blob - /dev/null blob + 086e5b4eaa2fe24e2a2109830a8ff0a9824ed904 (mode 644) --- /dev/null +++ ini_j.c @@ -0,0 +1,267 @@ +/* + * demo: (simple) ini files, converted to json + * + * EBNFish: + * + * inifile = {sect} {empty} {ws} [tail] + * sect = header {entry} + * tail = "EOF" {ws} eol {any} + * + * header = {empty} bra sname ket eol + * entry = {empty} key "=" value eol + * empty = {ws} [comment] nl + * + * (* tokens *) + * eol = nl | end + * bra = {ws} "[" + * ket = "]" {ws} + * comment = ";" {lchar} + * sname = {lchar - "]"}+ + * key = {lchar - "="}+ + * value = {lchar}+ + * + * (* character classes *) + * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ? + * ws = ? ASCII codes 32 (SP), 9 (HT) ? + * nl = ? ASCII code 10 (LF) ? + * end = ? end of input ? + * any = ? any character ? 
+ * + * converted to be compatible with our combinators: + * + * inifile = sects empties wss otail + * sect = header entries + * tail = eof wss eol anys + * sects = {sect} + * otail = [tail] + * + * header = empties bra sname ket eol + * entry = empties key eq value eol + * empty = wss ocmnt nl + * entries = {entry} + * empties = {empty} + * + * eol = nl | end + * bra = wss leftbr + * ket = rightbr wss + * comment = semi lchars + * ocmnt = [comment] + * sname = {schar}+ + * key = {kchar}+ + * value = {lchar}+ + * + * eof = "EOF" + * eq = "=" + * semi = ";" + * leftbr = "[" + * rightbr = "]" + * + * lchars = {lchar} + * wss = {ws} + * anys = {any} + * + * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ? + * schar = ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ? + * kchar = ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ? + * ws = ? ASCII codes 32 (SP), 9 (HT) ? + * nl = ? ASCII code 10 (LF) ? + * end = ? end of input ? + * any = ? any character ? + * + * note how right-hand sides have one of the following forms: + * + * - a sequence of nonterminals (SEQ) + * - a single nonterminal inside { } (MANY) + * - a single nonterminal inside { }+ (MANY1) + * - a single nonterminal inside [ ] (OPT) + * - a choice of nonterminals (CHOICE) + * - a string of characters (STRING) + * - a single character (CHAR) + * + * finally, we can realize the "special sequences" (? ... ?) using END, + * ANYCHAR, RANGE, and expression choice ||. 
+ */
+
+#include "minip_n.h"
+/*
+ * NOTE(review): the header name was missing from this directive in the
+ * listing; <assert.h> is assumed because print_string() and main() call
+ * assert() -- confirm against the upstream file.
+ */
+#include <assert.h>
+
+/* action callbacks used by the grammar; defined after the grammar */
+action trace, print, print_string;
+
+/*
+ * like DEF, but when EXPR succeeds also invoke the trace action with the
+ * nonterminal's name (#NT) as its context argument.
+ */
+#define DEF_(NT, EXPR)	DEF(NT, (EXPR) && ACTION(trace, #NT))
+
+/* character classes */
+DEF (lchar,	CHAR('\t') || RANGE(0x20, 0x7e))
+DEF (schar,	CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e))
+		/* = !CHAR(']') && SEQ(lchar) */
+DEF (kchar,	!CHAR('=') && SEQ(lchar))
+DEF (ws,	CHAR('\t') || CHAR(' '))
+DEF_(nl,	CHAR('\n'))
+DEF_(end,	END)
+DEF_(any,	ANYCHAR)
+
+/* repetitions of character classes */
+DEF_(lchars,	MANY(lchar))
+DEF_(wss,	MANY(ws))
+DEF_(anys,	MANY(any))	/* = OMEGA */
+
+/* literals; "=" also emits the JSON key/value separator */
+DEF_(eof,	STRING("EOF"))
+DEF_(eq,	CHAR('=')
+		    && ACTION(print, ": "))
+DEF_(semi,	CHAR(';'))
+DEF_(leftbr,	CHAR('['))
+DEF_(rightbr,	CHAR(']'))
+
+/* tokens; names, keys and values are emitted as JSON strings */
+DEF_(eol,	CHOICE(nl, end))
+DEF_(bra,	SEQ(wss, leftbr))
+DEF_(ket,	SEQ(rightbr, wss))
+DEF_(comment,	SEQ(semi, lchars))
+DEF (ocmnt,	OPT(comment))
+DEF_(sname,	MANY1(schar)
+		    && ACTION(print_string, " "))
+DEF_(key,	MANY1(kchar)
+		    && ACTION(print_string, " "))
+DEF_(value,	MANY1(lchar)
+		    && ACTION(print_string, ""))
+
+/* consumes no input; only emits the separator between JSON members */
+DEF_(out_comma,	ACTION(print, ",\n"))
+
+DEF_(empty,	SEQ(wss, ocmnt, nl))
+DEF_(empties,	MANY(empty))
+DEF_(header,	SEQ(empties, bra, sname, ket, eol)
+		    && ACTION(print, ": {\n"))
+DEF_(entry,	SEQ(empties, key, eq, value, eol))
+DEF (entry_x,	SEQ(out_comma, entry))
+DEF (entries_x,	MANY(entry_x))
+/*
+ * the grammar allows a section without entries (sect = header {entry}),
+ * so the comma-separated entry list is optional and the closing brace is
+ * printed either way.  same shape as sects1/sects below.
+ */
+DEF (entries1,	SEQ(entry, entries_x))
+DEF_(entries,	OPT(entries1)
+		    && ACTION(print, "\n }"))
+
+DEF_(tail,	SEQ(eof, wss, eol, anys))
+DEF (otail,	OPT(tail))
+DEF_(sect,	SEQ(header, entries))
+/*
+ * three ways to write "sections separated by commas"; (A) is in use,
+ * (B) and (C) are kept as alternatives.
+ */
+// (A)
+DEF (sect_x,	SEQ(out_comma, sect))
+DEF (sects_x,	MANY(sect_x))
+DEF (sects1,	SEQ(sect, sects_x))
+// (B)
+//static parser sects2;
+//DEF (sect_x,	SEQ(out_comma, sect))
+//DEF (sects3,	SEQ(sect_x, sects2))
+//DEF (sects2,	OPT(sects3))
+//DEF (sects1,	SEQ(sect, sects2))
+// (C)
+//static parser sects1;
+//DEF (sects2,	SEQ(sect, out_comma, sects1))	/* if only tail-recursive */
+//DEF (sects1,	CHOICE(sects2, sect))
+/* the outer JSON object; its braces are printed even if there are no sections */
+DEF_(sects,	ACTION(print, "{\n") &&
+		    OPT(sects1)
+		    && ACTION(print, "\n}\n"))
+DEF_(inifile,	TRY(sects, empties, wss, otail))
+
+/*
+ * trace action: report one call on stderr.
+ *
+ * ctx is the name of the nonterminal (DEF_ passes #NT).  env is taken to
+ * be the start of the input (main passes the input buffer), so that the
+ * position of s can be printed as an offset.  s and len presumably
+ * delimit the text the nonterminal matched.
+ *
+ * always returns true.
+ */
+bool
+trace(void *ctx, void *env, const char *s, size_t len)
+{
+	const char *nt = ctx;
+	const char *begin = env;
+	size_t pos;
+
+	pos = s - begin;
+	fprintf(stderr, "%4zx: %4zu byte %s\n", pos, len, nt);
+	return true;
+}
+
+/*
+ * print action: write the string given as ctx to stdout, verbatim.
+ * env, s and len are ignored.  always returns true.
+ */
+bool
+print(void *ctx, void *env, const char *s, size_t len)
+{
+	fputs(ctx, stdout);
+	return true;
+}
+
+/*
+ * print_string action: write the len bytes at s to stdout as a JSON
+ * string literal, preceded by the prefix string given as ctx.
+ *
+ * double quote, backslash and tab get their two-character escapes; other
+ * control characters (and DEL) are written as \u00XX.  input bytes above
+ * 127 trip the assertion.  env is ignored.  always returns true.
+ */
+bool
+print_string(void *ctx, void *env, const char *s, size_t len)
+{
+	size_t i;
+	unsigned char c;
+
+	fputs(ctx, stdout);		/* prefix string */
+
+	putchar('"');
+	for (i = 0; i < len; i++) {
+		c = (unsigned char)s[i];
+		assert(c <= 127);	// XXX only ASCII
+		/* these must be escaped */
+		if (c == '"')
+			fputs("\\\"", stdout);
+		else if (c == '\\')
+			fputs("\\\\", stdout);
+		else if (c == '\t')
+			fputs("\\t", stdout);
+		else if (c <= 0x1F || c == 0x7F)	/* i.e. iscntrl */
+			printf("\\u00%.2hhx", c);
+		else
+			putchar(c);
+	}
+	putchar('"');
+
+	return true;
+}
+
+
+/*
+ * NOTE(review): the header names were missing from these directives in
+ * the listing.  the set below is what the visible code calls (stdio,
+ * open, lseek, mmap, err, assert) plus <stdlib.h> for the original
+ * "calloc" remark -- confirm against the upstream file.
+ */
+#include <stdio.h>
+#include <stdlib.h>	/* calloc */
+#include <fcntl.h>	/* open */
+#include <unistd.h>	/* lseek */
+#include <sys/mman.h>	/* mmap */
+#include <err.h>
+#include <assert.h>
+
+extern char *__progname;
+
+/*
+ * run the 'inifile' parser on a file given on the command line.
+ */
+/*
+ * the JSON produced by the grammar's print actions goes to stdout;
+ * diagnostics go to stderr.
+ *
+ * exit status: 0 if the whole input was consumed, 1 on a syntax error,
+ * 2 if the input could not be opened or mapped, 3 on wrong usage.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct cache cache = {0};	/* "= {}" is not valid C99 */
+	const char *infile;
+	const char *input;
+	const char *p;
+	int fd;
+	off_t o;
+	size_t sz, pos;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s file\n", __progname);
+		return 3;
+	}
+	infile = argv[1];
+
+	/* mmap input */
+	if ((fd = open(infile, O_RDONLY)) == -1)
+		err(2, "%s", infile);
+	if ((o = lseek(fd, 0, SEEK_END)) == -1)
+		err(2, "lseek");
+	sz = o;
+	if (sz == 0) {
+		/* mmap rejects a zero length; parse an empty buffer instead */
+		input = "";
+	} else {
+		input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (input == MAP_FAILED)
+			err(2, "mmap");
+	}
+	close(fd);	/* an established mapping does not need the descriptor */
+
+	/* result cache */
+	assert(cache.table == NULL);
+	assert(cache.capacity == 0);
+	assert(cache.nused == 0);
+
+	/* run parser; the input start doubles as env for the trace action */
+	p = inifile((struct stream){input, input + sz}, &cache, (void *)input);
+	if (p == NULL) {
+		fprintf(stderr, "%s: syntax error\n", infile);
+		return 1;
+	}
+	/*
+	 * every part of 'inifile' can match the empty string, so a result
+	 * equal to the start of the input is legitimate and must reach the
+	 * position check below rather than abort.
+	 */
+	assert(p >= input);
+	assert(p <= input + sz);
+	pos = p - input;
+	if (pos < sz) {
+		fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n",
+		    infile, pos, sz);
+		return 1;
+	}
+	fprintf(stderr, "success (consumed %zu/%zu bytes of input)\n", pos, sz);
+
+	return 0;
+}