commit 903947a29f50a9398009e743b1b17a6b349541a9 from: Sven M. Hallberg date: Mon Dec 05 04:21:36 2022 UTC forgot to add ini_r.c commit - 9d2eb8f8b610e4c920b0c463618df5452e4641c6 commit + 903947a29f50a9398009e743b1b17a6b349541a9 blob - /dev/null blob + cd996c612addccf81c289b86ac36f0366cb961c9 (mode 644) --- /dev/null +++ ini_r.c @@ -0,0 +1,172 @@ +/* + * demo: (simple) ini files + * + * EBNFish: + * + * inifile = {sect} {empty} {ws} [tail] + * sect = header {entry} + * tail = "EOF" {ws} eol {any} + * + * header = {empty} bra sname ket eol + * entry = {empty} key "=" value eol + * empty = {ws} [comment] nl + * + * (* tokens *) + * eol = nl | end + * bra = {ws} "[" + * ket = "]" {ws} + * comment = ";" {lchar} + * sname = {lchar - "]"}+ + * key = {lchar - "="}+ + * value = {lchar}+ + * + * (* character classes *) + * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ? + * ws = ? ASCII codes 32 (SP), 9 (HT) ? + * nl = ? ASCII code 10 (LF) ? + * end = ? end of input ? + * any = ? any character ? + * + * converted to be compatible with our combinators: + * + * inifile = sects empties wss tail + * sects = {header entries} + * tail = [eof wss eol anys] + * + * header = empties bra sname ket eol + * entries = {empties key eq value eol} + * empties = {wss comment nl} + * + * eol = nl | end + * bra = wss leftbr + * ket = rightbr wss + * comment = [semi lchars] + * sname = {schar}+ + * key = {kchar}+ + * value = {lchar}+ + * + * eof = "EOF" + * eq = "=" + * semi = ";" + * leftbr = "[" + * rightbr = "]" + * + * lchars = {lchar} + * wss = {ws} + * anys = {any} + * + * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ? + * schar = ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ? + * kchar = ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ? + * ws = ? ASCII codes 32 (SP), 9 (HT) ? + * nl = ? ASCII code 10 (LF) ? + * end = ? end of input ? + * any = ? any character ? + * + * note how right-hand sides have one of the following forms: + * + * - a sequence of nonterminals (SEQ) + * - a sequence of nonterminals inside { } (MANY) + * - a sequence of nonterminals inside { }+ (MANY1) + * - a sequence of nonterminals inside [ ] (OPT) + * - a choice of nonterminals (CHOICE) + * - a string of characters (STRING) + * - a single character (CHAR) + * + * finally, we can realize the "special sequences" (? ... ?) using END, + * ANYCHAR, RANGE, and expression choice ||. + */ + +#include "minip_r.h" + +DEF(lchar, CHAR('\t') || RANGE(0x20, 0x7e)) +DEF(schar, CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e)) + /* = !CHAR(']') && SEQ(lchar) */ +DEF(kchar, !CHAR('=') && SEQ(lchar)) +DEF(ws, CHAR('\t') || CHAR(' ')) +DEF(nl, CHAR('\n')) +DEF(end, END) +DEF(any, ANYCHAR) + +DEF(lchars, MANY(lchar)) +DEF(wss, MANY(ws)) +DEF(anys, MANY(any)) /* = OMEGA */ + +DEF(eof, STRING("EOF")) +DEF(eq, CHAR('=')) +DEF(semi, CHAR(';')) +DEF(leftbr, CHAR('[')) +DEF(rightbr, CHAR(']')) + +DEF(eol, CHOICE(nl, end)) +DEF(bra, SEQ(wss, leftbr)) +DEF(ket, SEQ(rightbr, wss)) +DEF(comment, OPT(semi, lchars)) +DEF(sname, MANY1(schar)) +DEF(key, MANY1(kchar)) +DEF(value, MANY1(lchar)) + +DEF(empties, MANY(wss, comment, nl)) +DEF(header, SEQ(empties, bra, sname, ket, eol)) +DEF(entries, MANY(empties, key, eq, value, eol)) + +DEF(tail, OPT(eof, wss, eol, anys)) +DEF(sects, MANY(header, entries)) +DEF(inifile, SEQ(sects, empties, wss, tail)) + + +#include +#include /* open, lseek */ +#include /* mmap */ +#include +#include + +extern char *__progname; + +/* + * run the 'inifile' parser on a file given on the command line. + */ +int +main(int argc, char *argv[]) +{ + const char *infile; + const char *input; + const char *p; + int fd; + off_t o; + size_t sz, pos; + + if (argc < 2) { + fprintf(stderr, "usage: %s file\n", __progname); + return 3; + } + infile = argv[1]; + + /* mmap input */ + if ((fd = open(infile, O_RDONLY)) == -1) + err(2, "%s", infile); + if ((o = lseek(fd, 0, SEEK_END)) == -1) + err(2, "lseek"); + sz = o; + input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0); + if (input == MAP_FAILED) + err(2, "mmap"); + + /* run parser */ + p = inifile(input, input + sz); + if (p == NULL) { + fprintf(stderr, "%s: syntax error\n", infile); + return 1; + } + assert(p > input); + assert(p <= input + sz); + pos = p - input; + if (pos < sz) { + fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n", + infile, pos, sz); + return 1; + } + printf("success (consumed %zu/%zu bytes of input)\n", pos, sz); + + return 0; +}