commit - 9d2eb8f8b610e4c920b0c463618df5452e4641c6
commit + 903947a29f50a9398009e743b1b17a6b349541a9
blob - /dev/null
blob + cd996c612addccf81c289b86ac36f0366cb961c9 (mode 644)
--- /dev/null
+++ ini_r.c
+/*
+ * demo: (simple) ini files
+ *
+ * EBNFish:
+ *
+ * inifile = {sect} {empty} {ws} [tail]
+ * sect = header {entry}
+ * tail = "EOF" {ws} eol {any}
+ *
+ * header = {empty} bra sname ket eol
+ * entry = {empty} key "=" value eol
+ * empty = {ws} [comment] nl
+ *
+ * (* tokens *)
+ * eol = nl | end
+ * bra = {ws} "["
+ * ket = "]" {ws}
+ * comment = ";" {lchar}
+ * sname = {lchar - "]"}+
+ * key = {lchar - "="}+
+ * value = {lchar}+
+ *
+ * (* character classes *)
+ * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ * ws = ? ASCII codes 32 (SP), 9 (HT) ?
+ * nl = ? ASCII code 10 (LF) ?
+ * end = ? end of input ?
+ * any = ? any character ?
+ *
+ * converted to be compatible with our combinators:
+ *
+ * inifile = sects empties wss tail
+ * sects = {header entries}
+ * tail = [eof wss eol anys]
+ *
+ * header = empties bra sname ket eol
+ * entries = {empties key eq value eol}
+ * empties = {wss comment nl}
+ *
+ * eol = nl | end
+ * bra = wss leftbr
+ * ket = rightbr wss
+ * comment = [semi lchars]
+ * sname = {schar}+
+ * key = {kchar}+
+ * value = {lchar}+
+ *
+ * eof = "EOF"
+ * eq = "="
+ * semi = ";"
+ * leftbr = "["
+ * rightbr = "]"
+ *
+ * lchars = {lchar}
+ * wss = {ws}
+ * anys = {any}
+ *
+ * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ * schar = ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ?
+ * kchar = ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ?
+ * ws = ? ASCII codes 32 (SP), 9 (HT) ?
+ * nl = ? ASCII code 10 (LF) ?
+ * end = ? end of input ?
+ * any = ? any character ?
+ *
+ * note how right-hand sides have one of the following forms:
+ *
+ * - a sequence of nonterminals (SEQ)
+ * - a sequence of nonterminals inside { } (MANY)
+ * - a sequence of nonterminals inside { }+ (MANY1)
+ * - a sequence of nonterminals inside [ ] (OPT)
+ * - a choice of nonterminals (CHOICE)
+ * - a string of characters (STRING)
+ * - a single character (CHAR)
+ *
+ * finally, we can realize the "special sequences" (? ... ?) using END,
+ * ANYCHAR, RANGE, and expression choice ||.
+ */
+
+#include "minip_r.h"
+
+/*
+ * character classes (the "special sequences" of the grammar above).
+ * each DEF names one rule; '||' is the expression-level choice mentioned
+ * in the header comment.
+ */
+/* lchar: HT or any printable ASCII character, i.e. anything allowed on a line */
+DEF(lchar, CHAR('\t') || RANGE(0x20, 0x7e))
+/* schar: lchar without ']' (0x5d), spelled out as explicit ranges */
+DEF(schar, CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e))
+		/* = !CHAR(']') && SEQ(lchar) */
+/*
+ * kchar: lchar without '=' (0x3d), using the alternative spelling shown
+ * for schar.  presumably !CHAR('=') only tests and does not consume input;
+ * confirm against minip_r.h.
+ */
+DEF(kchar, !CHAR('=') && SEQ(lchar))
+/* ws: a single blank (HT or SP) */
+DEF(ws, CHAR('\t') || CHAR(' '))
+/* nl: line feed; the grammar does not mention CR */
+DEF(nl, CHAR('\n'))
+/* end: end of input */
+DEF(end, END)
+/* any: any single character */
+DEF(any, ANYCHAR)
+
+/*
+ * possibly empty repetitions of a character class; these exist because the
+ * converted grammar only allows nonterminals inside { }.
+ */
+DEF(lchars, MANY(lchar))
+DEF(wss, MANY(ws))
+DEF(anys, MANY(any)) /* = OMEGA */
+
+/* literal tokens: the "EOF" marker and the punctuation of the ini syntax */
+DEF(eof, STRING("EOF"))
+DEF(eq, CHAR('='))
+DEF(semi, CHAR(';'))
+DEF(leftbr, CHAR('['))
+DEF(rightbr, CHAR(']'))
+
+/* eol: a line ends at a newline or at the end of the input */
+DEF(eol, CHOICE(nl, end))
+/* bra, ket: section brackets; blanks before '[' and after ']' are absorbed */
+DEF(bra, SEQ(wss, leftbr))
+DEF(ket, SEQ(rightbr, wss))
+/*
+ * comment: optional "; text".  because it may be absent, the item
+ * "wss comment nl" in 'empties' covers blank lines as well.
+ */
+DEF(comment, OPT(semi, lchars))
+/* sname, key, value: non-empty runs of their respective character class */
+DEF(sname, MANY1(schar))
+DEF(key, MANY1(kchar))
+DEF(value, MANY1(lchar))
+
+/* empties: any number of blank or comment-only lines */
+DEF(empties, MANY(wss, comment, nl))
+/* header: a "[sname]" line, optionally preceded by empties */
+DEF(header, SEQ(empties, bra, sname, ket, eol))
+/* entries: any number of "key=value" lines, each optionally preceded by empties */
+DEF(entries, MANY(empties, key, eq, value, eol))
+
+/* tail: optional "EOF" marker line; whatever follows it is accepted as is */
+DEF(tail, OPT(eof, wss, eol, anys))
+/* sects: any number of sections, each a header followed by its entries */
+DEF(sects, MANY(header, entries))
+/*
+ * inifile: the start symbol.  every component is a MANY or an OPT, so a
+ * match may consume only a prefix of the input, or nothing at all; main()
+ * compares the consumed length with the input size to detect errors.
+ */
+DEF(inifile, SEQ(sects, empties, wss, tail))
+
+
+#include <sys/mman.h>	/* mmap */
+
+#include <assert.h>
+#include <err.h>
+#include <fcntl.h>	/* open */
+#include <stdio.h>
+#include <unistd.h>	/* lseek, close */
+
+extern char *__progname;
+
+/*
+ * run the 'inifile' parser on a file given on the command line.
+ *
+ * the parser is called with the start and the end of the input; it returns
+ * NULL on failure and otherwise a pointer just past the consumed input.
+ *
+ * exit status: 0 if the parser consumed the whole file, 1 on a syntax
+ * error, 2 if the file could not be opened or mapped, 3 on bad usage.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *infile;
+	const char *input;
+	const char *p;
+	int fd;
+	off_t o;
+	size_t sz, pos;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s file\n", __progname);
+		return 3;
+	}
+	infile = argv[1];
+
+	/* mmap input; the file size is found by seeking to its end */
+	if ((fd = open(infile, O_RDONLY)) == -1)
+		err(2, "%s", infile);
+	if ((o = lseek(fd, 0, SEEK_END)) == -1)
+		err(2, "lseek");
+	sz = (size_t)o;
+	if (sz == 0) {
+		/*
+		 * mmap rejects zero-length mappings, yet the grammar accepts
+		 * empty input; hand the parser an empty range instead.
+		 */
+		input = "";
+	} else {
+		input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (input == MAP_FAILED)
+			err(2, "mmap");
+	}
+	/* the mapping stays valid after the descriptor is closed */
+	(void)close(fd);
+
+	/* run parser */
+	p = inifile(input, input + sz);
+	if (p == NULL) {
+		fprintf(stderr, "%s: syntax error\n", infile);
+		return 1;
+	}
+	/*
+	 * every component of 'inifile' is optional or repeatable, so a
+	 * successful match may consume nothing: p == input is legitimate
+	 * and must be reported as a syntax error at position 0 below, not
+	 * trip an assertion.
+	 */
+	assert(p >= input);
+	assert(p <= input + sz);
+	pos = (size_t)(p - input);
+	if (pos < sz) {
+		/* the parser stopped early: the rest of the file is invalid */
+		fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n",
+		    infile, pos, sz);
+		return 1;
+	}
+	printf("success (consumed %zu/%zu bytes of input)\n", pos, sz);
+
+	return 0;
+}