commit - 6fdc24670880d6e8e15741e7a8b212b9474d6b6d
commit + e46cabf4dabf47bee184092e4cdb372e9076f3ca
blob - 17edf08425ceb13ebd2f487bcfd80cdf4816b93d
blob + bcb4b3f4cd14e31b2406f1b529508c88664fbbbc
--- Makefile
+++ Makefile
CFLAGS += -std=c99 -Wall
-TARGETS = ini_r ini_a ini_m ini_n
+TARGETS = ini_r ini_a ini_m ini_n ini_j
all: $(TARGETS)
ini_a : ini_a.c minip_a.h
ini_m : ini_m.c minip_m.h
ini_n : ini_n.c minip_n.h
+ini_j : ini_j.c minip_n.h
clean:
./ini_a test.ini | grep -v ' [01] byte '
./ini_m test.ini | grep -v ' [01] byte '
./ini_n test.ini | grep -v ' [01] byte '
+ ./ini_j test.ini 2>/dev/null
.PHONY: all test clean
blob - /dev/null
blob + 086e5b4eaa2fe24e2a2109830a8ff0a9824ed904 (mode 644)
--- /dev/null
+++ ini_j.c
+/*
+ * demo: (simple) ini files, converted to json
+ *
+ * EBNFish:
+ *
+ * inifile = {sect} {empty} {ws} [tail]
+ * sect = header {entry}
+ * tail = "EOF" {ws} eol {any}
+ *
+ * header = {empty} bra sname ket eol
+ * entry = {empty} key "=" value eol
+ * empty = {ws} [comment] nl
+ *
+ * (* tokens *)
+ * eol = nl | end
+ * bra = {ws} "["
+ * ket = "]" {ws}
+ * comment = ";" {lchar}
+ * sname = {lchar - "]"}+
+ * key = {lchar - "="}+
+ * value = {lchar}+
+ *
+ * (* character classes *)
+ * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ * ws = ? ASCII codes 32 (SP), 9 (HT) ?
+ * nl = ? ASCII code 10 (LF) ?
+ * end = ? end of input ?
+ * any = ? any character ?
+ *
+ * converted to be compatible with our combinators:
+ *
+ * inifile = sects empties wss otail
+ * sect = header entries
+ * tail = eof wss eol anys
+ * sects = {sect}
+ * otail = [tail]
+ *
+ * header = empties bra sname ket eol
+ * entry = empties key eq value eol
+ * empty = wss ocmnt nl
+ * entries = {entry}
+ * empties = {empty}
+ *
+ * eol = nl | end
+ * bra = wss leftbr
+ * ket = rightbr wss
+ * comment = semi lchars
+ * ocmnt = [comment]
+ * sname = {schar}+
+ * key = {kchar}+
+ * value = {lchar}+
+ *
+ * eof = "EOF"
+ * eq = "="
+ * semi = ";"
+ * leftbr = "["
+ * rightbr = "]"
+ *
+ * lchars = {lchar}
+ * wss = {ws}
+ * anys = {any}
+ *
+ * lchar = ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ * schar = ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ?
+ * kchar = ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ?
+ * ws = ? ASCII codes 32 (SP), 9 (HT) ?
+ * nl = ? ASCII code 10 (LF) ?
+ * end = ? end of input ?
+ * any = ? any character ?
+ *
+ * note how right-hand sides have one of the following forms:
+ *
+ * - a sequence of nonterminals (SEQ)
+ * - a single nonterminal inside { } (MANY)
+ * - a single nonterminal inside { }+ (MANY1)
+ * - a single nonterminal inside [ ] (OPT)
+ * - a choice of nonterminals (CHOICE)
+ * - a string of characters (STRING)
+ * - a single character (CHAR)
+ *
+ * finally, we can realize the "special sequences" (? ... ?) using END,
+ * ANYCHAR, RANGE, and expression choice ||.
+ */
+
+#include "minip_n.h"
+#include <stdio.h>
+
+/*
+ * semantic actions, defined further down in this file: trace logs to
+ * stderr, print and print_string write the JSON text to stdout.
+ */
+action trace, print, print_string;
+/*
+ * DEF_ defines nonterminal NT like DEF does, but chains an
+ * ACTION(trace, "NT") after EXPR; trace reads that string as the name of
+ * the nonterminal to report.
+ * NOTE(review): DEF and ACTION come from minip_n.h; presumably the action
+ * only runs once EXPR has matched -- confirm there.
+ */
+#define DEF_(NT, EXPR) DEF(NT, (EXPR) && ACTION(trace, #NT))
+
+/*
+ * character classes, following the "converted" grammar in the header
+ * comment. the ones defined with plain DEF (instead of DEF_) are not traced.
+ */
+DEF (lchar, CHAR('\t') || RANGE(0x20, 0x7e))
+/* lchar without ']' (0x5d), spelled out as explicit ranges */
+DEF (schar, CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e))
+ /* = !CHAR(']') && SEQ(lchar) */
+/* lchar without '=', written in the alternative form noted above */
+DEF (kchar, !CHAR('=') && SEQ(lchar))
+DEF (ws, CHAR('\t') || CHAR(' '))
+DEF_(nl, CHAR('\n'))
+DEF_(end, END)
+DEF_(any, ANYCHAR)
+
+/* repetitions of the character classes */
+DEF_(lchars, MANY(lchar))
+DEF_(wss, MANY(ws))
+DEF_(anys, MANY(any)) /* = OMEGA */
+
+/* literal tokens */
+DEF_(eof, STRING("EOF"))
+/* '=' between key and value; also prints the ": " separator */
+DEF_(eq, CHAR('=')
+ && ACTION(print, ": "))
+DEF_(semi, CHAR(';'))
+DEF_(leftbr, CHAR('['))
+DEF_(rightbr, CHAR(']'))
+
+/* composite tokens */
+DEF_(eol, CHOICE(nl, end))
+DEF_(bra, SEQ(wss, leftbr))
+DEF_(ket, SEQ(rightbr, wss))
+DEF_(comment, SEQ(semi, lchars))
+DEF (ocmnt, OPT(comment))
+/*
+ * section name, key and value each hand a string to print_string, which
+ * writes it as a prefix followed by a quoted, escaped string.
+ * NOTE(review): presumably the text passed to the action is what the
+ * preceding MANY1 matched -- confirm against ACTION in minip_n.h.
+ */
+DEF_(sname, MANY1(schar)
+ && ACTION(print_string, " "))
+DEF_(key, MANY1(kchar)
+ && ACTION(print_string, " "))
+DEF_(value, MANY1(lchar)
+ && ACTION(print_string, ""))
+
+/* consists of an action only: prints the ",\n" between two list elements */
+DEF_(out_comma, ACTION(print, ",\n"))
+
+DEF_(empty, SEQ(wss, ocmnt, nl))
+DEF_(empties, MANY(empty))
+/* "[name]" line; the action after it prints ": {\n" */
+DEF_(header, SEQ(empties, bra, sname, ket, eol)
+ && ACTION(print, ": {\n"))
+DEF_(entry, SEQ(empties, key, eq, value, eol))
+/* every entry after the first one is preceded by out_comma */
+DEF (entry_x, SEQ(out_comma, entry))
+DEF (entries_x, MANY(entry_x))
+/*
+ * one entry followed by any number of entry_x, then the action printing
+ * "\n }". note that this asks for at least one entry, whereas the EBNF in
+ * the header comment has sect = header {entry}.
+ */
+DEF_(entries, SEQ(entry, entries_x)
+ && ACTION(print, "\n }"))
+
+DEF_(tail, SEQ(eof, wss, eol, anys))
+DEF (otail, OPT(tail))
+DEF_(sect, SEQ(header, entries))
+/*
+ * three ways to write "sect {out_comma sect}" follow; only (A) is compiled,
+ * (B) and (C) are kept as commented-out alternatives.
+ */
+// (A) first sect, then MANY over "out_comma sect"
+DEF (sect_x, SEQ(out_comma, sect))
+DEF (sects_x, MANY(sect_x))
+DEF (sects1, SEQ(sect, sects_x))
+// (B) same list, with the repetition written as recursion through OPT
+//static parser sects2;
+//DEF (sect_x, SEQ(out_comma, sect))
+//DEF (sects3, SEQ(sect_x, sects2))
+//DEF (sects2, OPT(sects3))
+//DEF (sects1, SEQ(sect, sects2))
+// (C) recursion through CHOICE
+//static parser sects1;
+//DEF (sects2, SEQ(sect, out_comma, sects1)) /* if only tail-recursive */
+//DEF (sects1, CHOICE(sects2, sect))
+/* optional list of sections, with "{\n" printed before and "\n}\n" after */
+DEF_(sects, ACTION(print, "{\n") &&
+ OPT(sects1)
+ && ACTION(print, "\n}\n"))
+/*
+ * start symbol, the parser that main runs.
+ * NOTE(review): TRY is defined in minip_n.h; it is given the same list of
+ * nonterminals a SEQ would take -- confirm how it differs from SEQ.
+ */
+DEF_(inifile, TRY(sects, empties, wss, otail))
+
+/*
+ * trace action: write one line to stderr consisting of an offset (hex),
+ * a length in bytes and a name.
+ *
+ * ctx	name to report (a C string; DEF_ passes the nonterminal's name)
+ * env	pointer the offset of s is measured from (main passes the start
+ *	of the input buffer to the parser -- presumably forwarded here)
+ * s	position being reported
+ * len	length being reported
+ *
+ * always returns true.
+ */
+bool
+trace(void *ctx, void *env, const char *s, size_t len)
+{
+	const char *start = env;
+	const char *name = ctx;
+	size_t offset = s - start;
+
+	fprintf(stderr, "%4zx: %4zu byte %s\n", offset, len, name);
+	return true;
+}
+
+/*
+ * print action: write the C string passed as ctx to stdout, verbatim.
+ * env, s and len are not used. always returns true.
+ */
+bool
+print(void *ctx, void *env, const char *s, size_t len)
+{
+	const char *text = ctx;
+
+	fputs(text, stdout);
+	return true;
+}
+
+/*
+ * print_string action: write the prefix passed as ctx to stdout, followed
+ * by the len bytes at s as a double-quoted string with JSON-style escapes.
+ *
+ * '"' and '\' get a backslash, HT becomes \t, all other bytes up to 0x1f as
+ * well as 0x7f are written as \u00XX; everything else is copied as is.
+ * only ASCII is supported (asserted). env is not used.
+ *
+ * always returns true.
+ */
+bool
+print_string(void *ctx, void *env, const char *s, size_t len)
+{
+	const char *prefix = ctx;
+
+	fputs(prefix, stdout);
+
+	putchar('"');
+	for (; len > 0; len--, s++) {
+		unsigned char ch = (unsigned char)*s;
+
+		assert(ch <= 127);	// XXX only ASCII
+		switch (ch) {
+		/* these must be escaped */
+		case '"':
+			fputs("\\\"", stdout);
+			break;
+		case '\\':
+			fputs("\\\\", stdout);
+			break;
+		case '\t':
+			fputs("\\t", stdout);
+			break;
+		default:
+			if (ch <= 0x1F || ch == 0x7F)	/* i.e. iscntrl */
+				printf("\\u00%.2hhx", ch);
+			else
+				putchar(ch);
+			break;
+		}
+	}
+	putchar('"');
+
+	return true;
+}
+
+
+#include <sys/mman.h>	/* mmap */
+#include <assert.h>
+#include <err.h>
+#include <fcntl.h>	/* open */
+#include <stdio.h>
+#include <stdlib.h>	/* calloc */
+#include <unistd.h>	/* lseek, close */
+
+extern char *__progname;
+
+/*
+ * run the 'inifile' parser on a file given on the command line.
+ *
+ * the JSON text is written to stdout by the parser's actions, diagnostics
+ * and the trace go to stderr.
+ *
+ * exit status: 0 if the whole file was consumed, 1 on a syntax error,
+ * 2 if the file could not be opened or mapped, 3 on a usage error.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct cache cache = {0};	/* C99 has no empty initializer */
+	const char *infile;
+	const char *input;
+	const char *p;
+	int fd;
+	off_t o;
+	size_t sz, pos;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s file\n", __progname);
+		return 3;
+	}
+	infile = argv[1];
+
+	/* mmap input */
+	if ((fd = open(infile, O_RDONLY)) == -1)
+		err(2, "%s", infile);
+	if ((o = lseek(fd, 0, SEEK_END)) == -1)
+		err(2, "lseek");
+	sz = o;
+	if (sz == 0) {
+		/*
+		 * mmap refuses zero-length mappings, but an empty file is a
+		 * valid (empty) inifile; parse an empty buffer instead.
+		 */
+		input = "";
+	} else {
+		input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+		if (input == MAP_FAILED)
+			err(2, "mmap");
+	}
+	close(fd);	/* an established mapping does not need the fd */
+
+	/* result cache */
+	assert(cache.table == NULL);
+	assert(cache.capacity == 0);
+	assert(cache.nused == 0);
+
+	/* run parser */
+	p = inifile((struct stream){input, input + sz}, &cache, (void *)input);
+	if (p == NULL) {
+		fprintf(stderr, "%s: syntax error\n", infile);
+		return 1;
+	}
+	/*
+	 * every part of inifile is optional, so a successful parse may stop
+	 * right at the start; that case is reported below, not asserted away.
+	 */
+	assert(p >= input);
+	assert(p <= input + sz);
+	pos = p - input;
+	if (pos < sz) {
+		fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n",
+		    infile, pos, sz);
+		return 1;
+	}
+	fprintf(stderr, "success (consumed %zu/%zu bytes of input)\n", pos, sz);
+
+	return 0;
+}