Commit Diff


commit - 6fdc24670880d6e8e15741e7a8b212b9474d6b6d
commit + e46cabf4dabf47bee184092e4cdb372e9076f3ca
blob - 17edf08425ceb13ebd2f487bcfd80cdf4816b93d
blob + bcb4b3f4cd14e31b2406f1b529508c88664fbbbc
--- Makefile
+++ Makefile
@@ -1,6 +1,6 @@
 CFLAGS += -std=c99 -Wall
 
-TARGETS = ini_r ini_a ini_m ini_n
+TARGETS = ini_r ini_a ini_m ini_n ini_j
 
 all: $(TARGETS)
 
@@ -8,6 +8,7 @@ ini_r : ini_r.c minip_r.h
 ini_a : ini_a.c minip_a.h
 ini_m : ini_m.c minip_m.h
 ini_n : ini_n.c minip_n.h
+ini_j : ini_j.c minip_n.h
 
 
 clean:
@@ -18,5 +19,6 @@ test: all
 	./ini_a test.ini | grep -v ' [01] byte '
 	./ini_m test.ini | grep -v ' [01] byte '
 	./ini_n test.ini | grep -v ' [01] byte '
+	./ini_j test.ini 2>/dev/null
 
 .PHONY: all test clean
blob - /dev/null
blob + 086e5b4eaa2fe24e2a2109830a8ff0a9824ed904 (mode 644)
--- /dev/null
+++ ini_j.c
@@ -0,0 +1,267 @@
+/*
+ * demo: (simple) ini files, converted to json
+ *
+ * EBNFish:
+ *
+ *	inifile	= {sect} {empty} {ws} [tail]
+ *	sect	= header {entry}
+ *	tail	= "EOF" {ws} eol {any}
+ *
+ *	header	= {empty} bra sname ket eol
+ *	entry	= {empty} key "=" value eol
+ *	empty	= {ws} [comment] nl
+ *
+ *	(* tokens *)
+ *	eol	= nl | end
+ *	bra	= {ws} "["
+ *	ket	= "]" {ws}
+ *	comment	= ";" {lchar}
+ *	sname	= {lchar - "]"}+
+ *	key	= {lchar - "="}+
+ *	value	= {lchar}+
+ *
+ *	(* character classes *)
+ *	lchar	= ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ *	ws	= ? ASCII codes 32 (SP), 9 (HT) ?
+ *	nl	= ? ASCII code 10 (LF) ?
+ *	end	= ? end of input ?
+ *	any	= ? any character ?
+ *
+ * converted to be compatible with our combinators:
+ *
+ *	inifile	= sects empties wss otail
+ *	sect	= header entries
+ *	tail	= eof wss eol anys
+ *	sects	= {sect}
+ *	otail	= [tail]
+ *
+ *	header	= empties bra sname ket eol
+ *	entry	= empties key eq value eol
+ *	empty	= wss ocmnt nl
+ *	entries	= {entry}
+ *	empties	= {empty}
+ *
+ *	eol	= nl | end
+ *	bra	= wss leftbr
+ *	ket	= rightbr wss
+ *	comment	= semi lchars
+ *	ocmnt	= [comment]
+ *	sname	= {schar}+
+ *	key	= {kchar}+
+ *	value	= {lchar}+
+ *
+ *	eof	= "EOF"
+ *	eq	= "="
+ *	semi	= ";"
+ *	leftbr	= "["
+ *	rightbr	= "]"
+ *
+ *	lchars	= {lchar}
+ *	wss	= {ws}
+ *	anys	= {any}
+ *
+ *	lchar	= ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ *	schar	= ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ?
+ *	kchar	= ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ?
+ *	ws	= ? ASCII codes 32 (SP), 9 (HT) ?
+ *	nl	= ? ASCII code 10 (LF) ?
+ *	end	= ? end of input ?
+ *	any	= ? any character ?
+ *
+ * note how right-hand sides have one of the following forms:
+ *
+ *  - a sequence of nonterminals (SEQ)
+ *  - a single nonterminal inside { } (MANY)
+ *  - a single nonterminal inside { }+ (MANY1)
+ *  - a single nonterminal inside [ ] (OPT)
+ *  - a choice of nonterminals (CHOICE)
+ *  - a string of characters (STRING)
+ *  - a single character (CHAR)
+ *
+ * finally, we can realize the "special sequences" (? ... ?) using END,
+ * ANYCHAR, RANGE, and expression choice ||.
+ */
+
+#include "minip_n.h"
+#include <stdio.h>
+
+action trace, print, print_string;	/* parser actions, defined below */
+#define DEF_(NT, EXPR)	DEF(NT, (EXPR) && ACTION(trace, #NT)) /* DEF + trace of NT */
+
+DEF (lchar,	CHAR('\t') || RANGE(0x20, 0x7e))
+DEF (schar,	CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e))
+				/* = !CHAR(']') && SEQ(lchar) */
+DEF (kchar,	!CHAR('=') && SEQ(lchar))
+DEF (ws,	CHAR('\t') || CHAR(' '))
+DEF_(nl,	CHAR('\n'))
+DEF_(end,	END)
+DEF_(any,	ANYCHAR)
+
+DEF_(lchars,	MANY(lchar))
+DEF_(wss,	MANY(ws))
+DEF_(anys,	MANY(any))	/* = OMEGA */
+
+DEF_(eof,	STRING("EOF"))
+DEF_(eq,	CHAR('=')
+		    && ACTION(print, ": "))
+DEF_(semi,	CHAR(';'))
+DEF_(leftbr,	CHAR('['))
+DEF_(rightbr,	CHAR(']'))
+
+DEF_(eol,	CHOICE(nl, end))
+DEF_(bra,	SEQ(wss, leftbr))
+DEF_(ket,	SEQ(rightbr, wss))
+DEF_(comment,	SEQ(semi, lchars))
+DEF (ocmnt,	OPT(comment))
+DEF_(sname,	MANY1(schar)
+		    && ACTION(print_string, "  "))
+DEF_(key,	MANY1(kchar)
+		    && ACTION(print_string, "    "))
+DEF_(value,	MANY1(lchar)
+		    && ACTION(print_string, ""))
+
+DEF_(out_comma,	ACTION(print, ",\n"))	/* only prints the JSON separator */
+
+DEF_(empty,	SEQ(wss, ocmnt, nl))
+DEF_(empties,	MANY(empty))
+DEF_(header,	SEQ(empties, bra, sname, ket, eol)
+		    && ACTION(print, ": {\n"))
+DEF_(entry,	SEQ(empties, key, eq, value, eol))
+DEF (entry_x,	SEQ(out_comma, entry))
+DEF (entries_x,	MANY(entry_x))
+DEF_(entries,	SEQ(entry, entries_x)
+		    && ACTION(print, "\n  }"))
+
+DEF_(tail,	SEQ(eof, wss, eol, anys))
+DEF (otail,	OPT(tail))
+DEF_(sect,	SEQ(header, entries))
+// (A) in use: one sect, then MANY of (comma output, sect)
+DEF (sect_x,	SEQ(out_comma, sect))
+DEF (sects_x,	MANY(sect_x))
+DEF (sects1,	SEQ(sect, sects_x))
+// (B) disabled alternative: the repetition spelled out as recursion via OPT
+//static parser sects2;
+//DEF (sect_x,	SEQ(out_comma, sect))
+//DEF (sects3,	SEQ(sect_x, sects2))
+//DEF (sects2,	OPT(sects3))
+//DEF (sects1,	SEQ(sect, sects2))
+// (C) disabled alternative: right-recursive CHOICE, comma between sections
+//static parser sects1;
+//DEF (sects2,	SEQ(sect, out_comma, sects1))	/* if only tail-recursive */
+//DEF (sects1,	CHOICE(sects2, sect))
+DEF_(sects,	ACTION(print, "{\n") &&
+		    OPT(sects1)
+		    && ACTION(print, "\n}\n"))
+DEF_(inifile,	TRY(sects, empties, wss, otail))
+
+/* action: log offset, length and nonterminal name of a match to stderr */
+bool
+trace(void *ctx, void *env, const char *s, size_t len)
+{
+	const char *name = ctx;		/* nonterminal name, from DEF_ */
+	const char *start = env;	/* taken as the start of the input */
+	size_t offset = (size_t)(s - start);
+
+	fprintf(stderr, "%4zx: %4zu byte %s\n", offset, len, name);
+	return true;
+}
+
+/* action: write the string passed as ctx to stdout; env, s, len are unused */
+bool
+print(void *ctx, void *env, const char *s, size_t len)
+{
+	return fputs((const char *)ctx, stdout), true;	/* write errors ignored */
+}
+
+/*
+ * action: print prefix ctx, then the matched bytes s[0..len) as a JSON string
+ */
+bool
+print_string(void *ctx, void *env, const char *s, size_t len)
+{
+	fputs(ctx, stdout);	/* caller-supplied prefix (indentation) */
+	putchar('"');
+	while (len-- > 0) {
+		unsigned char ch = (unsigned char)*s++;
+		const char *esc;
+
+		assert(ch <= 127);	/* XXX only ASCII */
+		/* characters that get a two-character escape */
+		esc = ch == '"' ? "\\\"" :
+		    ch == '\\' ? "\\\\" :
+		    ch == '\t' ? "\\t" : NULL;
+		if (esc != NULL)
+			fputs(esc, stdout);
+		else if (ch <= 0x1F || ch == 0x7F)	/* i.e. iscntrl */
+			printf("\\u00%.2hhx", ch);
+		else
+			putchar(ch);
+	}
+	putchar('"');
+
+	return true;
+}
+
+
+#include <stdio.h>
+#include <stdlib.h>	/* calloc */
+#include <fcntl.h>	/* open, lseek */
+#include <sys/mman.h>	/* mmap */
+#include <err.h>
+#include <assert.h>
+
+extern char *__progname;
+
+/*
+ * run the 'inifile' parser on the file named by argv[1]; the parser's
+ * actions write JSON to stdout.  returns 0 on success, 1 on syntax error.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct cache cache = {0};	/* "{}" is not valid C99 */
+	const char *infile;
+	const char *input;
+	const char *p;
+	int fd;
+	off_t o;
+	size_t sz, pos;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s file\n", __progname);
+		return 3;
+	}
+	infile = argv[1];
+
+	/* mmap input; the size is found by seeking to the end */
+	if ((fd = open(infile, O_RDONLY)) == -1)
+		err(2, "%s", infile);
+	if ((o = lseek(fd, 0, SEEK_END)) == -1)
+		err(2, "lseek");
+	sz = o;
+	input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (input == MAP_FAILED)
+		err(2, "mmap");
+
+	/* result cache: must start out zeroed */
+	assert(cache.table == NULL);
+	assert(cache.capacity == 0);
+	assert(cache.nused == 0);
+
+	/* run parser; last argument is presumably the env seen by the actions */
+	p = inifile((struct stream){input, input + sz}, &cache, (void *)input);
+	if (p == NULL) {
+		fprintf(stderr, "%s: syntax error\n", infile);
+		return 1;
+	}
+	assert(p >= input);	/* p == input: nothing matched, reported below */
+	assert(p <= input + sz);
+	pos = p - input;
+	if (pos < sz) {
+		fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n",
+		    infile, pos, sz);
+		return 1;
+	}
+	fprintf(stderr, "success (consumed %zu/%zu bytes of input)\n", pos, sz);
+	return 0;
+}