Commit Diff


commit - 9d2eb8f8b610e4c920b0c463618df5452e4641c6
commit + 903947a29f50a9398009e743b1b17a6b349541a9
blob - /dev/null
blob + cd996c612addccf81c289b86ac36f0366cb961c9 (mode 644)
--- /dev/null
+++ ini_r.c
@@ -0,0 +1,172 @@
+/*
+ * demo: (simple) ini files
+ *
+ * EBNFish:
+ *
+ *	inifile	= {sect} {empty} {ws} [tail]
+ *	sect	= header {entry}
+ *	tail	= "EOF" {ws} eol {any}
+ *
+ *	header	= {empty} bra sname ket eol
+ *	entry	= {empty} key "=" value eol
+ *	empty	= {ws} [comment] nl
+ *
+ *	(* tokens *)
+ *	eol	= nl | end
+ *	bra	= {ws} "["
+ *	ket	= "]" {ws}
+ *	comment	= ";" {lchar}
+ *	sname	= {lchar - "]"}+
+ *	key	= {lchar - "="}+
+ *	value	= {lchar}+
+ *
+ *	(* character classes *)
+ *	lchar	= ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ *	ws	= ? ASCII codes 32 (SP), 9 (HT) ?
+ *	nl	= ? ASCII code 10 (LF) ?
+ *	end	= ? end of input ?
+ *	any	= ? any character ?
+ *
+ * converted to be compatible with our combinators:
+ *
+ *	inifile	= sects empties wss tail
+ *	sects	= {header entries}
+ *	tail	= [eof wss eol anys]
+ *
+ *	header	= empties bra sname ket eol
+ *	entries	= {empties key eq value eol}
+ *	empties	= {wss comment nl}
+ *
+ *	eol	= nl | end
+ *	bra	= wss leftbr
+ *	ket	= rightbr wss
+ *	comment	= [semi lchars]
+ *	sname	= {schar}+
+ *	key	= {kchar}+
+ *	value	= {lchar}+
+ *
+ *	eof	= "EOF"
+ *	eq	= "="
+ *	semi	= ";"
+ *	leftbr	= "["
+ *	rightbr	= "]"
+ *
+ *	lchars	= {lchar}
+ *	wss	= {ws}
+ *	anys	= {any}
+ *
+ *	lchar	= ? ASCII codes 9 (HT), 0x20-0x7e (SP, alnum, punct) ?
+ *	schar	= ? ASCII codes 9, 0x20-0x5c, 0x5e-0x7e ?
+ *	kchar	= ? ASCII codes 9, 0x20-0x3c, 0x3e-0x7e ?
+ *	ws	= ? ASCII codes 32 (SP), 9 (HT) ?
+ *	nl	= ? ASCII code 10 (LF) ?
+ *	end	= ? end of input ?
+ *	any	= ? any character ?
+ *
+ * note how right-hand sides have one of the following forms:
+ *
+ *  - a sequence of nonterminals (SEQ)
+ *  - a sequence of nonterminals inside { } (MANY)
+ *  - a sequence of nonterminals inside { }+ (MANY1)
+ *  - a sequence of nonterminals inside [ ] (OPT)
+ *  - a choice of nonterminals (CHOICE)
+ *  - a string of characters (STRING)
+ *  - a single character (CHAR)
+ *
+ * finally, we can realize the "special sequences" (? ... ?) using END,
+ * ANYCHAR, RANGE, CHAR, and the C expression operators ||, && and !.
+ */
+
+#include "minip_r.h"
+/* character classes; DEF and the combinators presumably come from minip_r.h */
+DEF(lchar,	CHAR('\t') || RANGE(0x20, 0x7e))
+DEF(schar,	CHAR('\t') || RANGE(0x20, 0x5c) || RANGE(0x5e, 0x7e))
+		/* schar could also be: !CHAR(']') && SEQ(lchar); cf. kchar */
+DEF(kchar,	!CHAR('=') && SEQ(lchar))
+DEF(ws,		CHAR('\t') || CHAR(' '))
+DEF(nl,		CHAR('\n'))
+DEF(end,	END)
+DEF(any,	ANYCHAR)
+/* zero-or-more repetitions of a character class */
+DEF(lchars,	MANY(lchar))
+DEF(wss,	MANY(ws))
+DEF(anys,	MANY(any))	/* = OMEGA */
+/* literal tokens */
+DEF(eof,	STRING("EOF"))
+DEF(eq,		CHAR('='))
+DEF(semi,	CHAR(';'))
+DEF(leftbr,	CHAR('['))
+DEF(rightbr,	CHAR(']'))
+/* tokens built from the pieces above */
+DEF(eol,	CHOICE(nl, end))
+DEF(bra,	SEQ(wss, leftbr))
+DEF(ket,	SEQ(rightbr, wss))
+DEF(comment,	OPT(semi, lchars))
+DEF(sname,	MANY1(schar))
+DEF(key,	MANY1(kchar))
+DEF(value,	MANY1(lchar))
+/* line-level rules: blank/comment lines, section headers, entries */
+DEF(empties,	MANY(wss, comment, nl))
+DEF(header,	SEQ(empties, bra, sname, ket, eol))
+DEF(entries,	MANY(empties, key, eq, value, eol))
+/* file-level rules; 'inifile' is the start symbol of the grammar */
+DEF(tail,	OPT(eof, wss, eol, anys))
+DEF(sects,	MANY(header, entries))
+DEF(inifile,	SEQ(sects, empties, wss, tail))
+
+#include <sys/mman.h>	/* mmap */
+#include <assert.h>
+#include <err.h>
+#include <fcntl.h>	/* open */
+#include <stdio.h>
+#include <unistd.h>	/* lseek */
+
+extern char *__progname;
+
+/*
+ * run the 'inifile' parser on a file given on the command line.
+ * exit status: 0 full parse, 1 syntax error, 2 I/O error, 3 usage.
+ */
+int
+main(int argc, char *argv[])
+{
+	const char *infile, *p;
+	const char *input = "";	/* stands in for an empty file */
+	int fd;
+	off_t o;
+	size_t sz, pos;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s file\n", __progname);
+		return 3;
+	}
+	infile = argv[1];
+
+	/* mmap input; a zero-length mapping is an error, so skip it then */
+	if ((fd = open(infile, O_RDONLY)) == -1)
+		err(2, "%s", infile);
+	if ((o = lseek(fd, 0, SEEK_END)) == -1)
+		err(2, "lseek");
+	sz = (size_t)o;
+	if (sz > 0)
+		input = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (input == MAP_FAILED)
+		err(2, "mmap");
+
+	/* run parser; NULL is reported as no match, else p ends the match */
+	p = inifile(input, input + sz);
+	if (p == NULL) {
+		fprintf(stderr, "%s: syntax error\n", infile);
+		return 1;
+	}
+	assert(p >= input);	/* the grammar may match the empty string */
+	assert(p <= input + sz);
+	pos = p - input;
+	if (pos < sz) {
+		fprintf(stderr, "%s: syntax error (after pos. %zu/%zu)\n",
+		    infile, pos, sz);
+		return 1;
+	}
+	printf("success (consumed %zu/%zu bytes of input)\n", pos, sz);
+	return 0;
+}