commit dc58ab0616d7d9dcf3eea536f0b46ad34789c05c
from: Sven M. Hallberg <pesco@khjk.org>
date: Tue Jun  2 21:15:14 2026 UTC

listprg: unicode PETSCII representation

commit - fd0515e71ed2e34c2d2928343f5d00da2b6c1e48
commit + dc58ab0616d7d9dcf3eea536f0b46ad34789c05c
blob - 77fe098fef382c0feb258ac3db516ff963c4ab7f
blob + 1c2590780b262ede129ed8ebe99d38ba97ba8349
--- README
+++ README
@@ -48,19 +48,11 @@
      Files are naturally formatted to fit the limited 40x25 character
      screen of the C64 and its line-wise editing workflow, so some
      adjustment from modern "sensibilities" might be necessary when
-     reading.  Note that several characters in the Commodore character
-     set appear different from ASCII. Notable examples:
+     reading.
 
-           ASCII        Commodore "PETSCII"
-           -----        -------------------
-           ^            up arrow
-           _            left arrow
-           ~            pi
-           \            pound (currency) sign
+     Included here are a few programs that were produced "along the way"
+     during development:
 
-     Finally, included here are a few programs that were produced "along
-     the way" during development:
-
            HEXDUMP      dump memory (stand-alone version)
            HDFILE       hexdump raw contents of a file
            SAVEMEM      save memory to PRG file (stand-alone version)
@@ -71,3 +63,12 @@
            MKALLTOK     write a PRG file that contains every possible BASIC
                         token (used for listprg)
 
+     Notes on encoding: Several (printable) characters in the Commodore
+     character set differ from ASCII - arrows, for instance, in place of
+     caret and underscore.  Such characters have been converted to Unicode
+     to appear visually correct.  Lower-case characters are not normally
+     available at all, their codes being reused for graphical symbols.
+     The code of MANUAL and TV contains strings with embedded control
+     characters.  These appear as "{XX}" in the listings, which is
+     unambiguous because curly braces are not part of the character set.
+
blob - 9914c936b8f4c1a2078cb356aef82c1b8cbe7077
blob + fe68824bbda194597ba7c8604b4add7a605e832e
--- listprg.c
+++ listprg.c
@@ -11,13 +11,15 @@
 #include <sys/stat.h>	/* fstat */
 #include <err.h>
 
-int vflag;				/* verbose */
-int xflag;				/* print raw token values */
+int lflag;				/* use lower-case charset */
+int rflag;				/* output non-tokens raw */
+int vflag;				/* verbose mode */
+int xflag;				/* print tokens as hex */
 
 void
 usage(void)
 {
-	fprintf(stderr, "usage: listprg [-vx] [file ...]\n");
+	fprintf(stderr, "usage: listprg [-lrvx] [file ...]\n");
 	exit(1);
 }
 
@@ -27,24 +29,147 @@ const char *tokens[128] = {
 	"WAIT", "LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT", 
 	"CONT", "LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW", 
 	"TAB(", "TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/", 
-	"^", "AND", "OR", ">", "=", "<", "SGN", "INT", "ABS", "USR", "FRE", 
+	"\u2191", /* 94, ASCII ^, up arrow */
+	"AND", "OR", ">", "=", "<", "SGN", "INT", "ABS", "USR", "FRE", 
 	"POS", "SQR", "RND", "LOG", "EXP", "COS", "SIN", "TAN", "ATN", "PEEK", 
 	"LEN", "STR$", "VAL", "ASC", "CHR$", "LEFT$", "RIGHT$", "MID$", "GO", 
-	"{CC}", /* XXX don't know, generated NUL?! */
-	"FOR", "NEXT", "DATA", "INPUT#", "INPUT", "DIM", "READ", "LET", "GOTO",
-	"RUN", "IF", "RESTORE", "GOSUB", "RETURN", "REM", "STOP", "ON", "WAIT",
-	"LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT", "CONT",
-	"LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW", "TAB(",
-	"TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/", "^",
-	"AND", "OR", ">", "=",
-	"{pi}" /* 255, pi in commodore character set */
+
+	"{CC}",	/* 204, generated NUL, syntax error?! */
+	/* duplicates (?) */
+	"FOR", "NEXT", "DATA", "INPUT#", "INPUT", "DIM", "READ", "LET",
+	"GOTO", "RUN", "IF", "RESTORE", "GOSUB", "RETURN", "REM", "STOP", "ON",
+	"WAIT", "LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT",
+	"CONT", "LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW",
+	"TAB(", "TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/",
+	"\u2191", "AND", "OR", ">", "=",
+
+	"\u03c0" /* 255, ASCII ~, pi */
 };
 
+/* upper-case PETSCII set: unicode string per byte value, NULL = no glyph */
+const char *upper[256] = {
+	/* 0x00 - 0x1F (control characters, no printable form) */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	/* 0x20 - 0x3F (identical to ASCII) */
+	" ",  "!",  "\"", "#",  "$",  "%",  "&",  "'",
+	"(",  ")",  "*",  "+",  ",",  "-",  ".",  "/",
+	"0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
+	"8",  "9",  ":",  ";",  "<",  "=",  ">",  "?",
+	/* 0x40 - 0x5F (ASCII, except pound sign, up arrow, left arrow) */
+	"@",  "A",  "B",  "C",  "D",  "E",  "F",  "G",
+	"H",  "I",  "J",  "K",  "L",  "M",  "N",  "O",
+	"P",  "Q",  "R",  "S",  "T",  "U",  "V",  "W",
+	"X",  "Y",  "Z",  "[", "\u00A3", "]", "\u2191", "\u2190",
+	/* 0x60 - 0x7F (graphical symbols; 0x7E is pi) */
+	"\u2500",	"\u2660",	"\U0001FB72",	"\U0001FB78",
+	"\U0001FB77",	"\U0001FB76",	"\U0001FB7A",	"\U0001FB71",
+	"\U0001FB74",	"\u256E",	"\u2570",	"\u256F",
+	"\U0001FB7C",	"\u2572",	"\u2571",	"\U0001FB7D",
+	"\U0001FB7E",	"\u2022",	"\U0001FB7B",	"\u2665",
+	"\U0001FB70",	"\u256D",	"\u2573",	"\u25CB",
+	"\u2663",	"\U0001FB75",	"\u2666",	"\u253C",
+	"\U0001FB8C",	"\U0001FB73",	"\u03C0",	"\u25E5",
+	/* 0x80 - 0x9F (control characters, no printable form) */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	/* 0xA0 - 0xBF (no-break space, then block and line graphics) */
+	"\u00A0",	"\u258C",	"\u2584",	"\u2594",
+	"\u2581",	"\u258E",	"\u2592",	"\u2595",
+	"\U0001FB8F",	"\u25E4",	"\U0001FB87",	"\u251C",
+	"\u2597",	"\u2514",	"\u2510",	"\u2582",
+	"\u250C",	"\u2534",	"\u252C",	"\u2524",
+	"\u258E",	"\u258D",	"\U0001FB88",	"\U0001FB82",
+	"\U0001FB83",	"\u2583",	"\U0001FB7F",	"\u2596",
+	"\u259D",	"\u2518",	"\u2598",	"\u259A",
+	/* 0xC0 - 0xDF (duplicate of 0x60 - 0x7F) */
+	"\u2500",	"\u2660",	"\U0001FB72",	"\U0001FB78",
+	"\U0001FB77",	"\U0001FB76",	"\U0001FB7A",	"\U0001FB71",
+	"\U0001FB74",	"\u256E",	"\u2570",	"\u256F",
+	"\U0001FB7C",	"\u2572",	"\u2571",	"\U0001FB7D",
+	"\U0001FB7E",	"\u2022",	"\U0001FB7B",	"\u2665",
+	"\U0001FB70",	"\u256D",	"\u2573",	"\u25CB",
+	"\u2663",	"\U0001FB75",	"\u2666",	"\u253C",
+	"\U0001FB8C",	"\U0001FB73",	"\u03C0",	"\u25E5",
+	/* 0xE0 - 0xFE (duplicate of 0xA0 - 0xBE) */
+	"\u00A0",	"\u258C",	"\u2584",	"\u2594",
+	"\u2581",	"\u258E",	"\u2592",	"\u2595",
+	"\U0001FB8F",	"\u25E4",	"\U0001FB87",	"\u251C",
+	"\u2597",	"\u2514",	"\u2510",	"\u2582",
+	"\u250C",	"\u2534",	"\u252C",	"\u2524",
+	"\u258E",	"\u258D",	"\U0001FB88",	"\U0001FB82",
+	"\U0001FB83",	"\u2583",	"\U0001FB7F",	"\u2596",
+	"\u259D",	"\u2518",	"\u2598",
+	/* 0xFF (duplicate of 0x7E, pi) */
+	"\u03C0"
+};
+
+/* lower-case PETSCII set: unicode string per byte value, NULL = no glyph */
+const char *lower[256] = {
+	/* 0x00 - 0x1F (control characters, no printable form) */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	/* 0x20 - 0x3F (identical to ASCII) */
+	" ",  "!",  "\"", "#",  "$",  "%",  "&",  "'",
+	"(",  ")",  "*",  "+",  ",",  "-",  ".",  "/",
+	"0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
+	"8",  "9",  ":",  ";",  "<",  "=",  ">",  "?",
+	/* 0x40 - 0x5F (lower-case letters; pound sign, up arrow, left arrow) */
+	"@",  "a",  "b",  "c",  "d",  "e",  "f",  "g",
+	"h",  "i",  "j",  "k",  "l",  "m",  "n",  "o",
+	"p",  "q",  "r",  "s",  "t",  "u",  "v",  "w",
+	"x",  "y",  "z",  "[", "\u00A3", "]", "\u2191", "\u2190",
+	/* 0x60 - 0x7F (upper-case letters plus a few graphical symbols) */
+	"\u2500", "A", "B", "C", "D", "E",  "F",  "G",
+	"H",  "I",  "J",  "K",  "L",  "M",  "N",  "O",
+	"P",  "Q",  "R",  "S",  "T",  "U",  "V",  "W",
+	"X",  "Y",  "Z",  "\u253C",
+	"\U0001FB8C",	"\U0001FB73",	"\U0001FB96",	"\U0001FB98",
+	/* 0x80 - 0x9F (control characters, no printable form) */
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
+	/* 0xA0 - 0xBF (no-break space, then block and line graphics) */
+	"\u00A0",	"\u258C",	"\u2584",	"\u2594",
+	"\u2581",	"\u258E",	"\u2592",	"\u2595",
+	"\U0001FB8F",	"\U0001FB99",	"\U0001FB87",	"\u251C",
+	"\u2597",	"\u2514",	"\u2510",	"\u2582",
+	"\u250C",	"\u2534",	"\u252C",	"\u2524",
+	"\u258E",	"\u258D",	"\U0001FB88",	"\U0001FB82",
+	"\U0001FB83",	"\u2583",	"\u2713",	"\u2596",
+	"\u259D",	"\u2518",	"\u2598",	"\u259A",
+	/* 0xC0 - 0xDF (duplicate of 0x60 - 0x7F) */
+	"\u2500", "A", "B", "C", "D", "E",  "F",  "G",
+	"H",  "I",  "J",  "K",  "L",  "M",  "N",  "O",
+	"P",  "Q",  "R",  "S",  "T",  "U",  "V",  "W",
+	"X",  "Y",  "Z",  "\u253C",
+	"\U0001FB8C",	"\U0001FB73",	"\U0001FB96",	"\U0001FB98",
+	/* 0xE0 - 0xFE (duplicate of 0xA0 - 0xBE) */
+	"\u00A0",	"\u258C",	"\u2584",	"\u2594",
+	"\u2581",	"\u258E",	"\u2592",	"\u2595",
+	"\U0001FB8F",	"\U0001FB99",	"\U0001FB87",	"\u251C",
+	"\u2597",	"\u2514",	"\u2510",	"\u2582",
+	"\u250C",	"\u2534",	"\u252C",	"\u2524",
+	"\u258E",	"\u258D",	"\U0001FB88",	"\U0001FB82",
+	"\U0001FB83",	"\u2583",	"\u2713",	"\u2596",
+	"\u259D",	"\u2518",	"\u2598",
+	/* 0xFF (duplicate of 0x7E) */
+	"\U0001FB96"
+};
+
 void
 listprg(const char *file)
 {
 	struct stat st;
 	unsigned char *p, *q, *s;
+	const char **charset;
 	void *m;
 	size_t len;
 	int fd, i, n;
@@ -61,6 +186,7 @@ listprg(const char *file)
 	if (m == MAP_FAILED)
 		err(1, "%s: mmap", file);
 
+	charset = lflag ? lower : upper;
 	for (p = m, q = p + len; p < q; p++) {
 		if (p[1] == 0) {		/* high byte of next ptr */
 			if (p[0] != 0)
@@ -78,14 +204,20 @@ listprg(const char *file)
 		for (p += 4; *p != 0; p++) {
 			if (*p == '"')
 				instring = !instring;
-			if (instring)
-				putchar(*p);
-			else if (*p > 127 && !xflag)
-				fputs(tokens[*p - 128], stdout);
-			else if (*p >= 32 && *p < 127)	/* isprint() */
-				putchar(*p);
-			else
-				printf("{%2X}", *p);
+
+			if (instring || *p < 128) {	/* literal character */
+				if (rflag)
+					putchar(*p);
+				else if (charset[*p] != NULL)
+					fputs(charset[*p], stdout);
+				else	/* no glyph: zero-padded hex, "{05}" */
+					printf("{%02X}", *p);
+			} else {	/* byte >= 128 outside a string: token */
+				if (xflag)
+					printf("{%02X}", *p);
+				else
+					fputs(tokens[*p - 128], stdout);
+			}
 		}
 		putchar('\n');
 	}
@@ -109,8 +241,14 @@ main(int argc, char *argv[])
 {
 	int i, c;
 
-	while ((c = getopt(argc, argv, "vx")) != -1) {
+	while ((c = getopt(argc, argv, "lrvx")) != -1) {
 		switch (c) {
+		case 'l':
+			lflag = 1;
+			break;
+		case 'r':
+			rflag = 1;
+			break;
 		case 'v':
 			vflag = 1;
 			break;