commit dc58ab0616d7d9dcf3eea536f0b46ad34789c05c from: Sven M. Hallberg date: Tue Jun 2 21:15:14 2026 UTC listprg: unicode PETSCII representation commit - fd0515e71ed2e34c2d2928343f5d00da2b6c1e48 commit + dc58ab0616d7d9dcf3eea536f0b46ad34789c05c blob - 77fe098fef382c0feb258ac3db516ff963c4ab7f blob + 1c2590780b262ede129ed8ebe99d38ba97ba8349 --- README +++ README @@ -48,19 +48,11 @@ Files are naturally formatted to fit the limited 40x25 character screen of the C64 and its line-wise editing workflow, so some adjustment from modern "sensibilities" might be necessary when - reading. Note that several characters in the Commodore character - set appear different from ASCII. Notable examples: + reading. - ASCII Commodore "PETSCII" - ----- ------------------- - ^ up arrow - _ left arrow - ~ pi - \ pound (currency) sign + Included here are a few programs that were produced "along the way" + during development: - Finally, included here are a few programs that were produced "along - the way" during development: - HEXDUMP dump memory (stand-alone version) HDFILE hexdump raw contents of a file SAVEMEM save memory to PRG file (stand-alone version) @@ -71,3 +63,12 @@ MKALLTOK write a PRG file that contains every possible BASIC token (used for listprg) + Notes on encoding: Several (printable) characters in the Commodore + character set differ from ASCII - arrows, for instance, in place of + caret and underscore. Such characters have been converted to Unicode + to appear visually correct. Lower case characters are not normally + available at all, their codes being reused for graphical symbols. + The code of MANUAL and TV contains strings with embedded control + characters. These appear as "{XX}" in the listings, which is + unambiguous because curly braces are not part of the character set. 
+ blob - 9914c936b8f4c1a2078cb356aef82c1b8cbe7077 blob + fe68824bbda194597ba7c8604b4add7a605e832e --- listprg.c +++ listprg.c @@ -11,13 +11,15 @@ #include /* fstat */ #include -int vflag; /* verbose */ -int xflag; /* print raw token values */ +int lflag; /* use lower-case charset */ +int rflag; /* output non-tokens raw */ +int vflag; /* verbose mode */ +int xflag; /* print tokens as hex */ void usage(void) { - fprintf(stderr, "usage: listprg [-vx] [file ...]\n"); + fprintf(stderr, "usage: listprg [-lrvx] [file ...]\n"); exit(1); } @@ -27,24 +29,147 @@ const char *tokens[128] = { "WAIT", "LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT", "CONT", "LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW", "TAB(", "TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/", - "^", "AND", "OR", ">", "=", "<", "SGN", "INT", "ABS", "USR", "FRE", + "\u2191", /* 94, ASCII ^, up arrow */ + "AND", "OR", ">", "=", "<", "SGN", "INT", "ABS", "USR", "FRE", "POS", "SQR", "RND", "LOG", "EXP", "COS", "SIN", "TAN", "ATN", "PEEK", "LEN", "STR$", "VAL", "ASC", "CHR$", "LEFT$", "RIGHT$", "MID$", "GO", - "{CC}", /* XXX don't know, generated NUL?! */ - "FOR", "NEXT", "DATA", "INPUT#", "INPUT", "DIM", "READ", "LET", "GOTO", - "RUN", "IF", "RESTORE", "GOSUB", "RETURN", "REM", "STOP", "ON", "WAIT", - "LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT", "CONT", - "LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW", "TAB(", - "TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/", "^", - "AND", "OR", ">", "=", - "{pi}" /* 255, pi in commodore character set */ + + "{CC}", /* 204, generated NUL, syntax error?! */ + /* duplicates (?) 
*/ + "FOR", "NEXT", "DATA", "INPUT#", "INPUT", "DIM", "READ", "LET", + "GOTO", "RUN", "IF", "RESTORE", "GOSUB", "RETURN", "REM", "STOP", "ON", + "WAIT", "LOAD", "SAVE", "VERIFY", "DEF", "POKE", "PRINT#", "PRINT", + "CONT", "LIST", "CLR", "CMD", "SYS", "OPEN", "CLOSE", "GET", "NEW", + "TAB(", "TO", "FN", "SPC(", "THEN", "NOT", "STEP", "+", "-", "*", "/", + "\u2191", "AND", "OR", ">", "=", + + "\u03c0" /* 255, ASCII ~, pi */ }; +/* upper-case PETSCII character set, unicode representation */ +const char *upper[256] = { + /* 0x00 - 0x1F (control characters) */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* 0x20 - 0x3F */ + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + /* 0x40 - 0x5F */ + "@", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "[", "\u00A3", "]", "\u2191", "\u2190", + /* 0x60 - 0x7F */ + "\u2500", "\u2660", "\U0001FB72", "\U0001FB78", + "\U0001FB77", "\U0001FB76", "\U0001FB7A", "\U0001FB71", + "\U0001FB74", "\u256E", "\u2570", "\u256F", + "\U0001FB7C", "\u2572", "\u2571", "\U0001FB7D", + "\U0001FB7E", "\u2022", "\U0001FB7B", "\u2665", + "\U0001FB70", "\u256D", "\u2573", "\u25CB", + "\u2663", "\U0001FB75", "\u2666", "\u253C", + "\U0001FB8C", "\U0001FB73", "\u03C0", "\u25E5", + /* 0x80 - 0x9F (control characters) */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* 0xA0 - 0xBF */ + "\u00A0", "\u258C", "\u2584", "\u2594", + "\u2581", "\u258E", "\u2592", "\u2595", + "\U0001FB8F", "\u25E4", "\U0001FB87", "\u251C", + "\u2597", "\u2514", "\u2510", 
"\u2582", + "\u250C", "\u2534", "\u252C", "\u2524", + "\u258E", "\u258D", "\U0001FB88", "\U0001FB82", + "\U0001FB83", "\u2583", "\U0001FB7F", "\u2596", + "\u259D", "\u2518", "\u2598", "\u259A", + /* duplicate of 0x60 - 0x7F */ + "\u2500", "\u2660", "\U0001FB72", "\U0001FB78", + "\U0001FB77", "\U0001FB76", "\U0001FB7A", "\U0001FB71", + "\U0001FB74", "\u256E", "\u2570", "\u256F", + "\U0001FB7C", "\u2572", "\u2571", "\U0001FB7D", + "\U0001FB7E", "\u2022", "\U0001FB7B", "\u2665", + "\U0001FB70", "\u256D", "\u2573", "\u25CB", + "\u2663", "\U0001FB75", "\u2666", "\u253C", + "\U0001FB8C", "\U0001FB73", "\u03C0", "\u25E5", + /* duplicate of 0xA0 - 0xBE */ + "\u00A0", "\u258C", "\u2584", "\u2594", + "\u2581", "\u258E", "\u2592", "\u2595", + "\U0001FB8F", "\u25E4", "\U0001FB87", "\u251C", + "\u2597", "\u2514", "\u2510", "\u2582", + "\u250C", "\u2534", "\u252C", "\u2524", + "\u258E", "\u258D", "\U0001FB88", "\U0001FB82", + "\U0001FB83", "\u2583", "\U0001FB7F", "\u2596", + "\u259D", "\u2518", "\u2598", + /* duplicate of 0x7E */ + "\u03C0" +}; + +/* lower-case PETSCII character set, unicode representation */ +const char *lower[256] = { + /* 0x00 - 0x1F (control characters) */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* 0x20 - 0x3F */ + " ", "!", "\"", "#", "$", "%", "&", "'", + "(", ")", "*", "+", ",", "-", ".", "/", + "0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", ":", ";", "<", "=", ">", "?", + /* 0x40 - 0x5F */ + "@", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", + "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "[", "\u00A3", "]", "\u2191", "\u2190", + /* 0x60 - 0x7F */ + "\u2500", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "\u253C", + "\U0001FB8C", "\U0001FB73", "\U0001FB96", 
"\U0001FB98", + /* 0x80 - 0x9F (control characters) */ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + /* 0xA0 - 0xBF */ + "\u00A0", "\u258C", "\u2584", "\u2594", + "\u2581", "\u258E", "\u2592", "\u2595", + "\U0001FB8F", "\U0001FB99", "\U0001FB87", "\u251C", + "\u2597", "\u2514", "\u2510", "\u2582", + "\u250C", "\u2534", "\u252C", "\u2524", + "\u258E", "\u258D", "\U0001FB88", "\U0001FB82", + "\U0001FB83", "\u2583", "\u2713", "\u2596", + "\u259D", "\u2518", "\u2598", "\u259A", + /* duplicate of 0x60 - 0x7F */ + "\u2500", "A", "B", "C", "D", "E", "F", "G", + "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", + "X", "Y", "Z", "\u253C", + "\U0001FB8C", "\U0001FB73", "\U0001FB96", "\U0001FB98", + /* duplicate of 0xA0 - 0xBE */ + "\u00A0", "\u258C", "\u2584", "\u2594", + "\u2581", "\u258E", "\u2592", "\u2595", + "\U0001FB8F", "\U0001FB99", "\U0001FB87", "\u251C", + "\u2597", "\u2514", "\u2510", "\u2582", + "\u250C", "\u2534", "\u252C", "\u2524", + "\u258E", "\u258D", "\U0001FB88", "\U0001FB82", + "\U0001FB83", "\u2583", "\u2713", "\u2596", + "\u259D", "\u2518", "\u2598", + /* duplicate of 0x7E */ + "\U0001FB96" +}; + void listprg(const char *file) { struct stat st; unsigned char *p, *q, *s; + const char **charset; void *m; size_t len; int fd, i, n; @@ -61,6 +186,7 @@ listprg(const char *file) if (m == MAP_FAILED) err(1, "%s: mmap", file); + charset = lflag ? 
lower : upper; for (p = m, q = p + len; p < q; p++) { if (p[1] == 0) { /* high byte of next ptr */ if (p[0] != 0) @@ -78,14 +204,20 @@ listprg(const char *file) for (p += 4; *p != 0; p++) { if (*p == '"') instring = !instring; - if (instring) - putchar(*p); - else if (*p > 127 && !xflag) - fputs(tokens[*p - 128], stdout); - else if (*p >= 32 && *p < 127) /* isprint() */ - putchar(*p); - else - printf("{%2X}", *p); + if (instring || *p < 128) { + if (rflag) + putchar(*p); + else if (charset[*p] != NULL) + fputs(charset[*p], stdout); + else + printf("{%02X}", *p); /* zero-pad so codes below 0x10 print as {0X}, not { X} */ + } else { + if (xflag) + printf("{%02X}", *p); /* same two-digit form as above */ + else + fputs(tokens[*p - 128], stdout); + } } putchar('\n'); } @@ -109,8 +241,14 @@ main(int argc, char *argv[]) { int i, c; - while ((c = getopt(argc, argv, "vx")) != -1) { + while ((c = getopt(argc, argv, "lrvx")) != -1) { switch (c) { + case 'l': + lflag = 1; + break; + case 'r': + rflag = 1; + break; case 'v': vflag = 1; break;