/*
 * Allocate `size` bytes, terminating the program if allocation fails.
 *
 * Never returns NULL: on failure a message is written to stderr and the
 * process exits with EXIT_FAILURE, so callers need no error check.
 * The returned memory is uninitialized and must be released with free().
 */
static void* xmalloc(size_t size)
{
    /* malloc(0) may legitimately return NULL (implementation-defined).
     * Request at least one byte so that a NULL result always means a
     * genuine allocation failure, not a spurious "Out of memory!". */
    void *ptr = malloc(size != 0 ? size : 1);
    if (ptr == NULL) {
        // LCOV_EXCL_START
        // exclude from coverage analysis because we can't simulate an out of memory error in testing
        fprintf(stderr, "Out of memory!\n");
        exit(EXIT_FAILURE);
        // LCOV_EXCL_STOP
    }
    return ptr;
}
+
+void packed_to_token(long packed, char token[TOKLEN + 1])
+{
+ // The advent->ascii mapping.
+ const char advent_to_ascii[] = {
+ ' ', '!', '"', '#', '$', '%', '&', '\'',
+ '(', ')', '*', '+', ',', '-', '.', '/',
+ '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', ':', ';', '<', '=', '>', '?',
+ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', '\0', '\0', '\0', '\0', '\0',
+ };
+
+ // Unpack and map back to ASCII.
+ for (int i = 0; i < 5; ++i) {
+ char advent = (packed >> i * 6) & 63;
+ token[i] = advent_to_ascii[(int) advent];
+ }
+
+ // Ensure the last character is \0.
+ token[5] = '\0';
+
+ // Replace trailing whitespace with \0.
+ for (int i = 4; i >= 0; --i) {
+ if (token[i] == ' ' ||
+ token[i] == '\t')
+ token[i] = '\0';
+ else
+ break;
+ }
+}
+
+long token_to_packed(const char token[])
+{
+ const char ascii_to_advent[] = {
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ };
+
+ size_t t_len = strlen(token);
+ if (t_len > TOKLEN)
+ t_len = TOKLEN;
+ long packed = 0;
+ for (size_t i = 0; i < t_len; ++i) {
+ char mapped = ascii_to_advent[(int) toupper(token[i])];
+ packed |= (mapped << (6 * i));
+ }
+ return (packed);
+}
+
+void tokenize(char* raw, struct command_t *cmd)
+{
+ memset(cmd, '\0', sizeof(struct command_t));
+
+ /* Bound prefix on the %s would be needed to prevent buffer
+ * overflow. but we shortstop this more simply by making each
+ * raw-input buffer as long as the enrire inout buffer. */
+ sscanf(raw, "%s%s", cmd->raw1, cmd->raw2);
+
+ // pack the substrings
+ cmd->wd1 = token_to_packed(cmd->raw1);
+ cmd->wd2 = token_to_packed(cmd->raw2);
+
+ /* (ESR) In oldstyle mode, simulate the uppercasing and truncating
+ * effect on raw tokens of packing them into sixbit characters, 5
+ * to a 32-bit word. This is something the FORTRAN version did
+ * becuse archaic FORTRAN had no string types. Don Wood's
+ * mechanical translation of 2.5 to C retained the packing and
+ * thus this misfeature.
+ *
+ * It's philosophically questionable whether this is the right
+ * thing to do even in oldstyle mode. On one hand, the text
+ * mangling was not authorial intent, but a result of limitations
+ * in their tools. On the other, not simulating this misbehavior
+ * goes against the goal of making oldstyle as accurate as
+ * possible an emulation of the original UI.
+ */
+ if (settings.oldstyle) {
+ cmd->raw1[TOKLEN + TOKLEN] = cmd->raw2[TOKLEN + TOKLEN] = '\0';
+ for (size_t i = 0; i < strlen(cmd->raw1); i++)
+ cmd->raw1[i] = toupper(cmd->raw1[i]);
+ for (size_t i = 0; i < strlen(cmd->raw2); i++)
+ cmd->raw2[i] = toupper(cmd->raw2[i]);
+ }
+}
+
+/* Hide the fact that words are currently packed longs */
+
+void wordclear(token_t *v)
+{
+ *v = 0;