From 682972020b2828867b4cffc80eb2dbe0e06ce93b Mon Sep 17 00:00:00 2001
From: katherine <shmibs@airen-no-jikken.icu>
Date: Sun, 12 May 2019 16:07:11 -0700
Subject: implement internal parser

---
 src/err.h   |  14 +-
 src/main.c  |  43 ++----
 src/parse.c | 439 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parse.h |  62 +++++++++
 src/tok.c   | 111 ++++++++++-----
 src/tok.h   |  37 ++---
 6 files changed, 622 insertions(+), 84 deletions(-)

diff --git a/src/err.h b/src/err.h
index 3d3262f..5a3b9bc 100644
--- a/src/err.h
+++ b/src/err.h
@@ -6,11 +6,17 @@
 
 #define ERR(...) \
 	do { \
-		fprintf(stderr, "error: " __VA_ARGS__); \
+		fprintf(stderr, "\033[1;31merror:\033[0m " __VA_ARGS__); /* bold red tag; ESC is spelled in octal because \e is a GNU extension */ \
 		fprintf(stderr, "\n"); \
 		exit(EXIT_FAILURE); \
 	} while (0)
 
+#define WARN(...) /* printf-style warning on stderr; unlike ERR, execution continues */ \
+	do { \
+		fprintf(stderr, "\033[1;35mwarning:\033[0m " __VA_ARGS__); /* bold magenta tag; octal ESC, since \e is a GNU extension */ \
+		fprintf(stderr, "\n"); \
+	} while (0)
+
 #define TRY(cond, ...) \
 	do { \
 		if (!(cond)) { \
@@ -24,4 +30,10 @@
 		TRY((dest) != NULL, "could not allocate memory"); \
 	} while (0)
 
+#define TRYREALLOC(dest, count) /* resize dest to hold count elements of its pointed-to type; dest is evaluated several times */ \
+	do { \
+		(dest) = realloc((dest), (count) * sizeof(*(dest))); /* NOTE(review): the old pointer is overwritten; fine only if TRY terminates on failure - confirm */ \
+		TRY((dest) != NULL, "could not allocate memory"); \
+	} while (0)
+
 #endif
diff --git a/src/main.c b/src/main.c
index ed9c413..a101849 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,48 +1,27 @@
-#include "opt.h"
 #include "err.h"
-#include "tok.h"
-
-#include <stdio.h>
-#include <errno.h>
-
-
-
+#include "opt.h"
+#include "parse.h"
 
 int main(int argc, char **argv)
 {
 	FILE *fi = stdin;
-	struct tok_s t;
+	const char *finame = "stdin"; /* name shown in diagnostics; replaced below when an input file is given */
+	struct parse_result_s pr;
 
 	opt_parse(argc, argv);
 
 	if (opt_infile_str() != NULL) {
-		fi = fopen(opt_infile_str(), "r");
-		TRY(fi != NULL, "could not read file `%s`", opt_infile_str());
+		finame = opt_infile_str();
+		fi = fopen(finame, "r");
+		TRY(fi != NULL, "could not read file `%s`", finame);
 	}
 
-	while (1) {
-		t = tok_get(fi);
-
-		if (t.type == TOK_UNKNWN || t.type == TOK_END)
-			break;
-
-		printf("%s:%zu:%zu: ", (fi == stdin ? "stdin" : opt_infile_str()),
-				t.line, t.col);
+	pr = parse(fi, finame); /* parse() reports any syntax error itself and exits the process */
 
-		if (t.type > TOK_QMARK) {
-			printf("%u, `%s`\n", t.type, t.val);
-		} else {
-			printf("%u\n", t.type);
-		}
-	};
-
-	if (t.type == TOK_UNKNWN) {
-		printf("%s:%zu:%zu: error: unrecognised token `%s`\n",
-				(fi == stdin ? "stdin" : opt_infile_str()),
-				t.line, t.col, t.val);
-	}
+	if (fi != stdin) /* only close a stream that was opened here */
+		fclose(fi);
 
-	fclose(fi);
+	parse_result_wipe(&pr); /* frees the variable and type tables held by pr */
 
 	return 0;
 }
diff --git a/src/parse.c b/src/parse.c
index e69de29..4a6aff8 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -0,0 +1,439 @@
+#include "parse.h"
+#include "err.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+static const char *curfname; /* file name printed by ERR_AT / WARN_AT; assigned by parse() */
+
+#define ERR_AT(l, c, ...) /* fatal error prefixed with "file:line:col:"; ERR exits, so this never returns */ \
+	do { \
+		fprintf(stderr, "\033[1m%s:%zu:%zu:\033[0m ", /* octal ESC: \e is a GNU extension */ \
+				curfname, (l), (c)); \
+		ERR(__VA_ARGS__); \
+	} while (0)
+
+#define ERR_END(t) /* fatal "unexpected end of file" if token t is TOK_END; otherwise does nothing */ \
+	do { \
+		if ((t).type == TOK_END) \
+			ERR_AT((t).line, (t).col, "unexpected end of file"); \
+	} while (0)
+
+#define WARN_AT(l, c, ...) /* warning prefixed with "file:line:col:"; execution continues afterwards */ \
+	do { \
+		fprintf(stderr, "\033[1m%s:%zu:%zu:\033[0m ", /* octal ESC: \e is a GNU extension */ \
+				curfname, (l), (c)); \
+		WARN(__VA_ARGS__); \
+	} while (0)
+
+static struct parse_result_s r; /* result under construction; re-initialised by parse() and returned from it by value */
+
+/*
+ * Parse one type specification: an optional `array` or `hash` prefix followed
+ * by a base type name. Returns the combined parse_type_e value; a malformed
+ * type is fatal (ERR_AT exits). For a user-defined type the name token is
+ * pushed back with tok_unget() so that the caller can read it.
+ */
+static enum parse_type_e sub_parse_type(void)
+{
+	struct tok_s tok = tok_get();
+	const size_t first_line = tok.line, first_col = tok.col;
+	enum parse_type_e kind = PARSE_TYPE_BOOL;
+	bool is_hash, is_array;
+
+	/* container kinds sit at fixed offsets above the scalar kinds */
+	if (strcmp(tok.val, "array") == 0) {
+		kind = PARSE_TYPE_ARRAY_BOOL;
+		tok = tok_get();
+	} else if (strcmp(tok.val, "hash") == 0) {
+		kind = PARSE_TYPE_HASH_BOOL;
+		tok = tok_get();
+	}
+	is_hash = (kind >= PARSE_TYPE_HASH_BOOL);
+	is_array = (!is_hash && kind >= PARSE_TYPE_ARRAY_BOOL);
+
+	ERR_END(tok);
+
+	if (tok.type != TOK_ID) {
+		if (is_hash)
+			ERR_AT(first_line, first_col, "invalid type `hash %s`", tok.val);
+		else if (is_array)
+			ERR_AT(first_line, first_col, "invalid type `array %s`", tok.val);
+		else
+			ERR_AT(first_line, first_col, "invalid type `%s`", tok.val);
+	}
+
+	if (strcmp(tok.val, "bool") == 0 || strcmp(tok.val, "boolean") == 0)
+		return kind;
+
+	if (strcmp(tok.val, "string") == 0)
+		return kind + PARSE_TYPE_STRING;
+
+	if (strcmp(tok.val, "int") == 0 || strcmp(tok.val, "integer") == 0)
+		return kind + PARSE_TYPE_INT;
+
+	if (strcmp(tok.val, "uint") == 0)
+		return kind + PARSE_TYPE_UINT;
+
+	if (strcmp(tok.val, "char") == 0) {
+		/* `char *` is accepted as another spelling of string */
+		tok = tok_get();
+		ERR_END(tok);
+		if (tok.type != TOK_ASTERISK) {
+			if (is_hash)
+				ERR_AT(first_line, first_col, "invalid type `hash char %s`", tok.val);
+			else if (is_array)
+				ERR_AT(first_line, first_col, "invalid type `array char %s`", tok.val);
+			else
+				ERR_AT(first_line, first_col, "invalid type `char %s`", tok.val);
+		}
+		return kind + PARSE_TYPE_STRING;
+	}
+
+	if (strcmp(tok.val, "unsigned") == 0) {
+		/* an `int`/`integer` after `unsigned` is optional; anything else is put back */
+		tok = tok_get();
+		if (tok.type != TOK_ID
+				|| (strcmp(tok.val, "int") != 0 && strcmp(tok.val, "integer") != 0))
+			tok_unget(tok);
+		return kind + PARSE_TYPE_UINT;
+	}
+
+	/* not a built-in name: leave the identifier for the caller to consume */
+	tok_unget(tok);
+	return kind + PARSE_TYPE_DEFTYPE;
+}
+
+/*
+ * Parse the body of a `.struct` / `.union` definition and register it in
+ * r.deftypes. The operator token was already consumed by the caller: line/col
+ * give its position and is_union says which keyword it was. Every syntax or
+ * semantic error is fatal (ERR_AT exits).
+ */
+static void sub_parse_deftype(size_t line, size_t col, bool is_union)
+{
+	struct tok_s t;
+	enum parse_type_e type;
+	struct parse_deftype_s *dtp;
+	struct parse_deftype_s dt = {
+		.line = line,
+		.col = col,
+		.is_union = is_union,
+		.member_list_len = 0,
+	};
+	const char *kw = (is_union ? "union" : "struct");
+	unsigned i, j;
+
+	t = tok_get();
+	ERR_END(t);
+	if (t.type != TOK_ID)
+		ERR_AT(t.line, t.col, "unexpected token `%s` (expected %s name)", t.val, kw);
+
+	HASH_FIND_STR(r.deftypes, t.val, dtp);
+	if (dtp != NULL) {
+		ERR_AT(dt.line, dt.col,
+				"type `%s` redefined (previous definition was at line %zu)",
+				t.val, dtp->line);
+	}
+
+	/* NOTE(review): relies on the tokeniser keeping tokens below TOK_MAX_LEN */
+	strcpy(dt.name, t.val);
+
+	t = tok_get();
+	ERR_END(t);
+	if (t.type != TOK_LBRACE)
+		ERR_AT(t.line, t.col, "unexpected token `%s` (expected `{`)", t.val);
+
+	while (true) {
+		t = tok_get();
+		if (t.type == TOK_RBRACE) {
+			if (dt.member_list_len < 2) {
+				ERR_AT(dt.line, dt.col, "%s `%s` must specify at fewest two members",
+						kw, dt.name);
+			}
+			break;
+		}
+
+		/* the capacity check has to follow the `}` test, otherwise a type with
+		 * exactly PARSE_DEFTYPE_MAX_LEN members would be rejected */
+		if (dt.member_list_len == PARSE_DEFTYPE_MAX_LEN)
+			ERR_AT(dt.line, dt.col, "%s %s has too many members", kw, dt.name);
+
+		tok_unget(t);
+		type = sub_parse_type();
+
+		if (type == PARSE_TYPE_DEFTYPE
+				|| type == PARSE_TYPE_ARRAY_DEFTYPE
+				|| type == PARSE_TYPE_HASH_DEFTYPE
+		) {
+			t = tok_get();
+			ERR_AT(t.line, t.col, "defined types may not contain other defined types");
+		}
+
+		t = tok_get();
+		ERR_END(t);
+		if (t.type != TOK_ID) {
+			if (t.type == TOK_RBRACE || t.type == TOK_COMMA) {
+				ERR_AT(t.line, t.col, "missing member name in %s `%s`",
+						kw, dt.name);
+			}
+			ERR_AT(t.line, t.col, "bad %s member name `%s`", kw, t.val);
+		}
+
+		strcpy(dt.member_name_list[dt.member_list_len], t.val);
+		dt.member_type_list[dt.member_list_len] = type;
+		dt.member_list_len++;
+
+		/* the comma between members is optional */
+		t = tok_get();
+		if (t.type != TOK_COMMA)
+			tok_unget(t);
+	}
+
+	for (i = 0; i < dt.member_list_len; i++) {
+		for (j = i + 1; j < dt.member_list_len; j++) {
+			if (!strcmp(dt.member_name_list[i], dt.member_name_list[j]) ) {
+				ERR_AT(dt.line, dt.col, "%s `%s` contains multiple members named %s",
+						kw, dt.name, dt.member_name_list[i]);
+			}
+		}
+	}
+
+	TRYALLOC(dtp, 1);
+	memcpy(dtp, &dt, sizeof(*dtp));
+	HASH_ADD_STR(r.deftypes, name, dtp);
+}
+
+/*
+ * Try to parse one dot-operator statement (.struct, .union, .function-suffix,
+ * .hash-key-size, .hash-key-name). Returns true if one was consumed; otherwise
+ * the token is pushed back and false is returned. A bad operand is fatal.
+ */
+static bool sub_parse_op(void)
+{
+	struct tok_s t = tok_get();
+
+	switch (t.type) {
+	case TOK_OP_STRUCT:
+		sub_parse_deftype(t.line, t.col, false);
+		return true;
+
+	case TOK_OP_UNION:
+		sub_parse_deftype(t.line, t.col, true);
+		return true;
+
+	case TOK_OP_FUN_SUF:
+		if (r.fun_suf_seen) {
+			WARN_AT(t.line, t.col,
+					"function-suffix redefined (previous value was `%s`)",
+					r.fun_suf);
+		}
+
+		t = tok_get();
+		ERR_END(t);
+		if (t.type != TOK_ID)
+			ERR_AT(t.line, t.col, "invalid function-suffix `%s`", t.val);
+
+		strcpy(r.fun_suf, t.val);
+		r.fun_suf_seen = true;
+		return true;
+
+	case TOK_OP_HKEY_SIZE:
+		if (r.hkey_size_seen) {
+			WARN_AT(t.line, t.col,
+					"hkey_size redefined (previous value was %lu)",
+					r.hkey_size);
+		}
+
+		t = tok_get();
+		ERR_END(t);
+		if (t.type != TOK_UINT)
+			ERR_AT(t.line, t.col, "invalid hkey_size `%s`", t.val);
+
+		r.hkey_size = strtoul(t.val, NULL, 10);
+		r.hkey_size_seen = true;
+		return true;
+
+	case TOK_OP_HKEY_NAME:
+		/* a repeat overrides with a warning, exactly like the two settings above */
+		if (r.hkey_name_seen) {
+			WARN_AT(t.line, t.col,
+					"hkey_name redefined (previous value was `%s`)",
+					r.hkey_name);
+		}
+
+		t = tok_get();
+		ERR_END(t);
+		if (t.type != TOK_ID)
+			ERR_AT(t.line, t.col, "invalid hkey_name `%s`", t.val);
+
+		strcpy(r.hkey_name, t.val);
+		r.hkey_name_seen = true;
+		return true;
+
+	default:
+		tok_unget(t);
+		return false;
+	}
+}
+
+/*
+ * Try to parse one rule, `!name = type` (required) or `?name = type`
+ * (optional), and add the variable to r.vars. Returns false, with the token
+ * pushed back, when the next token does not start a rule. Errors are fatal.
+ */
+static bool sub_parse_rule(void)
+{
+	struct parse_var_s *vp;
+	/* zero-filled so deftype_name is "" for built-in types, not stack garbage */
+	struct parse_var_s v = { .is_required = false };
+	struct tok_s t = tok_get();
+
+	if (t.type == TOK_BANG) {
+		v.is_required = true;
+	} else if (t.type != TOK_QMARK) {
+		tok_unget(t);
+		return false;
+	}
+
+	v.line = t.line;
+	v.col = t.col;
+
+	t = tok_get();
+	ERR_END(t);
+	if (t.type != TOK_ID)
+		ERR_AT(t.line, t.col, "unexpected token `%s`, (expected variable name)", t.val);
+
+	HASH_FIND_STR(r.vars, t.val, vp);
+	if (vp != NULL) {
+		ERR_AT(v.line, v.col,
+				"`%s` redefined (previous definition was at line %zu)",
+				t.val, vp->line);
+	}
+
+	strcpy(v.name, t.val);
+
+	t = tok_get();
+	ERR_END(t);
+	if (t.type != TOK_EQUAL)
+		ERR_AT(t.line, t.col, "unexpected token `%s`, (expected `=`)", t.val);
+
+	v.type = sub_parse_type();
+	if (v.type == PARSE_TYPE_DEFTYPE
+			|| v.type == PARSE_TYPE_ARRAY_DEFTYPE
+			|| v.type == PARSE_TYPE_HASH_DEFTYPE
+	) {
+		/* sub_parse_type() pushed the type-name identifier back for us */
+		t = tok_get();
+		strcpy(v.deftype_name, t.val);
+	}
+
+	TRYALLOC(vp, 1);
+	memcpy(vp, &v, sizeof(*vp));
+	HASH_ADD_STR(r.vars, name, vp);
+	return true;
+}
+
+/*
+ * Parse stream f (named fname in diagnostics) and return the collected
+ * settings, type definitions and variable rules; every error is fatal. The
+ * caller releases the returned tables with parse_result_wipe().
+ */
+struct parse_result_s parse(FILE *f, const char *fname)
+{
+	size_t i, j;
+	struct tok_s t;
+	struct parse_var_s *vcur, *vtmp;
+	struct parse_deftype_s *d;
+
+	r.hkey_size = 16;
+	strcpy(r.hkey_name, "id");
+	r.hkey_name_seen = false;
+	r.hkey_size_seen = false;
+	r.fun_suf_seen = false;
+	r.deftypes = NULL;
+	r.vars = NULL;
+	curfname = fname;
+
+	tok_reset(f);
+	while (sub_parse_op() || sub_parse_rule());
+
+	t = tok_get();
+	if (t.type != TOK_END) {
+		if (t.type == TOK_UNKNWN)
+			ERR_AT(t.line, t.col, "unrecognised token `%s`", t.val);
+		else
+			ERR_AT(t.line, t.col, "unexpected token `%s`", t.val);
+	}
+
+	/* every rule naming a user-defined type must refer to one that exists */
+	HASH_ITER(hh, r.vars, vcur, vtmp) {
+		switch (vcur->type) {
+		case PARSE_TYPE_DEFTYPE:
+		case PARSE_TYPE_ARRAY_DEFTYPE:
+		case PARSE_TYPE_HASH_DEFTYPE:
+			HASH_FIND_STR(r.deftypes, vcur->deftype_name, d);
+			if (d == NULL) {
+				ERR_AT(vcur->line, vcur->col,
+						"rule for variable `%s` references undefined type `%s`",
+						vcur->name, vcur->deftype_name);
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (!r.fun_suf_seen) {
+		/* derive a suffix from the file's base name, up to the first `.` */
+		for (i = strlen(fname); i > 0 && fname[i] != '/' && fname[i] != '\\'; i--);
+		j = i + (fname[i] == '/' || fname[i] == '\\');
+
+		/* the sizeof bound keeps a large .hash-key-size from overrunning fun_suf.
+		 * NOTE(review): limiting the suffix by hkey_size at all looks unintended */
+		for (i = j; fname[i] != '\0' && fname[i] != '.' && i - j < r.hkey_size
+				&& i - j < sizeof(r.fun_suf) - 1; i++) {
+			/* cast: passing a negative char to isalnum() is undefined */
+			if (!isalnum((unsigned char)fname[i])) {
+				fprintf(stderr, "\033[1m%s:\033[0m ", fname);
+				ERR("no function suffix specified, and could not generate one");
+			}
+			r.fun_suf[i - j] = fname[i];
+		}
+		r.fun_suf[i - j] = '\0';
+		if (r.fun_suf[0] == '\0') {
+			fprintf(stderr, "\033[1m%s:\033[0m ", fname);
+			ERR("no function suffix specified, and could not generate one");
+		}
+		fprintf(stderr, "\033[1m%s:\033[0m ", fname);
+		WARN("no function suffix specified. using `%s`...", r.fun_suf);
+	}
+	return r;
+}
+
+/*
+ * Free every variable and deftype record owned by *r, leaving both hash
+ * tables empty. The scalar settings stored in *r are not touched.
+ */
+void parse_result_wipe(struct parse_result_s *r)
+{
+	struct parse_var_s *var;
+	struct parse_deftype_s *deftype;
+
+	assert(r != NULL);
+
+	/* HASH_DEL advances the head, so pop the first entry until none remain */
+	while ((var = r->vars) != NULL) {
+		HASH_DEL(r->vars, var);
+		free(var);
+	}
+
+	while ((deftype = r->deftypes) != NULL) {
+		HASH_DEL(r->deftypes, deftype);
+		free(deftype);
+	}
+}
diff --git a/src/parse.h b/src/parse.h
index 9f9b4e3..1bc6710 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -1,6 +1,68 @@
 #ifndef CONFCONF_PARSE_H
 #define CONFCONF_PARSE_H
 
+#include "tok.h"
 
+#include <uthash.h>
+
+#include <stdbool.h>
+
+#define PARSE_DEFTYPE_MAX_LEN 32 /* capacity of the member arrays in struct parse_deftype_s */
+
+enum parse_type_e { /* numeric layout is relied upon: sub_parse_type() returns container base + scalar kind */
+	PARSE_TYPE_BOOL          = 0, /* scalar kinds; also used as offsets within each container group */
+	PARSE_TYPE_STRING        = 1,
+	PARSE_TYPE_INT           = 2,
+	PARSE_TYPE_UINT          = 3,
+	PARSE_TYPE_DEFTYPE       = 4, /* a type declared with .struct / .union */
+
+	PARSE_TYPE_ARRAY_BOOL    = 5, /* `array` group: 5 + scalar kind */
+	PARSE_TYPE_ARRAY_STRING  = 6,
+	PARSE_TYPE_ARRAY_INT     = 7,
+	PARSE_TYPE_ARRAY_UINT    = 8,
+	PARSE_TYPE_ARRAY_DEFTYPE = 9,
+
+	PARSE_TYPE_HASH_BOOL     = 10, /* `hash` group: 10 + scalar kind */
+	PARSE_TYPE_HASH_STRING   = 11,
+	PARSE_TYPE_HASH_INT      = 12,
+	PARSE_TYPE_HASH_UINT     = 13,
+	PARSE_TYPE_HASH_DEFTYPE  = 14,
+};
+
+struct parse_deftype_s { /* one `.struct` / `.union` definition; an entry of the deftypes hash */
+	char name[TOK_MAX_LEN]; /* type name; the hash key */
+	size_t line; /* position of the `.struct` / `.union` operator token */
+	size_t col;
+	bool is_union; /* true for `.union`, false for `.struct` */
+	unsigned member_list_len; /* number of valid entries in the two member arrays */
+	enum parse_type_e member_type_list[PARSE_DEFTYPE_MAX_LEN]; /* type of member i */
+	char member_name_list[PARSE_DEFTYPE_MAX_LEN][TOK_MAX_LEN]; /* name of member i */
+	UT_hash_handle hh; /* uthash bookkeeping */
+};
+
+struct parse_var_s { /* one variable rule; an entry of the vars hash */
+	char name[TOK_MAX_LEN]; /* variable name; the hash key */
+	size_t line; /* position of the rule's leading `!` or `?` token */
+	size_t col;
+	bool is_required; /* true for `!` rules, false for `?` rules */
+	enum parse_type_e type;
+	char deftype_name[TOK_MAX_LEN]; /* only written when type is one of the *_DEFTYPE kinds */
+	UT_hash_handle hh; /* uthash bookkeeping */
+};
+
+struct parse_result_s { /* everything parse() extracts from one input */
+	unsigned long hkey_size; /* `.hash-key-size` value; parse() defaults it to 16 */
+	bool hkey_size_seen; /* true once the operator has appeared in the input */
+	char hkey_name[TOK_MAX_LEN]; /* `.hash-key-name` value; parse() defaults it to "id" */
+	bool hkey_name_seen;
+	char fun_suf[TOK_MAX_LEN]; /* `.function-suffix` value, or derived from the file name when absent */
+	bool fun_suf_seen;
+	struct parse_deftype_s *deftypes; /* uthash head keyed by name; NULL when empty */
+	struct parse_var_s *vars; /* uthash head keyed by name; NULL when empty */
+};
+
+struct parse_result_s parse(FILE *f, const char *fname); /* parse stream f; fname is used in diagnostics; exits the process on error */
+
+void parse_result_wipe(struct parse_result_s *r); /* frees the vars and deftypes entries of *r; r must not be NULL */
 
 #endif
diff --git a/src/tok.c b/src/tok.c
index 12553a9..04d1eb8 100644
--- a/src/tok.c
+++ b/src/tok.c
@@ -3,19 +3,19 @@
 #include <stdbool.h>
 #include <ctype.h>
 
-#define TOK_MAX_LEN 128
-
 static char val[TOK_MAX_LEN];
-static size_t vlen = 0;
-static struct tok_s curtok = { .line = 1, .col = 1, .val = val };
+static size_t vlen; /* number of characters of the current token stored in val */
+static struct tok_s curtok = { .val = val }; /* line and col are now set by tok_reset() */
+static FILE *curf; /* stream being tokenised; chosen by tok_reset() */
+static bool unget; /* true while a token pushed back by tok_unget() is pending */
 
-static bool sub_eat_spaces(FILE *f)
+static bool sub_eat_spaces(void)
 {
 	int c;
 	bool seen = false;
 
 	while (true) {
-		c = getc(f);
+		c = getc(curf);
 
 		if (c == '\n') {
 			curtok.col = 1;
@@ -24,7 +24,7 @@ static bool sub_eat_spaces(FILE *f)
 		}
 
 		if (!isspace(c)) {
-			ungetc(c, f);
+			ungetc(c, curf);
 			break;
 		}
 
@@ -36,19 +36,19 @@ static bool sub_eat_spaces(FILE *f)
 	return seen;
 }
 
-static bool sub_eat_comment(FILE *f)
+static bool sub_eat_comment(void)
 {
 	int c;
 
-	c = getc(f);
+	c = getc(curf);
 
 	if (c != '#') {
-		ungetc(c, f);
+		ungetc(c, curf);
 		return false;
 	}
 
 	while (true) {
-		c = getc(f);
+		c = getc(curf);
 
 		if (c == '\n') {
 			curtok.col = 1;
@@ -57,13 +57,13 @@ static bool sub_eat_comment(FILE *f)
 		}
 
 		if (c == EOF) {
-			ungetc(c, f);
+			ungetc(c, curf);
 			return true;
 		}
 	}
 }
 
-static void sub_match_op(FILE *f)
+static void sub_match_op(void)
 {
 	struct {
 		bool possible;
@@ -71,6 +71,7 @@ static void sub_match_op(FILE *f)
 		char name[(32 < TOK_MAX_LEN ? 32 : TOK_MAX_LEN)];
 	} ops[] = {
 		{ true, TOK_OP_STRUCT,    ".struct"          },
+		{ true, TOK_OP_UNION,     ".union"           },
 		{ true, TOK_OP_HKEY_SIZE, ".hash-key-size"   },
 		{ true, TOK_OP_HKEY_NAME, ".hash-key-name"   },
 		{ true, TOK_OP_FUN_SUF,   ".function-suffix" },
@@ -84,10 +85,10 @@ static void sub_match_op(FILE *f)
 
 	for (i = 1;; i++) {
 		again = false;
-		c = getc(f);
+		c = getc(curf);
 
 		if (c == EOF || isspace(c)) {
-			ungetc(c, f);
+			ungetc(c, curf);
 			curtok.type = TOK_UNKNWN;
 			val[vlen] = '\0';
 			return;
@@ -96,7 +97,7 @@ static void sub_match_op(FILE *f)
 		val[vlen] = c;
 		vlen++;
 
-		for (j = 0; j < 4; j++) {
+		for (j = 0; j < 5; j++) {
 			if (!ops[j].possible)
 				continue;
 
@@ -119,9 +120,9 @@ static void sub_match_op(FILE *f)
 			do {
 				val[vlen] = c;
 				vlen++;
-				c = getc(f);
+				c = getc(curf);
 			} while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1);
-			ungetc(c, f);
+			ungetc(c, curf);
 			val[vlen] = '\0';
 			curtok.type = TOK_UNKNWN;
 			return;
@@ -129,17 +130,17 @@ static void sub_match_op(FILE *f)
 	}
 }
 
-static void sub_match_uint(FILE *f)
+static void sub_match_uint(void)
 {
 	int c;
 
 	curtok.type = TOK_UINT;
 
 	while (true) {
-		c = getc(f);
+		c = getc(curf);
 
 		if (!isdigit(c)) {
-			ungetc(c, f);
+			ungetc(c, curf);
 			val[vlen] = '\0';
 			return;
 		}
@@ -149,17 +150,17 @@ static void sub_match_uint(FILE *f)
 	}
 }
 
-static void sub_match_id(FILE *f)
+static void sub_match_id(void)
 {
 	int c;
 
 	curtok.type = TOK_ID;
 
 	while (true) {
-		c = getc(f);
+		c = getc(curf);
 
 		if (!isalnum(c) && c != '_') {
-			ungetc(c, f);
+			ungetc(c, curf);
 			val[vlen] = '\0';
 			return;
 		}
@@ -169,72 +170,104 @@ static void sub_match_id(FILE *f)
 	}
 }
 
-struct tok_s tok_get(FILE *f)
+/* Select f as the input, restart at line 1 column 1, drop any pushed-back token. */
+void tok_reset(FILE *f)
+{
+	unget = false;
+	vlen = 0;
+	curtok.line = curtok.col = 1;
+	curf = f;
+}
+
+struct tok_s tok_get(void)
 {
 	int c;
 
+	if (unget) {
+		unget = false;
+		return curtok;
+	}
+
 	curtok.col += vlen;
 	vlen = 0;
 
-eat:
-	if (sub_eat_spaces(f))
-		goto eat;
-	if (sub_eat_comment(f))
-		goto eat;
+	while (sub_eat_spaces() || sub_eat_comment());
 
-	c = getc(f);
+	c = getc(curf);
 
 	switch (c) {
 	case '{':
 		curtok.type = TOK_LBRACE;
 		vlen = 1;
+		val[0] = '{';
+		val[1] = '\0';
 		return curtok;
 
 	case '}':
 		curtok.type = TOK_RBRACE;
 		vlen = 1;
+		val[0] = '}';
+		val[1] = '\0';
 		return curtok;
 
 	case '=':
 		curtok.type = TOK_EQUAL;
 		vlen = 1;
+		val[0] = '=';
+		val[1] = '\0';
 		return curtok;
 
 	case ',':
 		curtok.type = TOK_COMMA;
 		vlen = 1;
+		val[0] = ',';
+		val[1] = '\0';
 		return curtok;
 
 	case '!':
 		curtok.type = TOK_BANG;
 		vlen = 1;
+		val[0] = '!';
+		val[1] = '\0';
 		return curtok;
 
 	case '?':
 		curtok.type = TOK_QMARK;
 		vlen = 1;
+		val[0] = '?';
+		val[1] = '\0';
+		return curtok;
+
+	case '*':
+		curtok.type = TOK_ASTERISK;
+		vlen = 1;
+		val[0] = '*';
+		val[1] = '\0';
 		return curtok;
 
 	case EOF:
+		ungetc(c, curf);
 		curtok.type = TOK_END;
+		vlen = 0;
+		val[0] = '\0';
 		return curtok;
 
 	case '.':
-		sub_match_op(f);
+		sub_match_op();
 		return curtok;
 		
 	default:
 		if (isdigit(c)) {
 			val[0] = c;
 			vlen = 1;
-			sub_match_uint(f);
+			sub_match_uint();
 			return curtok;
 		}
 
 		if (isalpha(c) || c == '_') {
 			val[0] = c;
 			vlen = 1;
-			sub_match_id(f);
+			sub_match_id();
 			return curtok;
 		}
 
@@ -243,10 +276,10 @@ eat:
 		do {
 			val[vlen] = c;
 			vlen++;
-			c = getc(f);
+			c = getc(curf);
 		} while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1);
 
-		ungetc(c, f);
+		ungetc(c, curf);
 		val[vlen] = '\0';
 
 		return curtok;
@@ -254,3 +287,9 @@ eat:
 
 	return curtok;
 }
+
+void tok_unget(struct tok_s t) /* push t back so the next tok_get() returns it; a single slot only */
+{
+	curtok = t;
+	unget = true;
+}
diff --git a/src/tok.h b/src/tok.h
index 2e02c40..08099b3 100644
--- a/src/tok.h
+++ b/src/tok.h
@@ -3,21 +3,25 @@
 
 #include <stdio.h>
 
+#define TOK_MAX_LEN 128 /* size of the tokeniser's text buffer and of every name field in parse.h */
+
 enum tok_type_e {
-	      TOK_LBRACE = 0,
-	      TOK_RBRACE = 1,
-	       TOK_EQUAL = 2,
-	       TOK_COMMA = 3,
-	        TOK_BANG = 4,
-	       TOK_QMARK = 5,
-	   TOK_OP_STRUCT = 6,
-	TOK_OP_HKEY_SIZE = 7,
-	TOK_OP_HKEY_NAME = 8,
-	  TOK_OP_FUN_SUF = 9,
-	        TOK_UINT = 10,
-	          TOK_ID = 11,
-	      TOK_UNKNWN = 12,
-	         TOK_END = 13,
+	TOK_LBRACE, /* `{` -- values are implicit now; the two new entries renumber everything after TOK_QMARK */
+	TOK_RBRACE, /* `}` */
+	TOK_EQUAL, /* `=` */
+	TOK_COMMA, /* `,` */
+	TOK_BANG, /* `!` */
+	TOK_QMARK, /* `?` */
+	TOK_ASTERISK, /* `*` */
+	TOK_OP_STRUCT, /* `.struct` */
+	TOK_OP_UNION, /* `.union` */
+	TOK_OP_HKEY_SIZE, /* `.hash-key-size` */
+	TOK_OP_HKEY_NAME, /* `.hash-key-name` */
+	TOK_OP_FUN_SUF, /* `.function-suffix` */
+	TOK_UINT, /* run of decimal digits */
+	TOK_ID, /* identifier: letter or `_`, then letters, digits, `_` */
+	TOK_UNKNWN, /* input that matches no token */
+	TOK_END, /* end of input */
 };
 
 struct tok_s {
@@ -27,6 +31,9 @@ struct tok_s {
 	char *val;
 };
 
-struct tok_s tok_get(FILE *f);
+void tok_reset(FILE *f); /* select the input stream and restart position tracking at line 1, column 1 */
+
+struct tok_s tok_get(void); /* next token, or the pending one after tok_unget() */
+void tok_unget(struct tok_s t); /* make the next tok_get() return t */
 
 #endif
-- 
cgit v1.2.3