From 682972020b2828867b4cffc80eb2dbe0e06ce93b Mon Sep 17 00:00:00 2001 From: katherine Date: Sun, 12 May 2019 16:07:11 -0700 Subject: implement internal parser --- src/tok.c | 111 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 36 deletions(-) (limited to 'src/tok.c') diff --git a/src/tok.c b/src/tok.c index 12553a9..04d1eb8 100644 --- a/src/tok.c +++ b/src/tok.c @@ -3,19 +3,19 @@ #include #include -#define TOK_MAX_LEN 128 - static char val[TOK_MAX_LEN]; -static size_t vlen = 0; -static struct tok_s curtok = { .line = 1, .col = 1, .val = val }; +static size_t vlen; +static struct tok_s curtok = { .val = val }; +static FILE *curf; +static bool unget; -static bool sub_eat_spaces(FILE *f) +static bool sub_eat_spaces(void) { int c; bool seen = false; while (true) { - c = getc(f); + c = getc(curf); if (c == '\n') { curtok.col = 1; @@ -24,7 +24,7 @@ static bool sub_eat_spaces(FILE *f) } if (!isspace(c)) { - ungetc(c, f); + ungetc(c, curf); break; } @@ -36,19 +36,19 @@ static bool sub_eat_spaces(FILE *f) return seen; } -static bool sub_eat_comment(FILE *f) +static bool sub_eat_comment(void) { int c; - c = getc(f); + c = getc(curf); if (c != '#') { - ungetc(c, f); + ungetc(c, curf); return false; } while (true) { - c = getc(f); + c = getc(curf); if (c == '\n') { curtok.col = 1; @@ -57,13 +57,13 @@ static bool sub_eat_comment(FILE *f) } if (c == EOF) { - ungetc(c, f); + ungetc(c, curf); return true; } } } -static void sub_match_op(FILE *f) +static void sub_match_op(void) { struct { bool possible; @@ -71,6 +71,7 @@ static void sub_match_op(FILE *f) char name[(32 < TOK_MAX_LEN ? 32 : TOK_MAX_LEN)]; } ops[] = { { true, TOK_OP_STRUCT, ".struct" }, + { true, TOK_OP_UNION, ".union" }, { true, TOK_OP_HKEY_SIZE, ".hash-key-size" }, { true, TOK_OP_HKEY_NAME, ".hash-key-name" }, { true, TOK_OP_FUN_SUF, ".function-suffix" }, @@ -84,10 +85,10 @@ static void sub_match_op(FILE *f) for (i = 1;; i++) { again = false; - c = getc(f); + c = getc(curf); if (c == EOF || isspace(c)) { - ungetc(c, f); + ungetc(c, curf); curtok.type = TOK_UNKNWN; val[vlen] = '\0'; return; @@ -96,7 +97,7 @@ static void sub_match_op(FILE *f) val[vlen] = c; vlen++; - for (j = 0; j < 4; j++) { + for (j = 0; j < 5; j++) { if (!ops[j].possible) continue; @@ -119,9 +120,9 @@ static void sub_match_op(FILE *f) do { val[vlen] = c; vlen++; - c = getc(f); + c = getc(curf); } while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1); - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; curtok.type = TOK_UNKNWN; return; @@ -129,17 +130,17 @@ static void sub_match_op(FILE *f) } } -static void sub_match_uint(FILE *f) +static void sub_match_uint(void) { int c; curtok.type = TOK_UINT; while (true) { - c = getc(f); + c = getc(curf); if (!isdigit(c)) { - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return; } @@ -149,17 +150,17 @@ static void sub_match_uint(FILE *f) } } -static void sub_match_id(FILE *f) +static void sub_match_id(void) { int c; curtok.type = TOK_ID; while (true) { - c = getc(f); + c = getc(curf); if (!isalnum(c) && c != '_') { - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return; } @@ -169,72 +170,104 @@ static void sub_match_id(FILE *f) } } -struct tok_s tok_get(FILE *f) +void tok_reset(FILE *f) +{ + curf = f; + curtok.line = 1; + curtok.col = 1; + vlen = 0; + unget = false; +} + +struct tok_s tok_get(void) { int c; + if (unget) { + unget = false; + return curtok; + } + curtok.col += vlen; vlen = 0; -eat: - if (sub_eat_spaces(f)) - goto eat; - if (sub_eat_comment(f)) - goto eat; + while (sub_eat_spaces() || sub_eat_comment()); - c = getc(f); + c = getc(curf); switch (c) { case '{': curtok.type = TOK_LBRACE; vlen = 1; + val[0] = '{'; + val[1] = '\0'; return curtok; case '}': curtok.type = TOK_RBRACE; vlen = 1; + val[0] = '}'; + val[1] = '\0'; return curtok; case '=': curtok.type = TOK_EQUAL; vlen = 1; + val[0] = '='; + val[1] = '\0'; return curtok; case ',': curtok.type = TOK_COMMA; vlen = 1; + val[0] = ','; + val[1] = '\0'; return curtok; case '!': curtok.type = TOK_BANG; vlen = 1; + val[0] = '!'; + val[1] = '\0'; return curtok; case '?': curtok.type = TOK_QMARK; vlen = 1; + val[0] = '?'; + val[1] = '\0'; + return curtok; + + case '*': + curtok.type = TOK_ASTERISK; + vlen = 1; + val[0] = '*'; + val[1] = '\0'; return curtok; case EOF: + ungetc(c, curf); curtok.type = TOK_END; + vlen = 0; + val[0] = '\0'; return curtok; case '.': - sub_match_op(f); + sub_match_op(); return curtok; default: if (isdigit(c)) { val[0] = c; vlen = 1; - sub_match_uint(f); + sub_match_uint(); return curtok; } if (isalpha(c) || c == '_') { val[0] = c; vlen = 1; - sub_match_id(f); + sub_match_id(); return curtok; } @@ -243,10 +276,10 @@ eat: do { val[vlen] = c; vlen++; - c = getc(f); + c = getc(curf); } while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1); - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return curtok; @@ -254,3 +287,9 @@ eat: return curtok; } + +void tok_unget(struct tok_s t) +{ + unget = true; + curtok = t; +} -- cgit v1.2.3