diff options
-rw-r--r-- | src/err.h | 14 | ||||
-rw-r--r-- | src/main.c | 43 | ||||
-rw-r--r-- | src/parse.c | 439 | ||||
-rw-r--r-- | src/parse.h | 62 | ||||
-rw-r--r-- | src/tok.c | 111 | ||||
-rw-r--r-- | src/tok.h | 37 |
6 files changed, 622 insertions, 84 deletions
@@ -6,11 +6,17 @@ #define ERR(...) \ do { \ - fprintf(stderr, "error: " __VA_ARGS__); \ + fprintf(stderr, "\e[1;31merror:\e[0m " __VA_ARGS__); \ fprintf(stderr, "\n"); \ exit(EXIT_FAILURE); \ } while (0) +#define WARN(...) \ + do { \ + fprintf(stderr, "\e[1;35mwarning:\e[0m " __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } while (0) + #define TRY(cond, ...) \ do { \ if (!(cond)) { \ @@ -24,4 +30,10 @@ TRY((dest) != NULL, "could not allocate memory"); \ } while (0) +#define TRYREALLOC(dest, count) \ + do { \ + (dest) = realloc((dest), (count) * sizeof(*(dest))); \ + TRY((dest) != NULL, "could not allocate memory"); \ + } while (0) + #endif @@ -1,48 +1,27 @@ -#include "opt.h" #include "err.h" -#include "tok.h" - -#include <stdio.h> -#include <errno.h> - - - +#include "opt.h" +#include "parse.h" int main(int argc, char **argv) { FILE *fi = stdin; - struct tok_s t; + const char *finame = "stdin"; + struct parse_result_s pr; opt_parse(argc, argv); if (opt_infile_str() != NULL) { - fi = fopen(opt_infile_str(), "r"); - TRY(fi != NULL, "could not read file `%s`", opt_infile_str()); + finame = opt_infile_str(); + fi = fopen(finame, "r"); + TRY(fi != NULL, "could not read file `%s`", finame); } - while (1) { - t = tok_get(fi); - - if (t.type == TOK_UNKNWN || t.type == TOK_END) - break; - - printf("%s:%zu:%zu: ", (fi == stdin ? "stdin" : opt_infile_str()), - t.line, t.col); + pr = parse(fi, finame); - if (t.type > TOK_QMARK) { - printf("%u, `%s`\n", t.type, t.val); - } else { - printf("%u\n", t.type); - } - }; - - if (t.type == TOK_UNKNWN) { - printf("%s:%zu:%zu: error: unrecognised token `%s`\n", - (fi == stdin ? "stdin" : opt_infile_str()), - t.line, t.col, t.val); - } + if (fi != stdin) + fclose(fi); - fclose(fi); + parse_result_wipe(&pr); return 0; } diff --git a/src/parse.c b/src/parse.c index e69de29..4a6aff8 100644 --- a/src/parse.c +++ b/src/parse.c @@ -0,0 +1,439 @@ +#include "parse.h" +#include "err.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +static const char *curfname; + +#define ERR_AT(l, c, ...) \ + do { \ + fprintf(stderr, "\e[1m%s:%zu:%zu:\e[0m ", \ + curfname, (l), (c)); \ + ERR(__VA_ARGS__); \ + } while (0) + +#define ERR_END(t) \ + do { \ + if ((t).type == TOK_END) \ + ERR_AT((t).line, (t).col, "unexpected end of file"); \ + } while (0) + +#define WARN_AT(l, c, ...) \ + do { \ + fprintf(stderr, "\e[1m%s:%zu:%zu:\e[0m ", \ + curfname, (l), (c)); \ + WARN(__VA_ARGS__); \ + } while (0) + +static struct parse_result_s r; + +static enum parse_type_e sub_parse_type(void) +{ + struct tok_s t = tok_get(); + enum parse_type_e type = PARSE_TYPE_BOOL; + size_t l, c; + + l = t.line; + c = t.col; + + if (!strcmp(t.val, "array") ) { + type = PARSE_TYPE_ARRAY_BOOL; + t = tok_get(); + } else if (!strcmp(t.val, "hash") ) { + type = PARSE_TYPE_HASH_BOOL; + t = tok_get(); + } + + ERR_END(t); + + if (t.type != TOK_ID) { + if (type >= PARSE_TYPE_HASH_BOOL) + ERR_AT(l, c, "invalid type `hash %s`", t.val); + else if (type >= PARSE_TYPE_ARRAY_BOOL) + ERR_AT(l, c, "invalid type `array %s`", t.val); + else + ERR_AT(l, c, "invalid type `%s`", t.val); + } + + if (!strcmp(t.val, "bool") || !strcmp(t.val, "boolean")) + return type; + + if (!strcmp(t.val, "string")) + return type + PARSE_TYPE_STRING; + + if (!strcmp(t.val, "char")) { + t = tok_get(); + + ERR_END(t); + + if (t.type != TOK_ASTERISK) { + if (type >= PARSE_TYPE_HASH_BOOL) + ERR_AT(l, c, "invalid type `hash char %s`", t.val); + else if (type >= PARSE_TYPE_ARRAY_BOOL) + ERR_AT(l, c, "invalid type `array char %s`", t.val); + else + ERR_AT(l, c, "invalid type `char %s`", t.val); + } + + return type + PARSE_TYPE_STRING; + } + + if (!strcmp(t.val, "int") || !strcmp(t.val, "integer")) + return type + PARSE_TYPE_INT; + + if (!strcmp(t.val, "uint")) + return type + PARSE_TYPE_UINT; + + if (!strcmp(t.val, "unsigned")) { + l = t.line; + c = t.col; + t = tok_get(); + + if (t.type != TOK_ID + || (strcmp(t.val, "int") && strcmp(t.val, "integer")) + ) { + tok_unget(t); + } + + return type + PARSE_TYPE_UINT; + } + + tok_unget(t); + + return type + PARSE_TYPE_DEFTYPE; +} + +static void sub_parse_deftype(size_t line, size_t col, bool is_union) +{ + struct tok_s t; + enum parse_type_e type; + struct parse_deftype_s *dtp; + struct parse_deftype_s dt = { + .line = line, + .col = col, + .is_union = is_union, + .member_list_len = 0, + }; + unsigned i, j; + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_ID) { + ERR_AT(t.line, t.col, "unexpected token `%s` (expected %s name)", + t.val, (dt.is_union ? "union" : "struct")); + } + + HASH_FIND_STR(r.deftypes, t.val, dtp); + if (dtp != NULL) { + ERR_AT(dt.line, dt.col, + "type `%s` redefined (previous definition was at line %zu)", + t.val, dtp->line); + } + + strcpy(dt.name, t.val); + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_LBRACE) + ERR_AT(t.line, t.col, "unexpected token `%s` (expected `{`)", t.val); + + while (true) { + if (dt.member_list_len == PARSE_DEFTYPE_MAX_LEN) { + ERR_AT(dt.line, dt.col, "%s %s has too many members", + (dt.is_union ? "union" : "struct"), dt.name); + } + + t = tok_get(); + if (t.type == TOK_RBRACE) { + if (dt.member_list_len < 2) { + ERR_AT(dt.line, dt.col, "%s `%s` must specify at fewest two members", + (dt.is_union ? "union" : "struct"), dt.name); + } + + break; + } + + tok_unget(t); + + type = sub_parse_type(); + + if (type == PARSE_TYPE_DEFTYPE + || type == PARSE_TYPE_ARRAY_DEFTYPE + || type == PARSE_TYPE_HASH_DEFTYPE + ) { + t = tok_get(); + ERR_AT(t.line, t.col, "defined types may not contain other defined types"); + } + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_ID) { + if (t.type == TOK_RBRACE || t.type == TOK_COMMA) { + ERR_AT(t.line, t.col, "missing member name in %s `%s'", + (dt.is_union ? "union" : "struct"), dt.name); + } + + ERR_AT(t.line, t.col, "bad %s member name `%s`", + (dt.is_union ? "union" : "struct"), t.val); + } + + strcpy(dt.member_name_list[dt.member_list_len], t.val); + dt.member_type_list[dt.member_list_len] = type; + dt.member_list_len++; + + t = tok_get(); + if (t.type != TOK_COMMA) + tok_unget(t); + } + + for (i = 0; i < dt.member_list_len; i++) { + for (j = i + 1; j < dt.member_list_len; j++) { + if (!strcmp(dt.member_name_list[i], dt.member_name_list[j]) ) { + ERR_AT(dt.line, dt.col, "%s `%s` contains multiple members named %s", + (dt.is_union ? "union" : "struct"), dt.name, + dt.member_name_list[i]); + } + + } + } + + TRYALLOC(dtp, 1); + memcpy(dtp, &dt, sizeof(*dtp)); + + HASH_ADD_STR(r.deftypes, name, dtp); +} + +static bool sub_parse_op(void) +{ + struct tok_s t = tok_get(); + + switch (t.type) { + case TOK_OP_STRUCT: + sub_parse_deftype(t.line, t.col, false); + return true; + + case TOK_OP_UNION: + sub_parse_deftype(t.line, t.col, true); + return true; + + case TOK_OP_FUN_SUF: + if (r.fun_suf_seen) { + WARN_AT(t.line, t.col, + "function-suffix redefined (previous value was `%s`)", + r.fun_suf); + } + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_ID) + ERR_AT(t.line, t.col, "invalid function-suffix `%s`", t.val); + + strcpy(r.fun_suf, t.val); + + r.fun_suf_seen = true; + + return true; + + case TOK_OP_HKEY_SIZE: + if (r.hkey_size_seen) { + WARN_AT(t.line, t.col, + "hkey_size redefined (previous value was %lu)", + r.hkey_size); + } + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_UINT) + ERR_AT(t.line, t.col, "invalid hkey_size `%s`", t.val); + + r.hkey_size = strtoul(t.val, NULL, 10); + + r.hkey_size_seen = true; + + return true; + + case TOK_OP_HKEY_NAME: + if (r.hkey_name_seen) { + ERR_AT(t.line, t.col, + "hkey_name redefined (previous value was `%s`)", + r.hkey_name); + } + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_ID) + ERR_AT(t.line, t.col, "invalid hkey_name `%s`", t.val); + + strcpy(r.hkey_name, t.val); + + r.hkey_name_seen = true; + + return true; + + default: + tok_unget(t); + return false; + } +} + +static bool sub_parse_rule(void) +{ + struct parse_var_s *vp; + struct parse_var_s v; + struct tok_s t = tok_get(); + + if (t.type != TOK_BANG) { + if (t.type != TOK_QMARK) { + tok_unget(t); + return false; + } + v.is_required = false; + } else { + v.is_required = true; + } + + v.line = t.line; + v.col = t.col; + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_ID) { + ERR_AT(t.line, t.col, + "unexpected token `%s`, (expected variable name)", t.val); + } + + HASH_FIND_STR(r.vars, t.val, vp); + if (vp != NULL) { + ERR_AT(v.line, v.col, + "`%s` redefined (previous definition was at line %zu)", + t.val, vp->line); + } + + strcpy(v.name, t.val); + + t = tok_get(); + ERR_END(t); + if (t.type != TOK_EQUAL) + ERR_AT(t.line, t.col, "unexpected token `%s`, (expected `=`)", t.val); + + v.type = sub_parse_type(); + + if (v.type == PARSE_TYPE_DEFTYPE + || v.type == PARSE_TYPE_ARRAY_DEFTYPE + || v.type == PARSE_TYPE_HASH_DEFTYPE + ) { + t = tok_get(); + strcpy(v.deftype_name, t.val); + } + + TRYALLOC(vp, 1); + memcpy(vp, &v, sizeof(*vp)); + HASH_ADD_STR(r.vars, name, vp); + + return true; +} + +struct parse_result_s parse(FILE *f, const char *fname) +{ + size_t i, j; + struct tok_s t; + + struct parse_var_s *vcur, *vtmp; + struct parse_deftype_s *d; + + r.hkey_size = 16; + strcpy(r.hkey_name, "id"); + r.hkey_name_seen = false; + r.hkey_size_seen = false; + r.fun_suf_seen = false; + r.deftypes = NULL; + r.vars = NULL; + curfname = fname; + + tok_reset(f); + + while (sub_parse_op() || sub_parse_rule()); + + t = tok_get(); + if (t.type != TOK_END) { + if (t.type == TOK_UNKNWN) + ERR_AT(t.line, t.col, "unrecognised token `%s`", t.val); + else + ERR_AT(t.line, t.col, "unexpected token `%s`", t.val); + } + + HASH_ITER(hh, r.vars, vcur, vtmp) { + switch (vcur->type) { + case PARSE_TYPE_DEFTYPE: + case PARSE_TYPE_ARRAY_DEFTYPE: + case PARSE_TYPE_HASH_DEFTYPE: + HASH_FIND_STR(r.deftypes, vcur->deftype_name, d); + if (d == NULL) { + ERR_AT(vcur->line, vcur->col, + "rule for variable `%s` references undefined type `%s`", + vcur->name, vcur->deftype_name); + } + default: + continue; + } + } + + if (!r.fun_suf_seen) { + + j = 0; + + for (i = strlen(fname); i > 0 && fname[i] != '/' + && fname[i] != '\\'; i--); + + j = i + (fname[i] == '/' || fname[i] == '\\'); + + for (i = j; fname[i] != '\0' && fname[i] != '.' + && i - j < r.hkey_size; i++) { + if (!isalnum(fname[i])) { + fprintf(stderr, "\e[1m%s:\e[0m ", fname); + ERR("no function suffix specified, and could not generate one"); + } + r.fun_suf[i - j] = fname[i]; + } + + r.fun_suf[i - j] = '\0'; + + if (r.fun_suf[0] == '\0') { + fprintf(stderr, "\e[1m%s:\e[0m ", fname); + ERR("no function suffix specified, and could not generate one"); + } + + fprintf(stderr, "\e[1m%s:\e[0m ", fname); + WARN("no function suffix specified. using `%s`...", r.fun_suf); + } + + return r; +} + +void parse_result_wipe(struct parse_result_s *r) +{ + struct parse_var_s *vcur, *vtmp; + struct parse_deftype_s *dcur, *dtmp; + + assert(r != NULL); + + if (r->vars != NULL) { + HASH_ITER(hh, r->vars, vcur, vtmp) { + HASH_DEL(r->vars, vcur); + free(vcur); + } + } + + if (r->deftypes != NULL) { + HASH_ITER(hh, r->deftypes, dcur, dtmp) { + HASH_DEL(r->deftypes, dcur); + free(dcur); + } + } + +} diff --git a/src/parse.h b/src/parse.h index 9f9b4e3..1bc6710 100644 --- a/src/parse.h +++ b/src/parse.h @@ -1,6 +1,68 @@ #ifndef CONFCONF_PARSE_H #define CONFCONF_PARSE_H +#include "tok.h" +#include <uthash.h> + +#include <stdbool.h> + +#define PARSE_DEFTYPE_MAX_LEN 32 + +enum parse_type_e { + PARSE_TYPE_BOOL = 0, + PARSE_TYPE_STRING = 1, + PARSE_TYPE_INT = 2, + PARSE_TYPE_UINT = 3, + PARSE_TYPE_DEFTYPE = 4, + + PARSE_TYPE_ARRAY_BOOL = 5, + PARSE_TYPE_ARRAY_STRING = 6, + PARSE_TYPE_ARRAY_INT = 7, + PARSE_TYPE_ARRAY_UINT = 8, + PARSE_TYPE_ARRAY_DEFTYPE = 9, + + PARSE_TYPE_HASH_BOOL = 10, + PARSE_TYPE_HASH_STRING = 11, + PARSE_TYPE_HASH_INT = 12, + PARSE_TYPE_HASH_UINT = 13, + PARSE_TYPE_HASH_DEFTYPE = 14, +}; + +struct parse_deftype_s { + char name[TOK_MAX_LEN]; + size_t line; + size_t col; + bool is_union; + unsigned member_list_len; + enum parse_type_e member_type_list[PARSE_DEFTYPE_MAX_LEN]; + char member_name_list[PARSE_DEFTYPE_MAX_LEN][TOK_MAX_LEN]; + UT_hash_handle hh; +}; + +struct parse_var_s { + char name[TOK_MAX_LEN]; + size_t line; + size_t col; + bool is_required; + enum parse_type_e type; + char deftype_name[TOK_MAX_LEN]; + UT_hash_handle hh; +}; + +struct parse_result_s { + unsigned long hkey_size; + bool hkey_size_seen; + char hkey_name[TOK_MAX_LEN]; + bool hkey_name_seen; + char fun_suf[TOK_MAX_LEN]; + bool fun_suf_seen; + struct parse_deftype_s *deftypes; + struct parse_var_s *vars; +}; + +struct parse_result_s parse(FILE *f, const char *fname); + +void parse_result_wipe(struct parse_result_s *r); #endif @@ -3,19 +3,19 @@ #include <stdbool.h> #include <ctype.h> -#define TOK_MAX_LEN 128 - static char val[TOK_MAX_LEN]; -static size_t vlen = 0; -static struct tok_s curtok = { .line = 1, .col = 1, .val = val }; +static size_t vlen; +static struct tok_s curtok = { .val = val }; +static FILE *curf; +static bool unget; -static bool sub_eat_spaces(FILE *f) +static bool sub_eat_spaces(void) { int c; bool seen = false; while (true) { - c = getc(f); + c = getc(curf); if (c == '\n') { curtok.col = 1; @@ -24,7 +24,7 @@ static bool sub_eat_spaces(FILE *f) } if (!isspace(c)) { - ungetc(c, f); + ungetc(c, curf); break; } @@ -36,19 +36,19 @@ static bool sub_eat_spaces(FILE *f) return seen; } -static bool sub_eat_comment(FILE *f) +static bool sub_eat_comment(void) { int c; - c = getc(f); + c = getc(curf); if (c != '#') { - ungetc(c, f); + ungetc(c, curf); return false; } while (true) { - c = getc(f); + c = getc(curf); if (c == '\n') { curtok.col = 1; @@ -57,13 +57,13 @@ static bool sub_eat_comment(FILE *f) } if (c == EOF) { - ungetc(c, f); + ungetc(c, curf); return true; } } } -static void sub_match_op(FILE *f) +static void sub_match_op(void) { struct { bool possible; @@ -71,6 +71,7 @@ static void sub_match_op(FILE *f) char name[(32 < TOK_MAX_LEN ? 32 : TOK_MAX_LEN)]; } ops[] = { { true, TOK_OP_STRUCT, ".struct" }, + { true, TOK_OP_UNION, ".union" }, { true, TOK_OP_HKEY_SIZE, ".hash-key-size" }, { true, TOK_OP_HKEY_NAME, ".hash-key-name" }, { true, TOK_OP_FUN_SUF, ".function-suffix" }, @@ -84,10 +85,10 @@ static void sub_match_op(FILE *f) for (i = 1;; i++) { again = false; - c = getc(f); + c = getc(curf); if (c == EOF || isspace(c)) { - ungetc(c, f); + ungetc(c, curf); curtok.type = TOK_UNKNWN; val[vlen] = '\0'; return; @@ -96,7 +97,7 @@ static void sub_match_op(FILE *f) val[vlen] = c; vlen++; - for (j = 0; j < 4; j++) { + for (j = 0; j < 5; j++) { if (!ops[j].possible) continue; @@ -119,9 +120,9 @@ static void sub_match_op(FILE *f) do { val[vlen] = c; vlen++; - c = getc(f); + c = getc(curf); } while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1); - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; curtok.type = TOK_UNKNWN; return; @@ -129,17 +130,17 @@ static void sub_match_op(FILE *f) } } -static void sub_match_uint(FILE *f) +static void sub_match_uint(void) { int c; curtok.type = TOK_UINT; while (true) { - c = getc(f); + c = getc(curf); if (!isdigit(c)) { - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return; } @@ -149,17 +150,17 @@ static void sub_match_uint(FILE *f) } } -static void sub_match_id(FILE *f) +static void sub_match_id(void) { int c; curtok.type = TOK_ID; while (true) { - c = getc(f); + c = getc(curf); if (!isalnum(c) && c != '_') { - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return; } @@ -169,72 +170,104 @@ static void sub_match_id(FILE *f) } } -struct tok_s tok_get(FILE *f) +void tok_reset(FILE *f) +{ + curf = f; + curtok.line = 1; + curtok.col = 1; + vlen = 0; + unget = false; +} + +struct tok_s tok_get(void) { int c; + if (unget) { + unget = false; + return curtok; + } + curtok.col += vlen; vlen = 0; -eat: - if (sub_eat_spaces(f)) - goto eat; - if (sub_eat_comment(f)) - goto eat; + while (sub_eat_spaces() || sub_eat_comment()); - c = getc(f); + c = getc(curf); switch (c) { case '{': curtok.type = TOK_LBRACE; vlen = 1; + val[0] = '{'; + val[1] = '\0'; return curtok; case '}': curtok.type = TOK_RBRACE; vlen = 1; + val[0] = '}'; + val[1] = '\0'; return curtok; case '=': curtok.type = TOK_EQUAL; vlen = 1; + val[0] = '='; + val[1] = '\0'; return curtok; case ',': curtok.type = TOK_COMMA; vlen = 1; + val[0] = ','; + val[1] = '\0'; return curtok; case '!': curtok.type = TOK_BANG; vlen = 1; + val[0] = '!'; + val[1] = '\0'; return curtok; case '?': curtok.type = TOK_QMARK; vlen = 1; + val[0] = '?'; + val[1] = '\0'; + return curtok; + + case '*': + curtok.type = TOK_ASTERISK; + vlen = 1; + val[0] = '*'; + val[1] = '\0'; return curtok; case EOF: + ungetc(c, curf); curtok.type = TOK_END; + vlen = 0; + val[0] = '\0'; return curtok; case '.': - sub_match_op(f); + sub_match_op(); return curtok; default: if (isdigit(c)) { val[0] = c; vlen = 1; - sub_match_uint(f); + sub_match_uint(); return curtok; } if (isalpha(c) || c == '_') { val[0] = c; vlen = 1; - sub_match_id(f); + sub_match_id(); return curtok; } @@ -243,10 +276,10 @@ eat: do { val[vlen] = c; vlen++; - c = getc(f); + c = getc(curf); } while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1); - ungetc(c, f); + ungetc(c, curf); val[vlen] = '\0'; return curtok; @@ -254,3 +287,9 @@ eat: return curtok; } + +void tok_unget(struct tok_s t) +{ + unget = true; + curtok = t; +} @@ -3,21 +3,25 @@ #include <stdio.h> +#define TOK_MAX_LEN 128 + enum tok_type_e { - TOK_LBRACE = 0, - TOK_RBRACE = 1, - TOK_EQUAL = 2, - TOK_COMMA = 3, - TOK_BANG = 4, - TOK_QMARK = 5, - TOK_OP_STRUCT = 6, - TOK_OP_HKEY_SIZE = 7, - TOK_OP_HKEY_NAME = 8, - TOK_OP_FUN_SUF = 9, - TOK_UINT = 10, - TOK_ID = 11, - TOK_UNKNWN = 12, - TOK_END = 13, + TOK_LBRACE, + TOK_RBRACE, + TOK_EQUAL, + TOK_COMMA, + TOK_BANG, + TOK_QMARK, + TOK_ASTERISK, + TOK_OP_STRUCT, + TOK_OP_UNION, + TOK_OP_HKEY_SIZE, + TOK_OP_HKEY_NAME, + TOK_OP_FUN_SUF, + TOK_UINT, + TOK_ID, + TOK_UNKNWN, + TOK_END, }; struct tok_s { @@ -27,6 +31,9 @@ struct tok_s { char *val; }; -struct tok_s tok_get(FILE *f); +void tok_reset(FILE *f); + +struct tok_s tok_get(void); +void tok_unget(struct tok_s t); #endif |