/*
 * confconf internal tokeniser.
 *
 * Single-file rendering of the self-contained C sources added by the commit
 * "implement internal tokeniser": the error macros (src/err.h), the token
 * interface (src/tok.h) and the tokeniser itself (src/tok.c).  The command
 * line driver (src/main.c) and the usage text (src/opt.c) depend on opt.h
 * and are not repeated here.
 */

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* ------------------------------------------------------------------------
 * src/err.h
 * --------------------------------------------------------------------- */
#ifndef CONFCONF_ERR_H
#define CONFCONF_ERR_H

/*
 * Print "error: <message>" plus a newline to stderr, then exit with
 * EXIT_FAILURE.  The first argument must be a string literal because it is
 * concatenated with the "error: " prefix.
 */
#define ERR(...) \
	do { \
		fprintf(stderr, "error: " __VA_ARGS__); \
		fprintf(stderr, "\n"); \
		exit(EXIT_FAILURE); \
	} while (0)

/* Abort through ERR() with the given message when cond is false. */
#define TRY(cond, ...) \
	do { \
		if (!(cond)) { \
			ERR(__VA_ARGS__); \
		} \
	} while (0)

/*
 * Allocate room for count objects of type *dest and store the pointer in
 * dest; abort through ERR() when malloc() returns NULL.
 */
#define TRYALLOC(dest, count) \
	do { \
		(dest) = malloc((count) * sizeof(*(dest))); \
		TRY((dest) != NULL, "could not allocate memory"); \
	} while (0)

#endif

/* ------------------------------------------------------------------------
 * src/tok.h
 * --------------------------------------------------------------------- */
#ifndef CONFCONF_TOK_H
#define CONFCONF_TOK_H

/* Token kinds.  Everything above TOK_QMARK carries text in tok_s.val. */
enum tok_type_e {
	TOK_LBRACE       = 0,
	TOK_RBRACE       = 1,
	TOK_EQUAL        = 2,
	TOK_COMMA        = 3,
	TOK_BANG         = 4,
	TOK_QMARK        = 5,
	TOK_OP_STRUCT    = 6,
	TOK_OP_HKEY_SIZE = 7,
	TOK_OP_HKEY_NAME = 8,
	TOK_OP_FUN_SUF   = 9,
	TOK_UINT         = 10,
	TOK_ID           = 11,
	TOK_UNKNWN       = 12,
	TOK_END          = 13,
};

/*
 * One token.  line and col are 1-based and locate the first character of
 * the token.  val points at the tokeniser's internal static buffer, so its
 * contents are overwritten by the next call to tok_get().
 */
struct tok_s {
	enum tok_type_e type;
	size_t line;
	size_t col;
	char *val;
};

/*
 * Read the next token from f.  Returns TOK_END at end of input and
 * TOK_UNKNWN (with the offending text in val) for unrecognised input.
 */
struct tok_s tok_get(FILE *f);

#endif

/* ------------------------------------------------------------------------
 * src/tok.c
 * --------------------------------------------------------------------- */

/* Size of the token text buffer, including the terminating NUL. */
#define TOK_MAX_LEN 128

static char val[TOK_MAX_LEN];
static size_t vlen = 0;
static struct tok_s curtok = { .line = 1, .col = 1, .val = val };

/*
 * Skip whitespace, keeping curtok.line / curtok.col up to date.
 * Returns true when at least one non-newline whitespace character was
 * consumed.  Newlines are consumed as well but do not affect the result.
 */
static bool sub_eat_spaces(FILE *f)
{
	bool seen = false;
	int c;

	for (;;) {
		c = getc(f);

		if (c == '\n') {
			curtok.col = 1;
			curtok.line++;
			continue;
		}

		if (!isspace(c)) {
			/* ungetc(EOF) is a harmless no-op */
			ungetc(c, f);
			return seen;
		}

		curtok.col++;
		seen = true;
	}
}

/*
 * Skip a '#' comment running to the end of the line (or end of input).
 * Returns true when a comment was consumed, false when the next character
 * does not start one (it is pushed back in that case).
 */
static bool sub_eat_comment(FILE *f)
{
	int c = getc(f);

	if (c != '#') {
		ungetc(c, f);
		return false;
	}

	for (;;) {
		c = getc(f);

		if (c == '\n') {
			curtok.col = 1;
			curtok.line++;
			return true;
		}

		if (c == EOF) {
			ungetc(c, f);
			return true;
		}
	}
}

/*
 * Collect an unrecognised token: append c and every following character up
 * to whitespace, end of input or a full buffer, and mark the token as
 * TOK_UNKNWN.  The character that stopped the scan is pushed back.
 */
static void sub_match_unknown(FILE *f, int c)
{
	curtok.type = TOK_UNKNWN;

	do {
		val[vlen] = (char)c;
		vlen++;
		c = getc(f);
	} while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1);

	ungetc(c, f);
	val[vlen] = '\0';
}

/*
 * Match one of the '.'-prefixed operator keywords.  The leading '.' has
 * already been consumed by the caller.  Anything that is not exactly one
 * of the keywords becomes TOK_UNKNWN.
 */
static void sub_match_op(FILE *f)
{
	struct {
		bool possible;
		enum tok_type_e type;
		char name[(32 < TOK_MAX_LEN ? 32 : TOK_MAX_LEN)];
	} ops[] = {
		{ true, TOK_OP_STRUCT,    ".struct"          },
		{ true, TOK_OP_HKEY_SIZE, ".hash-key-size"   },
		{ true, TOK_OP_HKEY_NAME, ".hash-key-name"   },
		{ true, TOK_OP_FUN_SUF,   ".function-suffix" },
	};
	const size_t nops = sizeof ops / sizeof ops[0];
	size_t i, j;
	bool again;
	int c;

	val[0] = '.';
	vlen = 1;

	/* i is the index of the keyword character being compared */
	for (i = 1;; i++) {
		again = false;
		c = getc(f);

		if (c == EOF || isspace(c)) {
			/* input ended before any keyword was completed */
			ungetc(c, f);
			curtok.type = TOK_UNKNWN;
			val[vlen] = '\0';
			return;
		}

		val[vlen] = (char)c;
		vlen++;

		for (j = 0; j < nops; j++) {
			if (!ops[j].possible)
				continue;

			if (c != ops[j].name[i]) {
				ops[j].possible = false;
				continue;
			}

			if (ops[j].name[i + 1] == '\0') {
				curtok.type = ops[j].type;
				val[vlen] = '\0';
				return;
			}

			again = true;
		}

		if (!again) {
			/* no candidate left: c is re-stored by the helper */
			vlen--;
			sub_match_unknown(f, c);
			return;
		}
	}
}

/* Character classes accepted inside TOK_UINT and TOK_ID tokens. */
static bool sub_is_uint_char(int c)
{
	return isdigit(c) != 0;
}

static bool sub_is_id_char(int c)
{
	return isalnum(c) != 0 || c == '_';
}

/*
 * Append characters accepted by `accepts` to the token buffer and tag the
 * token with `type`.  The first character is already in val[0].
 *
 * The buffer holds at most TOK_MAX_LEN - 1 characters.  When the run is
 * longer than that, scanning stops at capacity and the token is reported
 * as TOK_UNKNWN instead of writing past the end of val[].
 */
static void sub_match_run(FILE *f, enum tok_type_e type, bool (*accepts)(int))
{
	int c;

	curtok.type = type;

	for (;;) {
		c = getc(f);

		if (!accepts(c))
			break;

		if (vlen >= TOK_MAX_LEN - 1) {
			curtok.type = TOK_UNKNWN;
			break;
		}

		val[vlen] = (char)c;
		vlen++;
	}

	ungetc(c, f);
	val[vlen] = '\0';
}

/* Match the remainder of an unsigned integer literal. */
static void sub_match_uint(FILE *f)
{
	sub_match_run(f, TOK_UINT, sub_is_uint_char);
}

/* Match the remainder of an identifier ([A-Za-z0-9_]*). */
static void sub_match_id(FILE *f)
{
	sub_match_run(f, TOK_ID, sub_is_id_char);
}

/* Record a one-character punctuation token, including its text. */
static void sub_match_single(enum tok_type_e type, int c)
{
	curtok.type = type;
	val[0] = (char)c;
	val[1] = '\0';
	vlen = 1;
}

struct tok_s tok_get(FILE *f)
{
	int c;

	/* step past the token returned by the previous call */
	curtok.col += vlen;
	vlen = 0;

	/* whitespace and comments may alternate any number of times */
	while (sub_eat_spaces(f) || sub_eat_comment(f))
		;

	c = getc(f);

	switch (c) {
	case '{':
		sub_match_single(TOK_LBRACE, c);
		break;

	case '}':
		sub_match_single(TOK_RBRACE, c);
		break;

	case '=':
		sub_match_single(TOK_EQUAL, c);
		break;

	case ',':
		sub_match_single(TOK_COMMA, c);
		break;

	case '!':
		sub_match_single(TOK_BANG, c);
		break;

	case '?':
		sub_match_single(TOK_QMARK, c);
		break;

	case EOF:
		curtok.type = TOK_END;
		val[0] = '\0';
		break;

	case '.':
		sub_match_op(f);
		break;

	default:
		if (isdigit(c)) {
			val[0] = (char)c;
			vlen = 1;
			sub_match_uint(f);
		} else if (isalpha(c) || c == '_') {
			val[0] = (char)c;
			vlen = 1;
			sub_match_id(f);
		} else {
			sub_match_unknown(f, c);
		}
		break;
	}

	return curtok;
}