about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author katherine <shmibs@airen-no-jikken.icu> 2019-05-08 23:33:41 -0700
committer katherine <shmibs@airen-no-jikken.icu> 2019-05-08 23:33:41 -0700
commit 95a9726023abd85b49ed39837911e1b231f4389b (patch)
tree 3535a3ca1feac910ecf736fdc07ddcb5559f4321
parent ca0d95e26663e05d702c6f3a5627812dbf0c9f90 (diff)
download confconf-95a9726023abd85b49ed39837911e1b231f4389b.tar.gz
implement internal tokeniser
-rw-r--r-- src/err.h 27
-rw-r--r-- src/gen.h 4
-rw-r--r-- src/main.c 42
-rw-r--r-- src/opt.c 2
-rw-r--r-- src/parse.h 6
-rw-r--r-- src/tok.c 256
-rw-r--r-- src/tok.h 32
7 files changed, 363 insertions, 6 deletions
diff --git a/src/err.h b/src/err.h
new file mode 100644
index 0000000..3d3262f
--- /dev/null
+++ b/src/err.h
@@ -0,0 +1,27 @@
+#ifndef CONFCONF_ERR_H
+#define CONFCONF_ERR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Report a fatal error and terminate the program.
+ *
+ * Writes "error: " followed by the printf-style message and a newline to
+ * stderr, then exits with EXIT_FAILURE.  The format must be a string literal
+ * because it is pasted directly after the "error: " prefix.
+ */
+#define ERR(...) \
+    do { \
+        fprintf(stderr, "error: " __VA_ARGS__); \
+        fputc('\n', stderr); \
+        exit(EXIT_FAILURE); \
+    } while (0)
+
+/*
+ * Assert-like guard: when `cond` evaluates to zero/NULL, bail out through
+ * ERR() with the supplied printf-style message; otherwise do nothing.
+ */
+#define TRY(cond, ...) \
+    do { \
+        if ((cond) == 0) { \
+            ERR(__VA_ARGS__); \
+        } \
+    } while (0)
+
+/*
+ * Allocate an array of `count` elements of the type `dest` points to and
+ * store the result in `dest`; on failure the program exits through TRY/ERR
+ * with "could not allocate memory".
+ *
+ * calloc() is used rather than malloc(count * size) so that an element count
+ * whose byte size would overflow size_t is reported as an allocation failure
+ * instead of silently yielding a too-small buffer.  As a side effect the
+ * returned memory is zero-initialised.
+ *
+ * `dest` is expanded more than once, so it must not have side effects.
+ */
+#define TRYALLOC(dest, count) \
+    do { \
+        (dest) = calloc((count), sizeof(*(dest))); \
+        TRY((dest) != NULL, "could not allocate memory"); \
+    } while (0)
+
+#endif
diff --git a/src/gen.h b/src/gen.h
index e69de29..f3cb932 100644
--- a/src/gen.h
+++ b/src/gen.h
@@ -0,0 +1,4 @@
+#ifndef CONFCONF_GEN_H
+#define CONFCONF_GEN_H
+
+#endif
diff --git a/src/main.c b/src/main.c
index cd5eb3a..ed9c413 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,16 +1,48 @@
#include "opt.h"
+#include "err.h"
+#include "tok.h"
#include <stdio.h>
+#include <errno.h>
+
+
+
int main(int argc, char **argv)
{
+    /* tokens are read from stdin unless an input file was given */
+    FILE *fi = stdin;
+    const char *fname = "stdin";
+    struct tok_s t;
+
 opt_parse(argc, argv);
- if (opt_infile_str())
- puts(opt_infile_str());
+    if (opt_infile_str() != NULL) {
+        fname = opt_infile_str();
+        fi = fopen(fname, "r");
+        TRY(fi != NULL, "could not read file `%s`", fname);
+    }
+
+    /* dump every token until end of input or the first unrecognised one */
+    while (1) {
+        t = tok_get(fi);
+
+        if (t.type == TOK_UNKNWN || t.type == TOK_END)
+            break;
+
+        printf("%s:%zu:%zu: ", fname, t.line, t.col);
+
+        /* only token kinds after TOK_QMARK carry a text value */
+        if (t.type > TOK_QMARK) {
+            printf("%u, `%s`\n", (unsigned)t.type, t.val);
+        } else {
+            printf("%u\n", (unsigned)t.type);
+        }
+    }
+
+    /* a bad token is an error: report it on stderr and fail the run */
+    if (t.type == TOK_UNKNWN) {
+        fprintf(stderr, "%s:%zu:%zu: error: unrecognised token `%s`\n",
+                fname, t.line, t.col, t.val);
+        fclose(fi);
+        return EXIT_FAILURE;
+    }
+
+    fclose(fi);
- if (opt_outfile_str())
- puts(opt_outfile_str());
-
 return 0;
}
diff --git a/src/opt.c b/src/opt.c
index f4a1969..0f70293 100644
--- a/src/opt.c
+++ b/src/opt.c
@@ -34,7 +34,7 @@ void opt_parse(int argc, char **argv)
/* help */
if (options[0].was_seen) {
simple_opt_print_usage(stdout, 70, argv[0],
- "[-i input] [-o output]",
+ "[-i input.confconf] [-o output.h]",
"confconf is a config file parser generator for C",
options);
exit(EXIT_SUCCESS);
diff --git a/src/parse.h b/src/parse.h
index e69de29..9f9b4e3 100644
--- a/src/parse.h
+++ b/src/parse.h
@@ -0,0 +1,6 @@
+#ifndef CONFCONF_PARSE_H
+#define CONFCONF_PARSE_H
+
+
+
+#endif
diff --git a/src/tok.c b/src/tok.c
new file mode 100644
index 0000000..12553a9
--- /dev/null
+++ b/src/tok.c
@@ -0,0 +1,256 @@
+#include "tok.h"
+
+#include <stdbool.h>
+#include <ctype.h>
+
+#define TOK_MAX_LEN 128 /* capacity of val[], including the terminating NUL */
+
+static char val[TOK_MAX_LEN]; /* text of the token being built; curtok.val points here */
+static size_t vlen = 0; /* length recorded for the current token; tok_get adds it to curtok.col on the next call */
+static struct tok_s curtok = { .line = 1, .col = 1, .val = val }; /* token state returned (by value) from tok_get; line and col start at 1 */
+
+/*
+ * Consume a run of whitespace from `f`, keeping curtok.line / curtok.col up
+ * to date: a newline bumps the line and resets the column to 1, any other
+ * whitespace character advances the column by one.  The first non-space
+ * character is pushed back.
+ *
+ * Returns true only when at least one non-newline whitespace character was
+ * consumed; a run made purely of newlines is eaten but reports false.
+ */
+static bool sub_eat_spaces(FILE *f)
+{
+    bool consumed = false;
+    int ch;
+
+    for (ch = getc(f); isspace(ch); ch = getc(f)) {
+        if (ch == '\n') {
+            curtok.line++;
+            curtok.col = 1;
+        } else {
+            curtok.col++;
+            consumed = true;
+        }
+    }
+
+    /* give back whatever ended the run (a no-op when that was EOF) */
+    ungetc(ch, f);
+
+    return consumed;
+}
+
+/*
+ * If the next character of `f` is '#', discard everything up to and
+ * including the end of the line (or up to EOF) and return true.  When the
+ * comment ends in a newline, curtok.line is incremented and curtok.col is
+ * reset to 1.  If the next character is not '#', it is pushed back and the
+ * function returns false.
+ */
+static bool sub_eat_comment(FILE *f)
+{
+    int ch = getc(f);
+
+    if (ch != '#') {
+        ungetc(ch, f);
+        return false;
+    }
+
+    /* skip the comment body */
+    do {
+        ch = getc(f);
+    } while (ch != '\n' && ch != EOF);
+
+    if (ch == '\n') {
+        curtok.line++;
+        curtok.col = 1;
+    }
+
+    return true;
+}
+
+/*
+ * Match a dot-prefixed operator keyword.  The caller has already consumed
+ * the leading '.'; this function reads further characters from `f`, copying
+ * them into val[], and narrows a table of candidate keywords one character
+ * at a time.
+ *
+ * - As soon as a candidate has been matched in full, curtok.type is set to
+ *   its token type and the function returns.
+ * - If EOF or whitespace arrives while only a proper prefix has been read,
+ *   or no candidate remains, the token becomes TOK_UNKNWN; in the latter
+ *   case the rest of the word (up to whitespace, EOF or the buffer limit)
+ *   is appended to val[] first.
+ */
+static void sub_match_op(FILE *f)
+{
+    struct {
+        bool possible;
+        enum tok_type_e type;
+        char name[(32 < TOK_MAX_LEN ? 32 : TOK_MAX_LEN)];
+    } ops[] = {
+        { true, TOK_OP_STRUCT, ".struct" },
+        { true, TOK_OP_HKEY_SIZE, ".hash-key-size" },
+        { true, TOK_OP_HKEY_NAME, ".hash-key-name" },
+        { true, TOK_OP_FUN_SUF, ".function-suffix" },
+    };
+    const size_t nops = sizeof ops / sizeof ops[0];
+    size_t pos, k;
+    unsigned live;
+    int ch;
+
+    val[0] = '.';
+    vlen = 1;
+
+    for (pos = 1;; pos++) {
+        ch = getc(f);
+
+        /* word ended before any keyword was completed */
+        if (ch == EOF || isspace(ch)) {
+            ungetc(ch, f);
+            val[vlen] = '\0';
+            curtok.type = TOK_UNKNWN;
+            return;
+        }
+
+        val[vlen] = ch;
+        vlen++;
+
+        /* drop candidates that disagree at this position */
+        live = 0;
+        for (k = 0; k < nops; k++) {
+            if (!ops[k].possible)
+                continue;
+
+            if (ops[k].name[pos] != ch) {
+                ops[k].possible = false;
+                continue;
+            }
+
+            /* that was the keyword's last character: full match */
+            if (ops[k].name[pos + 1] == '\0') {
+                curtok.type = ops[k].type;
+                val[vlen] = '\0';
+                return;
+            }
+
+            live++;
+        }
+
+        if (live > 0)
+            continue;
+
+        /* nothing can match any more: collect the rest of the word */
+        ch = getc(f);
+        while (ch != EOF && !isspace(ch) && vlen < TOK_MAX_LEN - 1) {
+            val[vlen] = ch;
+            vlen++;
+            ch = getc(f);
+        }
+        ungetc(ch, f);
+        val[vlen] = '\0';
+        curtok.type = TOK_UNKNWN;
+        return;
+    }
+}
+
+/*
+ * Finish reading an unsigned integer literal.  The caller has already stored
+ * the first digit in val[0] and set vlen to 1; this function appends the
+ * remaining digits from `f`, pushes back the first non-digit and
+ * NUL-terminates val[].
+ *
+ * curtok.type is set to TOK_UINT.  A literal too long for val[] cannot be
+ * represented: its surplus digits are still consumed (so the stream stays
+ * positioned after the literal) but are not stored, and the token is
+ * reported as TOK_UNKNWN instead of writing past the end of the buffer.
+ */
+static void sub_match_uint(FILE *f)
+{
+    int c;
+
+    curtok.type = TOK_UINT;
+
+    for (c = getc(f); isdigit(c); c = getc(f)) {
+        if (vlen < TOK_MAX_LEN - 1) {
+            val[vlen] = c;
+            vlen++;
+        } else {
+            /* buffer full: keep one slot for the terminator */
+            curtok.type = TOK_UNKNWN;
+        }
+    }
+
+    ungetc(c, f);
+    val[vlen] = '\0';
+}
+
+/*
+ * Finish reading an identifier.  The caller has already stored the first
+ * character in val[0] and set vlen to 1; this function appends every
+ * following alphanumeric or '_' character from `f`, pushes back the first
+ * character that does not belong to the identifier and NUL-terminates val[].
+ *
+ * curtok.type is set to TOK_ID.  An identifier too long for val[] cannot be
+ * represented: its surplus characters are still consumed (so the stream
+ * stays positioned after the identifier) but are not stored, and the token
+ * is reported as TOK_UNKNWN instead of writing past the end of the buffer.
+ */
+static void sub_match_id(FILE *f)
+{
+    int c;
+
+    curtok.type = TOK_ID;
+
+    for (c = getc(f); isalnum(c) || c == '_'; c = getc(f)) {
+        if (vlen < TOK_MAX_LEN - 1) {
+            val[vlen] = c;
+            vlen++;
+        } else {
+            /* buffer full: keep one slot for the terminator */
+            curtok.type = TOK_UNKNWN;
+        }
+    }
+
+    ungetc(c, f);
+    val[vlen] = '\0';
+}
+
+/*
+ * Read the next token from `f` and return it by value.
+ *
+ * Leading whitespace and '#' comments are skipped.  The returned struct is a
+ * copy of the file-level token state: `type` is the token kind, `line`/`col`
+ * are the position counters, and `val` points at a static buffer holding the
+ * NUL-terminated token text (empty for TOK_END).  The buffer is overwritten
+ * by the next call.
+ *
+ * Characters that start no known token are gathered up to the next
+ * whitespace, EOF or the buffer limit and returned as TOK_UNKNWN.
+ */
+struct tok_s tok_get(FILE *f)
+{
+    int c;
+
+    /* step the column past the token handed out by the previous call */
+    curtok.col += vlen;
+    vlen = 0;
+
+    /* skip any mix of whitespace and comments; the helpers track line/col */
+    while (sub_eat_spaces(f) || sub_eat_comment(f))
+        ;
+
+    c = getc(f);
+
+    switch (c) {
+    case '{':
+        curtok.type = TOK_LBRACE;
+        break;
+
+    case '}':
+        curtok.type = TOK_RBRACE;
+        break;
+
+    case '=':
+        curtok.type = TOK_EQUAL;
+        break;
+
+    case ',':
+        curtok.type = TOK_COMMA;
+        break;
+
+    case '!':
+        curtok.type = TOK_BANG;
+        break;
+
+    case '?':
+        curtok.type = TOK_QMARK;
+        break;
+
+    case EOF:
+        curtok.type = TOK_END;
+        /* do not leave the previous token's text behind */
+        val[0] = '\0';
+        return curtok;
+
+    case '.':
+        sub_match_op(f);
+        return curtok;
+
+    default:
+        val[0] = c;
+        vlen = 1;
+
+        if (isdigit(c)) {
+            sub_match_uint(f);
+            return curtok;
+        }
+
+        if (isalpha(c) || c == '_') {
+            sub_match_id(f);
+            return curtok;
+        }
+
+        /* unrecognised: collect the rest of the word for the error report */
+        curtok.type = TOK_UNKNWN;
+
+        c = getc(f);
+        while (c != EOF && !isspace(c) && vlen < TOK_MAX_LEN - 1) {
+            val[vlen] = c;
+            vlen++;
+            c = getc(f);
+        }
+
+        ungetc(c, f);
+        val[vlen] = '\0';
+
+        return curtok;
+    }
+
+    /* single-character punctuation: record its text and width */
+    val[0] = c;
+    val[1] = '\0';
+    vlen = 1;
+
+    return curtok;
+}
diff --git a/src/tok.h b/src/tok.h
new file mode 100644
index 0000000..2e02c40
--- /dev/null
+++ b/src/tok.h
@@ -0,0 +1,32 @@
+#ifndef CONFCONF_TOK_H
+#define CONFCONF_TOK_H
+
+#include <stdio.h>
+
+/*
+ * Kinds of token produced by tok_get().  The numeric values run from 0 to 13
+ * in declaration order; main.c relies on every kind that carries a text
+ * value being ordered after TOK_QMARK.
+ */
+enum tok_type_e {
+    /* single-character punctuation */
+    TOK_LBRACE = 0,     /* { */
+    TOK_RBRACE,         /* } */
+    TOK_EQUAL,          /* = */
+    TOK_COMMA,          /* , */
+    TOK_BANG,           /* ! */
+    TOK_QMARK,          /* ? */
+
+    /* dot-prefixed operator keywords */
+    TOK_OP_STRUCT,      /* .struct */
+    TOK_OP_HKEY_SIZE,   /* .hash-key-size */
+    TOK_OP_HKEY_NAME,   /* .hash-key-name */
+    TOK_OP_FUN_SUF,     /* .function-suffix */
+
+    /* literals and names */
+    TOK_UINT,           /* run of decimal digits */
+    TOK_ID,             /* letter or '_' followed by alphanumerics or '_' */
+
+    /* special */
+    TOK_UNKNWN,         /* text that matches no token */
+    TOK_END,            /* end of input */
+};
+
+struct tok_s { /* one token as returned by tok_get() */
+ enum tok_type_e type; /* kind of token */
+ size_t line; /* line counter maintained by the tokeniser; starts at 1 */
+ size_t col; /* column counter maintained by the tokeniser; starts at 1 */
+ char *val; /* NUL-terminated text for token kinds that carry one; points at a static buffer in tok.c that later tok_get() calls overwrite */
+};
+
+struct tok_s tok_get(FILE *f); /* read the next token from f; type is TOK_END at end of input and TOK_UNKNWN for unrecognised text */
+
+#endif