From 31632d4edbd903826cbf8f6bb93969f0d1a983fb Mon Sep 17 00:00:00 2001 From: katherine Date: Wed, 18 Dec 2019 21:54:46 -0700 Subject: implement calendar parsing --- src/calendar.c | 877 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 877 insertions(+) (limited to 'src/calendar.c') diff --git a/src/calendar.c b/src/calendar.c index 46fd68c..1240547 100644 --- a/src/calendar.c +++ b/src/calendar.c @@ -1 +1,878 @@ #include "calendar.h" + +#include "err.h" +#include "opt.h" + +#include +#include +#include +#include + +#define ERRP(...) \ + do { \ + fprintf(stderr, "err: %zu:%zu: ", s->last_line, s->last_col); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, "\n"); \ + } while (0) + +struct state_s { + bool err_flag; + size_t line, col; + size_t last_line, last_col; + struct entry_interval_s cur_warn; + bool cur_urgent; + bool cur_local; + FILE *f; + struct calendar_s *cal; +}; + +enum cmd_type_e { + CMD_TYPE_WARN, + CMD_TYPE_URGENT, + CMD_TYPE_LOCAL, +}; + +struct cmd_s { + enum cmd_type_e type; + struct entry_interval_s iv; + bool b; +}; + +struct pos_s { + size_t line, col; +}; + +static struct pos_s sub_get_pos(struct state_s *s) +{ + struct pos_s p = { + .line = s->line, + .col = s->col + }; + + return p; +} + +static void sub_set_pos(struct state_s *s, struct pos_s p) +{ + s->last_line = p.line; + s->last_col = p.col; +} + +static bool sub_eat_spaces(struct state_s *s) +{ + int c; + bool seen = false; + + while (true) { + c = getc(s->f); + + if (c == '\n') { + s->col = 1; + s->line++; + continue; + } + + if (!isspace(c)) { + ungetc(c, s->f); + break; + } + + s->col++; + + seen = true; + } + + return seen; +} + +static struct entry_interval_s sub_get_interval(struct state_s *s) +{ + struct entry_interval_s iv = { 0 }; + int r, count; + long l; + bool ready = false; + + sub_set_pos(s, sub_get_pos(s)); + + while (true) { + errno = 0; + r = fscanf(s->f, "%ld%n", &l, &count); + + /* read err */ + if (errno) { + ERRP("invalid time interval"); + s->err_flag = true; + return iv; + } + + /* no int found */ + if (r == 0) { + /* err if digit */ + r = getc(s->f); + if (isdigit(r)) { + ERRP("expected time interval"); + s->err_flag = true; + return iv; + } + ungetc(r, s->f); + + /* err if empty so far */ + if (ready == false) { + ERRP("expected time interval"); + s->err_flag = true; + } + + /* done */ + return iv; + } + + /* range err interval */ + if (l <= 0) { + ERRP("invalid time interval"); + s->err_flag = true; + return iv; + } + + if (l > INT_MAX) { + ERRP("time interval too large"); + s->err_flag = true; + return iv; + } + + s->col += count; + + switch (getc(s->f)) { + case 'y': + if (iv.year != 0) { + ERRP("duplicate year entry in time interval"); + s->err_flag = true; + return iv; + } + iv.year = l; + break; + + case 'm': + if (iv.month != 0) { + ERRP("duplicate month entry in time interval"); + s->err_flag = true; + return iv; + } + iv.month = l; + break; + + case 'd': + if (iv.day != 0) { + ERRP("duplicate day entry in time interval"); + s->err_flag = true; + return iv; + } + iv.day = l; + break; + + case 'H': + if (iv.hour != 0) { + ERRP("duplicate hour entry in time interval"); + s->err_flag = true; + return iv; + } + iv.hour = l; + break; + + case 'M': + if (iv.minute != 0) { + ERRP("duplicate minute entry in time interval"); + s->err_flag = true; + return iv; + } + iv.minute = l; + break; + + case 'S': + if (iv.second != 0) { + ERRP("duplicate second entry in time interval"); + s->err_flag = true; + return iv; + } + iv.second = l; + break; + + default: + ERRP("invalid time interval"); + s->err_flag = true; + return iv; + } + + s->col++; + ready = true; + + sub_eat_spaces(s); + } +} + +static time_t sub_get_date(struct state_s *s) +{ + int r, count; + long l; + int i; + char c; + time_t rt = 0; + struct tm tmp; + struct tm t = { + .tm_year = 1, + .tm_mday = 1, + .tm_isdst = -1, + }; + + sub_set_pos(s, sub_get_pos(s)); + + for (i = 0; i < 6; i++) { + c = getc(s->f); + if (c == '_') { + count = 1; + switch (i) { + /* year */ + case 0: + case 1: + case 2: + l = 1; + break; + default: + l = 0; + break; + } + goto skip_num; + } + ungetc(c, s->f); + + errno = 0; + r = fscanf(s->f, "%ld%n", &l, &count); + + /* read err */ + if (errno) { + ERRP("invalid date"); + s->err_flag = true; + return rt; + } + + /* no int found */ + if (r == 0) { + ERRP("expected date"); + s->err_flag = true; + return rt; + } + + /* range err interval */ + if (l < 0) { + ERRP("invalid date"); + s->err_flag = true; + return rt; + } + + if (l > INT_MAX) { + ERRP("invalid date"); + s->err_flag = true; + return rt; + } + +skip_num: + + switch (i) { + case 0: + t.tm_year = l - 1900; + break; + case 1: + t.tm_mon = l - 1; + break; + case 2: + t.tm_mday = l; + break; + case 3: + t.tm_hour = l; + break; + case 4: + t.tm_min = l; + break; + case 5: + t.tm_sec = l; + break; + } + + s->col += count; + + if (i < 2) { + c = getc(s->f); + + if (c != '-') { + ungetc(c, s->f); + goto validate; + } + s->col++; + + } else if (i == 2) { + sub_eat_spaces(s); + c = getc(s->f); + ungetc(c, s->f); + if (!isdigit(c) && c != '_') + goto validate; + } else if (i < 5) { + c = getc(s->f); + + if (c != ':') { + ungetc(c, s->f); + goto validate; + } + } + } + +validate: + tmp = t; + + /* NOTE: timegm is non-POSIX, but commonplace */ + if (s->cur_local) + rt = mktime(&t); + else + rt = timegm(&t); + + if (rt == -1) { + ERRP("invalid date"); + s->err_flag = true; + } + + if ( + tmp.tm_year != t.tm_year || + tmp.tm_mon != t.tm_mon || + tmp.tm_mday != t.tm_mday || + tmp.tm_hour != t.tm_hour || + tmp.tm_min != t.tm_min || + tmp.tm_sec != t.tm_sec + ) { + ERRP("invalid date!"); + s->err_flag = true; + } + + return rt; +} + +static inline struct cmd_s sub_get_cmd_warn(struct state_s *s) +{ + struct cmd_s cmd = { + .type = CMD_TYPE_WARN + }; + + cmd.iv = sub_get_interval(s); + + return cmd; +} + +static bool sub_get_bool(struct state_s *s) +{ + bool r = false; + + sub_set_pos(s, sub_get_pos(s)); + + switch (tolower(getc(s->f))) { + case '1': + r = true; + s->col += 1; + break; + + case '0': + s->col += 1; + break; + + case 't': + if ( + tolower(getc(s->f)) == 'r' && + tolower(getc(s->f)) == 'u' && + tolower(getc(s->f)) == 'e' + ) { + r = true; + s->col += 4; + } else { + s->err_flag = true; + } + break; + + case 'y': + if ( + tolower(getc(s->f)) == 'e' && + tolower(getc(s->f)) == 's' + ) { + r = true; + s->col += 3; + } else { + s->err_flag = true; + } + break; + + case 'n': + if ( + tolower(getc(s->f)) == 'o' + ) { + s->col += 2; + } else { + s->err_flag = true; + } + break; + + case 'f': + if ( + tolower(getc(s->f)) == 'a' && + tolower(getc(s->f)) == 'l' && + tolower(getc(s->f)) == 's' && + tolower(getc(s->f)) == 'e' + ) { + s->col += 5; + } else { + s->err_flag = true; + } + break; + + case 'o': + switch (tolower(getc(s->f))) { + case 'n': + r = true; + s->col += 2; + break; + case 'f': + if (tolower(getc(s->f)) == 'f') { + s->col += 3; + } else { + s->err_flag = true; + } + break; + default: + s->err_flag = true; + break; + } + break; + + default: + s->err_flag = true; + break; + } + + if (s->err_flag) + ERRP("invalid boolean"); + + return r; +} + +static inline struct cmd_s sub_get_cmd_urgent(struct state_s *s) +{ + struct cmd_s cmd = { + .type = CMD_TYPE_URGENT + }; + + cmd.b = sub_get_bool(s); + + return cmd; +} + +static inline struct cmd_s sub_get_cmd_local(struct state_s *s) +{ + struct cmd_s cmd = { + .type = CMD_TYPE_LOCAL + }; + + cmd.b = sub_get_bool(s); + + return cmd; +} + +static inline struct cmd_s sub_get_cmd(struct state_s *s) +{ + struct cmd_s cmd = { 0 }; + struct pos_s p = { + .line = s->line, + .col = s->col - 1, + }; + + sub_set_pos(s, p); + + switch (getc(s->f)) { + case 'w': + if ( + getc(s->f) == 'a' && + getc(s->f) == 'r' && + getc(s->f) == 'n' + ) { + s->col += 4; + sub_eat_spaces(s); + return sub_get_cmd_warn(s); + } + break; + + case 'u': + if ( + getc(s->f) == 'r' && + getc(s->f) == 'g' && + getc(s->f) == 'e' && + getc(s->f) == 'n' && + getc(s->f) == 't' + ) { + s->col += 6; + sub_eat_spaces(s); + return sub_get_cmd_urgent(s); + } + break; + + case 'l': + if ( + getc(s->f) == 'o' && + getc(s->f) == 'c' && + getc(s->f) == 'a' && + getc(s->f) == 'l' + ) { + s->col += 5; + sub_eat_spaces(s); + return sub_get_cmd_urgent(s); + } + break; + } + + ERRP("unknown command"); + s->err_flag = true; + return cmd; +} + +static char* sub_get_msg(struct state_s *s) +{ + const size_t step_size = 64; + size_t count = 0, width = 0; + char *msg = NULL; + char c; + + while ( (c = getc(s->f)) != '\n' && c != EOF) { + s->col++; + if (count == width) { + width += step_size; + msg = realloc(msg, width * sizeof(*msg)); + + if (msg == NULL) { + ERRM("failed to allocate memory"); + s->err_flag = true; + return NULL; + } + } + + if (c == opt_line_delim() || c == opt_col_delim()) { + sub_set_pos(s, sub_get_pos(s)); + ERRP("message may not contain line or column delimiter characters"); + s->err_flag = true; + free(msg); + return NULL; + } + + msg[count] = c; + count++; + } + + s->col = 0; + s->line++; + + return msg; +} + +static inline struct entry_s sub_get_entry_on(struct state_s *s) +{ + struct entry_s e = { + .type = ENTRY_TYPE_ON + }; + time_t rt; + int c; + + rt = sub_get_date(s); + if (s->err_flag) + return e; + + e.start = rt; + + sub_eat_spaces(s); + + c = getc(s->f); + + if (c != ',') { + if (c == 'w' + && getc(s->f) == 'a' + && getc(s->f) == 'r' + && getc(s->f) == 'n' + ) { + s->col += 4; + e.warn = sub_get_interval(s); + if (s->err_flag) + return e; + } else { + ungetc(c, s->f); + return e; + } + } + + s->col++; + + sub_eat_spaces(s); + e.msg = sub_get_msg(s); + return e; +} + +static inline struct entry_s sub_get_entry_every(struct state_s *s) +{ + struct entry_s e = { + .type = ENTRY_TYPE_EVERY, + .urgent = s->cur_urgent, + .local = s->cur_local, + .warn = s->cur_warn, + }; + bool from_seen = false, to_seen = false, warn_seen = false; + char c; + struct pos_s p = { + .line = s->last_line, + .col = s->last_col, + }; + + e.every = sub_get_interval(s); + + if (s->err_flag) + return e; + + while (true) { + sub_eat_spaces(s); + sub_set_pos(s, p); + + switch (c = getc(s->f)) { + case 'f': + if ( + getc(s->f) == 'r' && + getc(s->f) == 'o' && + getc(s->f) == 'm' + ) { + if (from_seen) { + ERRP("invalid calendar entry"); + s->err_flag = true; + return e; + } + from_seen = true; + + s->col += 4; + sub_eat_spaces(s); + e.start = sub_get_date(s); + + if (s->err_flag) + return e; + + break; + } + + case 't': + if ( + getc(s->f) == 'o' + ) { + if (to_seen) { + ERRP("invalid calendar entry"); + s->err_flag = true; + return e; + } + to_seen = true; + + s->col += 2; + sub_eat_spaces(s); + e.end = sub_get_date(s); + e.has_end = true; + + if (s->err_flag) + return e; + + break; + } + + case 'w': + if ( + getc(s->f) == 'a' && + getc(s->f) == 'r' && + getc(s->f) == 'n' + ) { + if (warn_seen) { + ERRP("invalid calendar entry"); + s->err_flag = true; + return e; + } + warn_seen = true; + + s->col += 4; + sub_eat_spaces(s); + e.warn = sub_get_interval(s); + + if (s->err_flag) + return e; + + break; + } + + default: + goto done; + } + } + +done: + if (!from_seen) { + ERRP("invalid calendar entry"); + s->err_flag = true; + } + + ungetc(c, s->f); + + sub_eat_spaces(s); + + c = getc(s->f); + + if (c != ',') { + ungetc(c, s->f); + return e; + } + + s->col++; + + sub_eat_spaces(s); + e.msg = sub_get_msg(s); + return e; +} + +static inline struct entry_s sub_get_entry(struct state_s *s) +{ + struct entry_s e = { 0 }; + + switch (getc(s->f)) { + case 'o': + if ( + getc(s->f) == 'n' + ) { + s->col += 2; + sub_eat_spaces(s); + return sub_get_entry_on(s); + } + break; + + case 'e': + if ( + getc(s->f) == 'v' && + getc(s->f) == 'e' && + getc(s->f) == 'r' && + getc(s->f) == 'y' + ) { + s->col += 5; + sub_eat_spaces(s); + return sub_get_entry_every(s); + } + break; + } + + ERRP("invalid calendar entry"); + s->err_flag = true; + + return e; +} + +static inline void sub_cal_push(struct state_s *s, struct calendar_s *cal, + struct entry_s *e) +{ + const size_t step_size = 64; + + if (cal->count == cal->width) { + cal->width += step_size; + cal->entries = realloc(cal->entries, cal->width * sizeof(*e)); + + if (cal->entries == NULL) { + ERRM("failed to allocate memory"); + s->err_flag = true; + return; + } + } + + cal->entries[cal->count] = *e; + cal->count++; +} + +static void sub_cal_free(struct calendar_s *cal) +{ + size_t i; + + if (cal->entries == NULL) + return; + + for (i = 0; i < cal->count; i++) { + if (cal->entries[i].msg != NULL) + free(cal->entries[i].msg); + } + + free(cal->entries); + cal->entries = NULL; +} + +struct calendar_s calendar_parse(FILE *f) +{ + struct calendar_s cal = { + .count = 0, + .err_flag = false, + }; + struct state_s s = { + .err_flag = false, + .line = 1, + .col = 1, + .cur_warn = { + .day = 1, + }, + .cur_urgent = false, + .cur_local = true, + .f = f, + .cal = &cal, + }; + int c; + struct cmd_s cmd; + struct entry_s e; + + while (true) { + sub_eat_spaces(&s); + sub_set_pos(&s, sub_get_pos(&s)); + + c = getc(f); + if (c == EOF) + break; + + if (c == '.') { + s.col++; + cmd = sub_get_cmd(&s); + + if (s.err_flag) { + sub_cal_free(&cal); + cal.err_flag = true; + return cal; + } + + switch (cmd.type) { + case CMD_TYPE_WARN: + s.cur_warn = cmd.iv; + break; + case CMD_TYPE_URGENT: + s.cur_urgent = cmd.b; + break; + case CMD_TYPE_LOCAL: + s.cur_local = cmd.b; + break; + } + + continue; + } + + ungetc(c, f); + + e = sub_get_entry(&s); + if (s.err_flag) { + sub_cal_free(&cal); + cal.err_flag = true; + return cal; + } + sub_cal_push(&s, &cal, &e); + } + + return cal; +} -- cgit v1.2.3