From f6ce73bd76ee9b07bb13a6df9a5663a38ccf4013 Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Tue, 27 Oct 2009 20:35:17 -0400 Subject: Tokenize numeric values. --- dimension/tokenize.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++-- dimension/tokenize.h | 5 +++++ 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'dimension') diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 7019ad7..c34cfc6 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -40,7 +40,9 @@ dmnsn_tokenize(FILE *file) return NULL; } - char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map; + char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), + *next = map, + *endi, *endf; if (map == MAP_FAILED) { fprintf(stderr, "Couldn't mmap() input stream.\n"); @@ -50,19 +52,28 @@ dmnsn_tokenize(FILE *file) dmnsn_token token; dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token)); + unsigned int line = 0, col = 0; + unsigned int i; + while (next - map < size) { /* Saves us some code repetition in the vast majority of cases */ token.value = NULL; switch (*next) { case ' ': - case '\n': case '\r': case '\t': case '\f': case '\v': /* Skip whitespace */ ++next; + ++col; + continue; + + case '\n': + ++next; + ++line; + col = 0; continue; /* Macro to make basic symbol tokens easier */ @@ -86,6 +97,46 @@ dmnsn_tokenize(FILE *file) dmnsn_simple_token('/', DMNSN_SLASH); dmnsn_simple_token(',', DMNSN_COMMA); + /* Numeric values */ + case '.': /* Number begins with a decimal point, as in `.2' */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + strtoul(next, &endi, 0); + strtod(next, &endf); + if (endf > endi + /* These next conditions catch invalid octal integers being parsed as + floats, eg 08 */ + && (*endi == '.' || *endi == 'e' || *endi == 'E' || *endi == 'p' + || *endi == 'P')) + { + token.type = DMNSN_FLOAT; + token.value = malloc(endf - next + 1); + strncpy(token.value, next, endf - next); + token.value[endf - next] = '\0'; + next = endf; + } else if (endi > next) { + token.type = DMNSN_INT; + token.value = malloc(endi - next + 1); + strncpy(token.value, next, endi - next); + token.value[endi - next] = '\0'; + next = endi; + } else { + fprintf(stderr, "Invalid numeric value on line %u, column %u.\n", + line, col); + dmnsn_delete_tokens(tokens); + munmap(map, size); + return NULL; + } + break; + default: /* Unrecognised character */ fprintf(stderr, "Unrecognized character 0x%X in input.\n", @@ -97,6 +148,7 @@ dmnsn_tokenize(FILE *file) dmnsn_array_push(tokens, &token); ++next; + ++col; } munmap(map, size); @@ -157,6 +209,7 @@ dmnsn_token_name(dmnsn_token_type token_type) case type: \ return str; + /* Punctuation */ dmnsn_token_map(DMNSN_LBRACE, "{"); dmnsn_token_map(DMNSN_RBRACE, "}") dmnsn_token_map(DMNSN_LPAREN, "\\("); @@ -171,6 +224,10 @@ dmnsn_token_name(dmnsn_token_type token_type) dmnsn_token_map(DMNSN_SLASH, "/"); dmnsn_token_map(DMNSN_COMMA, ","); + /* Numeric values */ + dmnsn_token_map(DMNSN_INT, "int"); + dmnsn_token_map(DMNSN_FLOAT, "float"); + default: printf("Warning: unrecognised token %d.\n", (int)token_type); return "unrecognized-token"; diff --git a/dimension/tokenize.h b/dimension/tokenize.h index e64b7eb..91d59f6 100644 --- a/dimension/tokenize.h +++ b/dimension/tokenize.h @@ -20,6 +20,7 @@ #include "../libdimension/dimension.h" typedef enum { + /* Punctuation */ DMNSN_LBRACE, /* { */ DMNSN_RBRACE, /* } */ DMNSN_LPAREN, /* ( */ @@ -33,6 +34,10 @@ typedef enum { DMNSN_STAR, /* * */ DMNSN_SLASH, /* / */ DMNSN_COMMA, /* , */ + + /* Numeric values */ + DMNSN_INT, + DMNSN_FLOAT, } dmnsn_token_type; typedef struct dmnsn_token dmnsn_token; -- cgit v1.2.3