diff options
author | Tavian Barnes <tavianator@gmail.com> | 2009-10-27 00:02:47 -0400 |
---|---|---|
committer | Tavian Barnes <tavianator@gmail.com> | 2009-10-27 00:02:47 -0400 |
commit | 30754dcca0f8a4d553e9c865f7cd27f85d2919ee (patch) | |
tree | 67a8a349663e131d32c22e4a0e46609cb26903ee /dimension/tokenize.c | |
parent | 43ab94e9a2f18b0e40b441fedde5b4ce88046539 (diff) | |
download | dimension-30754dcca0f8a4d553e9c865f7cd27f85d2919ee.tar.xz |
New mmap-based tokenizer.
Diffstat (limited to 'dimension/tokenize.c')
-rw-r--r-- | dimension/tokenize.c | 46 |
1 files changed, 33 insertions, 13 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 6a9a723..efa7fce 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -21,35 +21,55 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <sys/mman.h>
+#include <unistd.h>
 
 dmnsn_array *
 dmnsn_tokenize(FILE *file)
 {
-  char c;
+  int fd = fileno(file);
+  off_t size = lseek(fd, 0, SEEK_END);
+  lseek(fd, 0, SEEK_SET);
+  char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map;
+
   dmnsn_token token;
   dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
 
-  while (!feof(file)) {
-    fread(&c, 1, 1, file);
-
-    if (isspace(c))
-      continue;
-
-    if (c == '{') {
-      token.type = DMNSN_LBRACE;
+  while (next - map < size) {
+    switch (*next) {
+    case ' ':
+    case '\n':
+    case '\r':
+    case '\t':
+    case '\f':
+    case '\v':
+      /* Skip whitespace */
+      break;
+
+    case '{':
+      token.type = DMNSN_LBRACE;
       token.value = NULL;
       dmnsn_array_push(tokens, &token);
-    } else if (c == '}') {
-      token.type = DMNSN_RBRACE;
+      break;
+
+    case '}':
+      token.type = DMNSN_RBRACE;
       token.value = NULL;
       dmnsn_array_push(tokens, &token);
-    } else {
-      /* Invalid character */
+      break;
+
+    default:
+      /* Unrecognized character: report the raw byte value, then clean up */
+      fprintf(stderr, "Unrecognized character 0x%X in input.\n", (unsigned int)(unsigned char)*next);
       dmnsn_delete_tokens(tokens);
+      munmap(map, size);
       return NULL;
     }
+
+    ++next;
   }
 
+  munmap(map, size);
   return tokens;
 }
 