summary | refs | log | tree | commit | diff | stats
path: root/dimension/tokenize.c
diff options
context:
space:
mode:
author Tavian Barnes <tavianator@gmail.com> 2009-10-27 00:02:47 -0400
committer Tavian Barnes <tavianator@gmail.com> 2009-10-27 00:02:47 -0400
commit 30754dcca0f8a4d553e9c865f7cd27f85d2919ee (patch)
tree 67a8a349663e131d32c22e4a0e46609cb26903ee /dimension/tokenize.c
parent 43ab94e9a2f18b0e40b441fedde5b4ce88046539 (diff)
download dimension-30754dcca0f8a4d553e9c865f7cd27f85d2919ee.tar.xz
New mmap-based tokenizer.
Diffstat (limited to 'dimension/tokenize.c')
-rw-r--r-- dimension/tokenize.c 46
1 files changed, 33 insertions, 13 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 6a9a723..efa7fce 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -21,35 +21,55 @@
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
+#include <sys/mman.h>
+#include <unistd.h>
dmnsn_array *
dmnsn_tokenize(FILE *file)
{
- char c;
+ int fd = fileno(file);
+ off_t size = lseek(fd, 0, SEEK_END); /* File length in bytes */
+ lseek(fd, 0, SEEK_SET); /* Rewind the descriptor */
+ char *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0), *next = map; /* Read-only view of the file */
+ if (size > 0 && map == MAP_FAILED) return NULL; /* Mapping failed; reading *next would fault */
dmnsn_token token;
dmnsn_array *tokens = dmnsn_new_array(sizeof(dmnsn_token));
- while (!feof(file)) {
- fread(&c, 1, 1, file);
-
- if (isspace(c))
- continue;
-
- if (c == '{') {
- token.type = DMNSN_LBRACE;
+ while (next - map < size) {
+ switch (*next) {
+ case ' ':
+ case '\n':
+ case '\r':
+ case '\t':
+ case '\f':
+ case '\v':
+ /* Skip whitespace */
+ break;
+
+ case '{':
+ token.type = DMNSN_LBRACE;
token.value = NULL;
dmnsn_array_push(tokens, &token);
- } else if (c == '}') {
- token.type = DMNSN_RBRACE;
+ break;
+
+ case '}':
+ token.type = DMNSN_RBRACE;
token.value = NULL;
dmnsn_array_push(tokens, &token);
- } else {
- /* Invalid character */
+ break;
+
+ default:
+ /* Unrecognized character */
+ fprintf(stderr, "Unrecognized character 0x%X in input.\n", (unsigned int)(unsigned char)*next);
dmnsn_delete_tokens(tokens);
+ munmap(map, size);
return NULL;
}
+
+ ++next;
}
+ munmap(map, size);
return tokens;
}