From 87cb2d172843e114a8640de3fde61db3a2bf0a6a Mon Sep 17 00:00:00 2001 From: Tavian Barnes Date: Thu, 29 Oct 2009 01:34:28 -0400 Subject: Even more tokenizer work. The tokenizer is now able to tokenize all of POV-Ray 3.6's include files. It turns out that some #includes may be dynamic (ex. with #writes beforehand), so the tokenizer can't directly perform includes - the executor must do this. --- dimension/tokenize.c | 175 ++++++++++++++++++------------------------ dimension/tokenize.h | 36 +++++---- tests/dimension/directives.sh | 2 +- 3 files changed, 96 insertions(+), 117 deletions(-) diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 7a38f59..9a091f7 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -178,9 +178,15 @@ dmnsn_tokenize_directive(const char *filename, return 1; } + ++*next; + /* Handle spaces between `#' and directive */ + while (*next - map < size && (**next == ' ' || **next == '\t')) { + ++*next; + } + char *directive = malloc(alloc); - do { + while (*next - map < size && (isalnum(**next) || **next == '_')) { if (i + 1 >= alloc) { alloc *= 2; directive = realloc(directive, alloc); @@ -191,7 +197,7 @@ dmnsn_tokenize_directive(const char *filename, ++i; ++*col; ++*next; - } while (*next - map < size && (isalnum(**next) || **next == '_')); + } directive[i] = '\0'; @@ -206,32 +212,32 @@ dmnsn_tokenize_directive(const char *filename, } \ } while (0) - dmnsn_directive("#break", DMNSN_T_BREAK); - dmnsn_directive("#case", DMNSN_T_CASE); - dmnsn_directive("#debug", DMNSN_T_DEBUG); - dmnsn_directive("#declare", DMNSN_T_DECLARE); - dmnsn_directive("#default", DMNSN_T_DEFAULT); - dmnsn_directive("#else", DMNSN_T_ELSE); - dmnsn_directive("#end", DMNSN_T_END); - dmnsn_directive("#error", DMNSN_T_ERROR); - dmnsn_directive("#fclose", DMNSN_T_FCLOSE); - dmnsn_directive("#fopen", DMNSN_T_FOPEN); - dmnsn_directive("#if", DMNSN_T_IF); - dmnsn_directive("#ifdef", DMNSN_T_IFDEF); - dmnsn_directive("#ifndef", DMNSN_T_IFNDEF); - dmnsn_directive("#include", DMNSN_T_INCLUDE); - dmnsn_directive("#local", DMNSN_T_LOCAL); - dmnsn_directive("#macro", DMNSN_T_MACRO); - dmnsn_directive("#range", DMNSN_T_RANGE); - dmnsn_directive("#read", DMNSN_T_READ); - dmnsn_directive("#render", DMNSN_T_RENDER); - dmnsn_directive("#statistics", DMNSN_T_STATISTICS); - dmnsn_directive("#switch", DMNSN_T_SWITCH); - dmnsn_directive("#undef", DMNSN_T_UNDEF); - dmnsn_directive("#version", DMNSN_T_VERSION); - dmnsn_directive("#warning", DMNSN_T_WARNING); - dmnsn_directive("#while", DMNSN_T_WHILE); - dmnsn_directive("#write", DMNSN_T_WRITE); + dmnsn_directive("break", DMNSN_T_BREAK); + dmnsn_directive("case", DMNSN_T_CASE); + dmnsn_directive("debug", DMNSN_T_DEBUG); + dmnsn_directive("declare", DMNSN_T_DECLARE); + dmnsn_directive("default", DMNSN_T_DEFAULT); + dmnsn_directive("else", DMNSN_T_ELSE); + dmnsn_directive("end", DMNSN_T_END); + dmnsn_directive("error", DMNSN_T_ERROR); + dmnsn_directive("fclose", DMNSN_T_FCLOSE); + dmnsn_directive("fopen", DMNSN_T_FOPEN); + dmnsn_directive("if", DMNSN_T_IF); + dmnsn_directive("ifdef", DMNSN_T_IFDEF); + dmnsn_directive("ifndef", DMNSN_T_IFNDEF); + dmnsn_directive("include", DMNSN_T_INCLUDE); + dmnsn_directive("local", DMNSN_T_LOCAL); + dmnsn_directive("macro", DMNSN_T_MACRO); + dmnsn_directive("range", DMNSN_T_RANGE); + dmnsn_directive("read", DMNSN_T_READ); + dmnsn_directive("render", DMNSN_T_RENDER); + dmnsn_directive("statistics", DMNSN_T_STATISTICS); + dmnsn_directive("switch", DMNSN_T_SWITCH); + dmnsn_directive("undef", DMNSN_T_UNDEF); + dmnsn_directive("version", DMNSN_T_VERSION); + dmnsn_directive("warning", DMNSN_T_WARNING); + dmnsn_directive("while", DMNSN_T_WHILE); + dmnsn_directive("write", DMNSN_T_WRITE); free(directive); return 1; @@ -441,6 +447,13 @@ dmnsn_tokenize(const char *filename, FILE *file) dmnsn_simple_token('-', DMNSN_T_MINUS); dmnsn_simple_token('*', DMNSN_T_STAR); dmnsn_simple_token(',', DMNSN_T_COMMA); + dmnsn_simple_token('=', DMNSN_T_EQUALS); + dmnsn_simple_token(';', DMNSN_T_SEMICOLON); + dmnsn_simple_token('?', DMNSN_T_QUESTION); + dmnsn_simple_token(':', DMNSN_T_COLON); + dmnsn_simple_token('&', DMNSN_T_AND); + dmnsn_simple_token('!', DMNSN_T_EXCLAMATION); + dmnsn_simple_token('|', DMNSN_T_PIPE); /* Possible comment */ case '/': @@ -456,7 +469,6 @@ dmnsn_tokenize(const char *filename, FILE *file) break; /* Numeric values */ - case '.': /* Number begins with a decimal point, as in `.2' */ case '0': case '1': case '2': @@ -474,68 +486,19 @@ dmnsn_tokenize(const char *filename, FILE *file) } break; + case '.': /* Number may begin with a decimal point, as in `.2' */ + if (dmnsn_tokenize_number(filename, &line, &col, + map, size, &next, &token) != 0) { + token.type = DMNSN_T_DOT; + ++col; + ++next; + } + break; + case '#': /* Language directive */ if (dmnsn_tokenize_directive(filename, &line, &col, - map, size, &next, &token) == 0) { - if (token.type == DMNSN_T_INCLUDE) { - /* Skip whitespace */ - while (next - map < size && isspace(*next) && *next != '\n') { - ++next; - } - - if (dmnsn_tokenize_string(filename, &line, &col, - map, size, &next, &token) != 0) { - dmnsn_diagnostic(filename, line, col, - "Expected string after #include"); - goto bailout; - } - - /* Search in same directory as current file */ - char *filename_copy = strdup(filename); - char *localdir = dirname(filename_copy); - char *local_include = malloc(strlen(localdir) - + strlen(token.value) - + 2); - strcpy(local_include, localdir); - strcat(local_include, "/"); - strcat(local_include, token.value); - free(filename_copy); - free(token.value); - - /* Try to open the included file */ - FILE *include = fopen(local_include, "r"); - if (!include) { - dmnsn_diagnostic(filename, line, col, - "Couldn't open included file \"%s\"", - local_include); - free(local_include); - goto bailout; - } - - /* Parse it recursively */ - dmnsn_array *included_tokens = dmnsn_tokenize(local_include, include); - if (!included_tokens) { - dmnsn_diagnostic(filename, line, col, - "Error tokenizing included file \"%s\"", - local_include); - free(local_include); - goto bailout; - } - - fclose(include); - free(local_include); - - /* Append the tokens from the included file */ - unsigned int i; - for (i = 0; i < dmnsn_array_size(included_tokens); ++i) { - dmnsn_array_push(tokens, dmnsn_array_at(included_tokens, i)); - } - - dmnsn_delete_array(included_tokens); - continue; - } - } else { + map, size, &next, &token) != 0) { dmnsn_diagnostic(filename, line, col, "Invalid language directive"); goto bailout; } @@ -644,19 +607,27 @@ dmnsn_token_name(dmnsn_token_type token_type) return str; /* Punctuation */ - dmnsn_token_map(DMNSN_T_LBRACE, "{"); - dmnsn_token_map(DMNSN_T_RBRACE, "}") - dmnsn_token_map(DMNSN_T_LPAREN, "\\("); - dmnsn_token_map(DMNSN_T_RPAREN, "\\)"); - dmnsn_token_map(DMNSN_T_LBRACKET, "["); - dmnsn_token_map(DMNSN_T_RBRACKET, "]"); - dmnsn_token_map(DMNSN_T_LT, "<"); - dmnsn_token_map(DMNSN_T_GT, ">"); - dmnsn_token_map(DMNSN_T_PLUS, "+"); - dmnsn_token_map(DMNSN_T_MINUS, "-"); - dmnsn_token_map(DMNSN_T_STAR, "*"); - dmnsn_token_map(DMNSN_T_SLASH, "/"); - dmnsn_token_map(DMNSN_T_COMMA, ","); + dmnsn_token_map(DMNSN_T_LBRACE, "{"); + dmnsn_token_map(DMNSN_T_RBRACE, "}") + dmnsn_token_map(DMNSN_T_LPAREN, "\\("); + dmnsn_token_map(DMNSN_T_RPAREN, "\\)"); + dmnsn_token_map(DMNSN_T_LBRACKET, "["); + dmnsn_token_map(DMNSN_T_RBRACKET, "]"); + dmnsn_token_map(DMNSN_T_LT, "<"); + dmnsn_token_map(DMNSN_T_GT, ">"); + dmnsn_token_map(DMNSN_T_PLUS, "+"); + dmnsn_token_map(DMNSN_T_MINUS, "-"); + dmnsn_token_map(DMNSN_T_STAR, "*"); + dmnsn_token_map(DMNSN_T_SLASH, "/"); + dmnsn_token_map(DMNSN_T_COMMA, ","); + dmnsn_token_map(DMNSN_T_EQUALS, "="); + dmnsn_token_map(DMNSN_T_SEMICOLON, ";"); + dmnsn_token_map(DMNSN_T_QUESTION, "?"); + dmnsn_token_map(DMNSN_T_COLON, ":"); + dmnsn_token_map(DMNSN_T_AND, "&"); + dmnsn_token_map(DMNSN_T_EXCLAMATION, "!"); + dmnsn_token_map(DMNSN_T_DOT, "."); + dmnsn_token_map(DMNSN_T_PIPE, "|"); /* Numeric values */ dmnsn_token_map(DMNSN_T_INT, "int"); @@ -703,7 +674,7 @@ dmnsn_token_name(dmnsn_token_type token_type) dmnsn_token_map(DMNSN_T_IDENTIFIER, "identifier"); default: - printf("Warning: unrecognised token %d.\n", (int)token_type); + fprintf(stderr, "Warning: unrecognised token %d.\n", (int)token_type); return "unrecognized-token"; } } diff --git a/dimension/tokenize.h b/dimension/tokenize.h index a90073b..7a36232 100644 --- a/dimension/tokenize.h +++ b/dimension/tokenize.h @@ -21,19 +21,27 @@ typedef enum { /* Punctuation */ - DMNSN_T_LBRACE, /* { */ - DMNSN_T_RBRACE, /* } */ - DMNSN_T_LPAREN, /* ( */ - DMNSN_T_RPAREN, /* ) */ - DMNSN_T_LBRACKET, /* [ */ - DMNSN_T_RBRACKET, /* ] */ - DMNSN_T_LT, /* < */ - DMNSN_T_GT, /* > */ - DMNSN_T_PLUS, /* + */ - DMNSN_T_MINUS, /* - */ - DMNSN_T_STAR, /* * */ - DMNSN_T_SLASH, /* / */ - DMNSN_T_COMMA, /* , */ + DMNSN_T_LBRACE, /* { */ + DMNSN_T_RBRACE, /* } */ + DMNSN_T_LPAREN, /* ( */ + DMNSN_T_RPAREN, /* ) */ + DMNSN_T_LBRACKET, /* [ */ + DMNSN_T_RBRACKET, /* ] */ + DMNSN_T_LT, /* < */ + DMNSN_T_GT, /* > */ + DMNSN_T_PLUS, /* + */ + DMNSN_T_MINUS, /* - */ + DMNSN_T_STAR, /* * */ + DMNSN_T_SLASH, /* / */ + DMNSN_T_COMMA, /* , */ + DMNSN_T_EQUALS, /* = */ + DMNSN_T_SEMICOLON, /* ; */ + DMNSN_T_QUESTION, /* ? */ + DMNSN_T_COLON, /* : */ + DMNSN_T_AND, /* & */ + DMNSN_T_EXCLAMATION, /* ! */ + DMNSN_T_DOT, /* . */ + DMNSN_T_PIPE, /* | */ /* Numeric values */ DMNSN_T_INT, @@ -59,7 +67,7 @@ typedef enum { DMNSN_T_IF, DMNSN_T_IFDEF, DMNSN_T_IFNDEF, - DMNSN_T_INCLUDE, /* Only used internally */ + DMNSN_T_INCLUDE, DMNSN_T_LOCAL, DMNSN_T_MACRO, DMNSN_T_RANGE, diff --git a/tests/dimension/directives.sh b/tests/dimension/directives.sh index a4114a8..68c01e3 100755 --- a/tests/dimension/directives.sh +++ b/tests/dimension/directives.sh @@ -20,7 +20,7 @@ ######################################################################### directives=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/directives.pov) -directives_exp='({ \( [ < + - * / , > ] \) } #declare (identifier "x"))'; +directives_exp='(#include (string "punctuation.pov") #declare (identifier "x"))'; if [ "$directives" != "$directives_exp" ]; then echo "directives.pov tokenized as \"$directives\"" >&2 -- cgit v1.2.3