diff options
Diffstat (limited to 'dimension/tokenize.c')
-rw-r--r-- | dimension/tokenize.c | 266 |
1 files changed, 150 insertions, 116 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c index 4a9e9c6..5fce280 100644 --- a/dimension/tokenize.c +++ b/dimension/tokenize.c @@ -27,7 +27,7 @@ typedef struct dmnsn_buffered_token { dmnsn_parse_location lloc; } dmnsn_buffered_token; -typedef struct dmnsn_yylex_extra { +typedef struct dmnsn_token_buffer { int type; /* Indicate that the first token should be returned as-is */ #define DMNSN_T_LEX_VERBATIM DMNSN_T_EOF @@ -35,30 +35,30 @@ typedef struct dmnsn_yylex_extra { dmnsn_array *buffered; unsigned int i; - struct dmnsn_yylex_extra *prev; -} dmnsn_yylex_extra; + struct dmnsn_token_buffer *prev; +} dmnsn_token_buffer; -dmnsn_yylex_extra * -dmnsn_new_yylex_extra(int type) +static dmnsn_token_buffer * +dmnsn_new_token_buffer(int type) { - dmnsn_yylex_extra *extra = malloc(sizeof(dmnsn_yylex_extra)); - if (!extra) { + dmnsn_token_buffer *tbuffer = malloc(sizeof(dmnsn_token_buffer)); + if (!tbuffer) { dmnsn_error(DMNSN_SEVERITY_HIGH, "Failed to allocate token buffer."); } - extra->type = type; - extra->buffered = dmnsn_new_array(sizeof(dmnsn_buffered_token)); - extra->i = 0; - extra->prev = NULL; - return extra; + tbuffer->type = type; + tbuffer->buffered = dmnsn_new_array(sizeof(dmnsn_buffered_token)); + tbuffer->i = 0; + tbuffer->prev = NULL; + return tbuffer; } -void -dmnsn_delete_yylex_extra(dmnsn_yylex_extra *extra) +static void +dmnsn_delete_token_buffer(dmnsn_token_buffer *tbuffer) { - if (extra) { - dmnsn_delete_array(extra->buffered); - free(extra); + if (tbuffer) { + dmnsn_delete_array(tbuffer->buffered); + free(tbuffer); } } @@ -67,6 +67,107 @@ int dmnsn_yylex_impl(dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, int dmnsn_ld_yyparse(const char *filename, void *yyscanner, dmnsn_symbol_table *symtable); +static dmnsn_token_buffer * +dmnsn_declaration_buffer(int token, dmnsn_token_buffer *prev, + dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, + const char *filename, dmnsn_symbol_table *symtable, + void *yyscanner) +{ + dmnsn_token_buffer *tbuffer = dmnsn_new_token_buffer(DMNSN_T_LEX_VERBATIM); + tbuffer->prev = prev; + + /* Buffer the current token */ + dmnsn_buffered_token buffered = { + .type = token, + .lval = *lvalp, + .lloc = *llocp + }; + dmnsn_array_push(tbuffer->buffered, &buffered); + + /* Grab all the tokens belonging to the #declare/#local, i.e. until the braces + balance or we hit a semicolon */ + int bracelevel = -1; + while (1) { + /* Recursive call - permit other directives inside the declaration */ + buffered.type = dmnsn_yylex(&buffered.lval, &buffered.lloc, + filename, symtable, yyscanner); + + if (buffered.type == DMNSN_T_EOF) { + dmnsn_diagnostic(filename, buffered.lloc.first_line, + buffered.lloc.first_column, + "syntax error, unexpected end-of-file"); + dmnsn_delete_token_buffer(tbuffer); + return NULL; + } else if (buffered.type == DMNSN_T_LEX_ERROR) { + dmnsn_delete_token_buffer(tbuffer); + return NULL; + } + + dmnsn_array_push(tbuffer->buffered, &buffered); + + if (buffered.type == DMNSN_T_LBRACE) { + if (bracelevel < 0) + bracelevel = 1; + else + ++bracelevel; + } else if (buffered.type == DMNSN_T_RBRACE) { + --bracelevel; + if (bracelevel == 0) { + break; + } + } else if (buffered.type == DMNSN_T_SEMICOLON) { + break; + } + } + + /* Fake EOF */ + buffered.type = DMNSN_T_EOF; + dmnsn_array_push(tbuffer->buffered, &buffered); + + return tbuffer; +} + +static dmnsn_token_buffer * +dmnsn_undef_buffer(int token, dmnsn_token_buffer *prev, + dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, + const char *filename, dmnsn_symbol_table *symtable, + void *yyscanner) +{ + dmnsn_token_buffer *tbuffer = dmnsn_new_token_buffer(DMNSN_T_LEX_VERBATIM); + tbuffer->prev = prev; + + /* Buffer the current token */ + dmnsn_buffered_token buffered = { + .type = token, + .lval = *lvalp, + .lloc = *llocp + }; + dmnsn_array_push(tbuffer->buffered, &buffered); + + /* Recursive call */ + buffered.type = dmnsn_yylex(&buffered.lval, &buffered.lloc, + filename, symtable, yyscanner); + + if (buffered.type == DMNSN_T_EOF) { + dmnsn_diagnostic(filename, buffered.lloc.first_line, + buffered.lloc.first_column, + "syntax error, unexpected end-of-file"); + dmnsn_delete_token_buffer(tbuffer); + return NULL; + } else if (buffered.type == DMNSN_T_LEX_ERROR) { + dmnsn_delete_token_buffer(tbuffer); + return NULL; + } + /* Buffer the next token */ + dmnsn_array_push(tbuffer->buffered, &buffered); + + /* Fake EOF */ + buffered.type = DMNSN_T_EOF; + dmnsn_array_push(tbuffer->buffered, &buffered); + + return tbuffer; +} + int dmnsn_yylex(dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, const char *filename, dmnsn_symbol_table *symtable, void *yyscanner) @@ -80,32 +181,33 @@ dmnsn_yylex(dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, * directive tokens. * * Ideally we'd use a push parser for the language directives, but bison - * doesn't support GLR push parsers. + * doesn't support GLR push parsers. Instead, we buffer all the appropriate + * tokens and call a pull parser, then discard the buffer and continue. */ - dmnsn_yylex_extra *extra = dmnsn_yyget_extra(yyscanner); + dmnsn_token_buffer *tbuffer = dmnsn_yyget_extra(yyscanner); int token; while (1) { - if (extra) { - /* We are returning buffered tokens */ + if (tbuffer) { + /* Return buffered tokens */ dmnsn_buffered_token buffered; - if (extra->i < dmnsn_array_size(extra->buffered)) { - dmnsn_array_get(extra->buffered, extra->i, &buffered); + if (tbuffer->i < dmnsn_array_size(tbuffer->buffered)) { + dmnsn_array_get(tbuffer->buffered, tbuffer->i, &buffered); token = buffered.type; *lvalp = buffered.lval; *llocp = buffered.lloc; - ++extra->i; + ++tbuffer->i; - if (extra->type == DMNSN_T_LEX_VERBATIM && extra->i == 1) { - /* Don't reprocess the first token in some situations */ + if (tbuffer->type == DMNSN_T_LEX_VERBATIM && tbuffer->i == 1) { + /* Don't double-process the first token */ return token; } } else { - dmnsn_yyset_extra(extra->prev, yyscanner); - dmnsn_delete_yylex_extra(extra); - extra = dmnsn_yyget_extra(yyscanner); + dmnsn_yyset_extra(tbuffer->prev, yyscanner); + dmnsn_delete_token_buffer(tbuffer); + tbuffer = dmnsn_yyget_extra(yyscanner); continue; } } else { @@ -116,113 +218,45 @@ dmnsn_yylex(dmnsn_parse_item *lvalp, dmnsn_parse_location *llocp, case DMNSN_T_DECLARE: case DMNSN_T_LOCAL: { - dmnsn_yylex_extra *ex = dmnsn_new_yylex_extra(DMNSN_T_LEX_VERBATIM); - ex->prev = extra; - - /* Buffer the current token */ - dmnsn_buffered_token buffered = { - .type = token, - .lval = *lvalp, - .lloc = *llocp - }; - dmnsn_array_push(ex->buffered, &buffered); - - /* Grab all the tokens belonging to the #declare/#local */ - int bracelevel = -1; - while (1) { - /* Recursive call */ - buffered.type = dmnsn_yylex(&buffered.lval, &buffered.lloc, - filename, symtable, yyscanner); - - if (buffered.type == DMNSN_T_EOF) { - dmnsn_diagnostic(filename, buffered.lloc.first_line, - buffered.lloc.first_column, - "unexpected end-of-file"); - dmnsn_delete_yylex_extra(ex); - return DMNSN_T_LEX_ERROR; - } else if (buffered.type == DMNSN_T_LEX_ERROR) { - dmnsn_delete_yylex_extra(ex); - return DMNSN_T_LEX_ERROR; - } - - dmnsn_array_push(ex->buffered, &buffered); - - if (buffered.type == DMNSN_T_LBRACE) { - if (bracelevel < 0) - bracelevel = 1; - else - ++bracelevel; - } else if (buffered.type == DMNSN_T_RBRACE) { - --bracelevel; - if (bracelevel == 0) { - break; - } - } else if (buffered.type == DMNSN_T_SEMICOLON) { - break; - } + dmnsn_token_buffer *tb = dmnsn_declaration_buffer( + token, tbuffer, lvalp, llocp, filename, symtable, yyscanner + ); + if (!tb) { + return DMNSN_T_LEX_ERROR; } - /* Fake EOF */ - buffered.type = DMNSN_T_EOF; - dmnsn_array_push(ex->buffered, &buffered); - - dmnsn_yyset_extra(ex, yyscanner); + dmnsn_yyset_extra(tb, yyscanner); if (dmnsn_ld_yyparse(filename, yyscanner, symtable) != 0) { - dmnsn_yyset_extra(ex->prev, yyscanner); - dmnsn_delete_yylex_extra(ex); + dmnsn_yyset_extra(tb->prev, yyscanner); + dmnsn_delete_token_buffer(tb); return DMNSN_T_LEX_ERROR; } /* Restore the previous extra pointer */ - dmnsn_yyset_extra(ex->prev, yyscanner); - dmnsn_delete_yylex_extra(ex); + dmnsn_yyset_extra(tb->prev, yyscanner); + dmnsn_delete_token_buffer(tb); break; } case DMNSN_T_UNDEF: { - dmnsn_yylex_extra *ex = dmnsn_new_yylex_extra(DMNSN_T_LEX_VERBATIM); - ex->prev = extra; - - /* Buffer the current token */ - dmnsn_buffered_token buffered = { - .type = token, - .lval = *lvalp, - .lloc = *llocp - }; - dmnsn_array_push(ex->buffered, &buffered); - - /* Recursive call */ - buffered.type = dmnsn_yylex(&buffered.lval, &buffered.lloc, - filename, symtable, yyscanner); - - if (buffered.type == DMNSN_T_EOF) { - dmnsn_diagnostic(filename, buffered.lloc.first_line, - buffered.lloc.first_column, - "unexpected end-of-file"); - dmnsn_delete_yylex_extra(ex); - return DMNSN_T_LEX_ERROR; - } else if (buffered.type == DMNSN_T_LEX_ERROR) { - dmnsn_delete_yylex_extra(ex); + dmnsn_token_buffer *tb = dmnsn_undef_buffer( + token, tbuffer, lvalp, llocp, filename, symtable, yyscanner + ); + if (!tb) { return DMNSN_T_LEX_ERROR; } - /* Buffer the next token */ - dmnsn_array_push(ex->buffered, &buffered); - - /* Fake EOF */ - buffered.type = DMNSN_T_EOF; - dmnsn_array_push(ex->buffered, &buffered); - dmnsn_yyset_extra(ex, yyscanner); + dmnsn_yyset_extra(tb, yyscanner); if (dmnsn_ld_yyparse(filename, yyscanner, symtable) != 0) { - dmnsn_yyset_extra(ex->prev, yyscanner); - dmnsn_delete_yylex_extra(ex); + dmnsn_yyset_extra(tb->prev, yyscanner); + dmnsn_delete_token_buffer(tb); return DMNSN_T_LEX_ERROR; } /* Restore the previous extra pointer */ - dmnsn_yyset_extra(ex->prev, yyscanner); - dmnsn_delete_yylex_extra(ex); + dmnsn_yyset_extra(tb->prev, yyscanner); + dmnsn_delete_token_buffer(tb); break; } |