summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tavian Barnes <tavianator@gmail.com> 2009-10-29 00:44:27 -0400
committer Tavian Barnes <tavianator@gmail.com> 2009-10-29 00:44:27 -0400
commit a9e0784cbcd32dbd6184b280accaafd1c9575ba5 (patch)
tree 986b047781acf522631d4f4bd8992d250ee01a1b
parent 605b34f1cdb8eb10c05198cfdf4e0a628592d9cf (diff)
download dimension-a9e0784cbcd32dbd6184b280accaafd1c9575ba5.tar.xz
Complete string parsing.
-rw-r--r-- dimension/tokenize.c 75
-rw-r--r-- tests/dimension/strings.pov 2
-rwxr-xr-x tests/dimension/strings.sh 3
3 files changed, 71 insertions, 9 deletions
diff --git a/dimension/tokenize.c b/dimension/tokenize.c
index 9eeab45..e34564c 100644
--- a/dimension/tokenize.c
+++ b/dimension/tokenize.c
@@ -220,6 +220,8 @@ dmnsn_tokenize_string(const char *filename,
char *map, size_t size, char **next, dmnsn_token *token)
{
unsigned int i = 0, alloc = 32;
+ char unicode[5] = { 0 }, *end;
+ unsigned long wchar;
if (**next != '"') {
return 1;
@@ -240,18 +242,74 @@ dmnsn_tokenize_string(const char *filename,
++*next;
switch (**next) {
- case '\\':
- token->value[i] = '\\';
+ case 'a':
+ token->value[i] = '\a';
break;
- case '"':
- token->value[i] = '"';
+ case 'b':
+ token->value[i] = '\b';
+ break;
+
+ case 'f':
+ token->value[i] = '\f';
break;
case 'n':
token->value[i] = '\n';
break;
+ case 'r':
+ token->value[i] = '\r';
+ break;
+
+ case 't':
+ token->value[i] = '\t';
+ break;
+
+ case 'u':
+ /* Escaped unicode character */
+ strncpy(unicode, *next + 1, 4);
+ wchar = strtoul(unicode, &end, 16);
+ if (*next - map >= size - 4) {
+ dmnsn_diagnostic(filename, *line, *col,
+ "EOF before end of escape sequence");
+ free(token->value);
+ return 1;
+ }
+ if (end != &unicode[4]) {
+ dmnsn_diagnostic(filename, *line, *col,
+ "WARNING: Invalid unicode character \"\\u%s\"",
+ unicode);
+ } else {
+ token->value[i] = wchar/256;
+ ++i;
+ if (i + 1 >= alloc) {
+ alloc *= 2;
+ token->value = realloc(token->value, alloc);
+ }
+ token->value[i] = wchar%256;
+
+ *col += 4;
+ *next += 4;
+ }
+ break;
+
+ case 'v':
+ token->value[i] = '\v';
+ break;
+
+ case '\\':
+ token->value[i] = '\\';
+ break;
+
+ case '\'':
+ token->value[i] = '\'';
+ break;
+
+ case '"':
+ token->value[i] = '"';
+ break;
+
default:
dmnsn_diagnostic(filename, *line, *col,
"WARNING: unrecognised escape sequence '\\%c'",
@@ -267,10 +325,15 @@ dmnsn_tokenize_string(const char *filename,
++*col;
++*next;
}
- ++*next;
- token->value[i] = '\0';
+ if (**next != '"') {
+ dmnsn_diagnostic(filename, *line, *col, "Non-terminated string");
+ free(token->value);
+ return 1;
+ }
+ ++*next;
+ token->value[i] = '\0';
return 0;
}
diff --git a/tests/dimension/strings.pov b/tests/dimension/strings.pov
index 307b774..663afdd 100644
--- a/tests/dimension/strings.pov
+++ b/tests/dimension/strings.pov
@@ -18,4 +18,4 @@
*************************************************************************/
// Test string handling, including escape sequences
-"This is a string with\n\"escape sequences\"\\"
+"This is a string with escape sequences: \a\b\f\n\r\t\u2123\v\\\'\""
\ No newline at end of file
diff --git a/tests/dimension/strings.sh b/tests/dimension/strings.sh
index 7557100..58e0751 100755
--- a/tests/dimension/strings.sh
+++ b/tests/dimension/strings.sh
@@ -20,8 +20,7 @@
#########################################################################
strings=$(${top_builddir}/dimension/dimension --tokenize ${srcdir}/strings.pov)
-strings_exp='((string "This is a string with
-"escape sequences"\"))'
+strings_exp=$(echo -e "((string \"This is a string with escape sequences: \a\b\f\n\r\t!#\v\\\'\"\"))")
if [ "$strings" != "$strings_exp" ]; then
echo "strings.pov tokenized as \"$strings\"" >&2