summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- regex.c 95
1 files changed, 84 insertions, 11 deletions
diff --git a/regex.c b/regex.c
index d5c8346..a06d172 100644
--- a/regex.c
+++ b/regex.c
@@ -15,48 +15,79 @@
****************************************************************************/
#include "regex.h"
+#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#if BFS_WITH_ONIGURUMA
-# include <onigposix.h>
+# include <oniguruma.h>
#else
# include <regex.h>
#endif
// Wrapper around the compiled regex of whichever backend is selected at build time.
struct bfs_regex {
+#if BFS_WITH_ONIGURUMA
// Oniguruma build: handle filled in by onig_new() and released with onig_free().
+ OnigRegex impl;
+#else
// Otherwise: a <regex.h> regex_t, released with regfree().
regex_t impl;
+#endif
};
struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) {
+#if BFS_WITH_ONIGURUMA
+ static bool onig_initialized = false;
+ if (!onig_initialized) {
+ OnigEncoding encs[] = {ONIG_ENCODING_UTF8};
+ *err = onig_initialize(encs, sizeof(encs)/sizeof(encs[0]));
+ if (*err != ONIG_NORMAL) {
+ return NULL;
+ }
+ onig_initialized = true;
+ }
+#endif
+
struct bfs_regex *regex = malloc(sizeof(*regex));
if (!regex) {
+#if BFS_WITH_ONIGURUMA
+ *err = ONIGERR_MEMORY;
+#else
*err = REG_ESPACE;
+#endif
return NULL;
}
- int cflags = 0;
-
#if BFS_WITH_ONIGURUMA
- // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set
- cflags |= REG_EXTENDED;
-
+ OnigSyntaxType *syntax = NULL;
switch (type) {
case BFS_REGEX_POSIX_BASIC:
- onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC);
+ syntax = ONIG_SYNTAX_POSIX_BASIC;
break;
case BFS_REGEX_POSIX_EXTENDED:
- onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED);
+ syntax = ONIG_SYNTAX_POSIX_EXTENDED;
break;
case BFS_REGEX_EMACS:
- onig_set_default_syntax(ONIG_SYNTAX_EMACS);
+ syntax = ONIG_SYNTAX_EMACS;
break;
case BFS_REGEX_GREP:
- onig_set_default_syntax(ONIG_SYNTAX_GREP);
+ syntax = ONIG_SYNTAX_GREP;
break;
}
+ assert(syntax);
+
+ OnigOptionType options = syntax->options;
+ if (flags & BFS_REGEX_ICASE) {
+ options |= ONIG_OPTION_IGNORECASE;
+ }
+
+ const unsigned char *uexpr = (const unsigned char *)expr;
+ const unsigned char *end = uexpr + strlen(expr);
+ *err = onig_new(&regex->impl, uexpr, end, options, ONIG_ENCODING_UTF8, syntax, NULL);
+ if (*err != ONIG_NORMAL) {
+ goto fail;
+ }
#else
+ int cflags = 0;
switch (type) {
case BFS_REGEX_POSIX_BASIC:
break;
@@ -67,7 +98,6 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
*err = REG_BADPAT;
goto fail;
}
-#endif
if (flags & BFS_REGEX_ICASE) {
cflags |= REG_ICASE;
@@ -77,6 +107,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b
if (*err != 0) {
goto fail;
}
+#endif
return regex;
@@ -87,6 +118,35 @@ fail:
// Run a compiled regex against str and report whether it matched.
// In the Oniguruma branch, *err is set to 0 on both match and mismatch, and to
// the backend's error code (with a false return) on any other failure.
bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) {
size_t len = strlen(str);
+
+#if BFS_WITH_ONIGURUMA
// Oniguruma takes unsigned char pointers delimiting the subject string.
+ const unsigned char *ustr = (const unsigned char *)str;
+ const unsigned char *end = ustr + len;
+
// The region receives the match offsets that are inspected below (beg[0]/end[0]).
+ OnigRegion *region = onig_region_new();
+ if (!region) {
+ *err = ONIGERR_MEMORY;
+ return false;
+ }
+
+ bool match = false;
// Search over the whole string [ustr, end); a non-negative return is treated as a match.
+ int ret = onig_search(regex->impl, ustr, end, ustr, end, region, ONIG_OPTION_DEFAULT);
+ if (ret >= 0) {
+ *err = 0;
+ if (flags & BFS_REGEX_ANCHOR) {
// Anchored mode: the match that was found must cover the entire string.
// NOTE(review): only the single match reported by onig_search() is checked; if another
// alternative could span the whole string it may be rejected here — confirm whether
// onig_match() would be more appropriate for anchored matching.
+ match = region->beg[0] == 0 && (size_t)region->end[0] == len;
+ } else {
+ match = true;
+ }
+ } else if (ret == ONIG_MISMATCH) {
// Failing to match is not reported as an error.
+ *err = 0;
+ } else {
+ *err = ret;
+ }
+
// Second argument 1 presumably asks Oniguruma to free the region object itself as well
// as its contents — verify against the Oniguruma API documentation.
+ onig_region_free(region, 1);
+ return match;
+#else
regmatch_t match = {
.rm_so = 0,
.rm_eo = len,
@@ -114,16 +174,28 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag
*err = ret;
return false;
}
+#endif
}
// Release a regex object; a NULL pointer is ignored.
void bfs_regfree(struct bfs_regex *regex) {
if (regex) {
// Tear down the backend's compiled state first...
+#if BFS_WITH_ONIGURUMA
+ onig_free(regex->impl);
+#else
regfree(&regex->impl);
+#endif
// ...then free the wrapper struct itself.
free(regex);
}
}
// Produce a message string for the error code err.
// In the Oniguruma branch the result is a malloc()ed buffer, or NULL if the
// allocation fails; the caller presumably owns and frees it — confirm against the header.
char *bfs_regerror(int err, const struct bfs_regex *regex) {
+#if BFS_WITH_ONIGURUMA
// ONIG_MAX_ERROR_MESSAGE_LEN is used as the buffer size for the message.
// NOTE(review): regex is unused in this branch, and no OnigErrorInfo is passed, so
// pattern-specific detail is presumably absent from the message — confirm.
+ unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
+ if (str) {
+ onig_error_code_to_str(str, err);
+ }
+ return (char *)str;
+#else
// regex may be NULL, in which case regerror() is given a NULL regex_t.
const regex_t *impl = regex ? &regex->impl : NULL;
// First call passes no buffer; its return value is used as the required length.
size_t len = regerror(err, impl, NULL, 0);
@@ -132,4 +204,5 @@ char *bfs_regerror(int err, const struct bfs_regex *regex) {
regerror(err, impl, str, len);
}
return str;
+#endif
}