diff options
-rw-r--r-- | regex.c | 95 |
1 files changed, 84 insertions, 11 deletions
@@ -15,48 +15,79 @@ ****************************************************************************/ #include "regex.h" +#include <assert.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> #if BFS_WITH_ONIGURUMA -# include <onigposix.h> +# include <oniguruma.h> #else # include <regex.h> #endif struct bfs_regex { +#if BFS_WITH_ONIGURUMA + OnigRegex impl; +#else regex_t impl; +#endif }; struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum bfs_regcomp_flags flags, int *err) { +#if BFS_WITH_ONIGURUMA + static bool onig_initialized = false; + if (!onig_initialized) { + OnigEncoding encs[] = {ONIG_ENCODING_UTF8}; + *err = onig_initialize(encs, sizeof(encs)/sizeof(encs[0])); + if (*err != ONIG_NORMAL) { + return NULL; + } + onig_initialized = true; + } +#endif + struct bfs_regex *regex = malloc(sizeof(*regex)); if (!regex) { +#if BFS_WITH_ONIGURUMA + *err = ONIGERR_MEMORY; +#else *err = REG_ESPACE; +#endif return NULL; } - int cflags = 0; - #if BFS_WITH_ONIGURUMA - // Oniguruma's POSIX wrapper uses the selected default syntax when REG_EXTENDED is set - cflags |= REG_EXTENDED; - + OnigSyntaxType *syntax = NULL; switch (type) { case BFS_REGEX_POSIX_BASIC: - onig_set_default_syntax(ONIG_SYNTAX_POSIX_BASIC); + syntax = ONIG_SYNTAX_POSIX_BASIC; break; case BFS_REGEX_POSIX_EXTENDED: - onig_set_default_syntax(ONIG_SYNTAX_POSIX_EXTENDED); + syntax = ONIG_SYNTAX_POSIX_EXTENDED; break; case BFS_REGEX_EMACS: - onig_set_default_syntax(ONIG_SYNTAX_EMACS); + syntax = ONIG_SYNTAX_EMACS; break; case BFS_REGEX_GREP: - onig_set_default_syntax(ONIG_SYNTAX_GREP); + syntax = ONIG_SYNTAX_GREP; break; } + assert(syntax); + + OnigOptionType options = syntax->options; + if (flags & BFS_REGEX_ICASE) { + options |= ONIG_OPTION_IGNORECASE; + } + + const unsigned char *uexpr = (const unsigned char *)expr; + const unsigned char *end = uexpr + strlen(expr); + *err = onig_new(&regex->impl, uexpr, end, options, ONIG_ENCODING_UTF8, syntax, NULL); + if (*err != ONIG_NORMAL) 
{ + goto fail; + } #else + int cflags = 0; switch (type) { case BFS_REGEX_POSIX_BASIC: break; @@ -67,7 +98,6 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b *err = REG_BADPAT; goto fail; } -#endif if (flags & BFS_REGEX_ICASE) { cflags |= REG_ICASE; @@ -77,6 +107,7 @@ struct bfs_regex *bfs_regcomp(const char *expr, enum bfs_regex_type type, enum b if (*err != 0) { goto fail; } +#endif return regex; @@ -87,6 +118,35 @@ fail: bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags, int *err) { size_t len = strlen(str); + +#if BFS_WITH_ONIGURUMA + const unsigned char *ustr = (const unsigned char *)str; + const unsigned char *end = ustr + len; + + OnigRegion *region = onig_region_new(); + if (!region) { + *err = ONIGERR_MEMORY; + return false; + } + + bool match = false; + int ret = onig_search(regex->impl, ustr, end, ustr, end, region, ONIG_OPTION_DEFAULT); + if (ret >= 0) { + *err = 0; + if (flags & BFS_REGEX_ANCHOR) { + match = region->beg[0] == 0 && (size_t)region->end[0] == len; + } else { + match = true; + } + } else if (ret == ONIG_MISMATCH) { + *err = 0; + } else { + *err = ret; + } + + onig_region_free(region, 1); + return match; +#else regmatch_t match = { .rm_so = 0, .rm_eo = len, @@ -114,16 +174,28 @@ bool bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flag *err = ret; return false; } +#endif } void bfs_regfree(struct bfs_regex *regex) { if (regex) { +#if BFS_WITH_ONIGURUMA + onig_free(regex->impl); +#else regfree(&regex->impl); +#endif free(regex); } } char *bfs_regerror(int err, const struct bfs_regex *regex) { +#if BFS_WITH_ONIGURUMA + unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); + if (str) { + onig_error_code_to_str(str, err); + } + return (char *)str; +#else const regex_t *impl = regex ? 
&regex->impl : NULL; size_t len = regerror(err, impl, NULL, 0); @@ -132,4 +204,5 @@ char *bfs_regerror(int err, const struct bfs_regex *regex) { regerror(err, impl, str, len); } return str; +#endif } |