diff options
author | トトも <85485984+ElectronicsArchiver@users.noreply.github.com> | 2022-04-16 20:18:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-16 14:18:56 -0400 |
commit | 33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff (patch) | |
tree | 02fb808d19aee560ac9d381ca5a52509881cdd44 /xregex.c | |
parent | 8f5a73a6585bd425807430fd80ce1e3a737f4c5f (diff) | |
download | bfs-33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff.tar.xz |
Source / Include Folder (#88)
Moved Source Files Into `src` Folder
Diffstat (limited to 'xregex.c')
-rw-r--r-- | xregex.c | 301 |
1 files changed, 0 insertions, 301 deletions
diff --git a/xregex.c b/xregex.c deleted file mode 100644 index 3c3cf35..0000000 --- a/xregex.c +++ /dev/null @@ -1,301 +0,0 @@ -/**************************************************************************** - * bfs * - * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> * - * * - * Permission to use, copy, modify, and/or distribute this software for any * - * purpose with or without fee is hereby granted. * - * * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * - ****************************************************************************/ - -#include "xregex.h" -#include "util.h" -#include <assert.h> -#include <errno.h> -#include <stdlib.h> -#include <string.h> - -#if BFS_WITH_ONIGURUMA -# include <langinfo.h> -# include <oniguruma.h> -#else -# include <regex.h> -#endif - -struct bfs_regex { -#if BFS_WITH_ONIGURUMA - unsigned char *pattern; - OnigRegex impl; - int err; - OnigErrorInfo einfo; -#else - regex_t impl; - int err; -#endif -}; - -#if BFS_WITH_ONIGURUMA -/** Get (and initialize) the appropriate encoding for the current locale. */ -static int bfs_onig_encoding(OnigEncoding *penc) { - static OnigEncoding enc = NULL; - if (enc) { - *penc = enc; - return ONIG_NORMAL; - } - - // Fall back to ASCII by default - enc = ONIG_ENCODING_ASCII; - - // Oniguruma has no locale support, so try to guess the right encoding - // from the current locale. - const char *charmap = nl_langinfo(CODESET); - if (charmap) { -#define BFS_MAP_ENCODING(name, value) \ - do { \ - if (strcmp(charmap, name) == 0) { \ - enc = value; \ - } \ - } while (0) -#define BFS_MAP_ENCODING2(name1, name2, value) \ - do { \ - BFS_MAP_ENCODING(name1, value); \ - BFS_MAP_ENCODING(name2, value); \ - } while (0) - - // These names were found with locale -m on Linux and FreeBSD -#define BFS_MAP_ISO_8859(n) \ - BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n) - - BFS_MAP_ISO_8859(1); - BFS_MAP_ISO_8859(2); - BFS_MAP_ISO_8859(3); - BFS_MAP_ISO_8859(4); - BFS_MAP_ISO_8859(5); - BFS_MAP_ISO_8859(6); - BFS_MAP_ISO_8859(7); - BFS_MAP_ISO_8859(8); - BFS_MAP_ISO_8859(9); - BFS_MAP_ISO_8859(10); - BFS_MAP_ISO_8859(11); - // BFS_MAP_ISO_8859(12); - BFS_MAP_ISO_8859(13); - BFS_MAP_ISO_8859(14); - BFS_MAP_ISO_8859(15); - BFS_MAP_ISO_8859(16); - - BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8); - -#define BFS_MAP_EUC(name) \ - BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name) - - BFS_MAP_EUC(JP); - BFS_MAP_EUC(TW); - BFS_MAP_EUC(KR); - BFS_MAP_EUC(CN); - - BFS_MAP_ENCODING2("SHIFT_JIS", "SJIS", ONIG_ENCODING_SJIS); - - // BFS_MAP_ENCODING("KOI-8", ONIG_ENCODING_KOI8); - BFS_MAP_ENCODING("KOI8-R", ONIG_ENCODING_KOI8_R); - - BFS_MAP_ENCODING("CP1251", ONIG_ENCODING_CP1251); - - BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5); - } - - int ret = onig_initialize(&enc, 1); - if (ret != ONIG_NORMAL) { - enc = NULL; - } - *penc = enc; - return ret; -} -#endif - -int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) { - struct bfs_regex *regex = *preg = malloc(sizeof(*regex)); - if (!regex) { - return -1; - } - -#if BFS_WITH_ONIGURUMA - // onig_error_code_to_str() says - // - // don't call this after the pattern argument of onig_new() is freed - // - // so make a defensive copy. - regex->pattern = (unsigned char *)strdup(pattern); - if (!regex->pattern) { - goto fail; - } - - regex->impl = NULL; - regex->err = ONIG_NORMAL; - - OnigSyntaxType *syntax = NULL; - switch (type) { - case BFS_REGEX_POSIX_BASIC: - syntax = ONIG_SYNTAX_POSIX_BASIC; - break; - case BFS_REGEX_POSIX_EXTENDED: - syntax = ONIG_SYNTAX_POSIX_EXTENDED; - break; - case BFS_REGEX_EMACS: - syntax = ONIG_SYNTAX_EMACS; - break; - case BFS_REGEX_GREP: - syntax = ONIG_SYNTAX_GREP; - break; - } - assert(syntax); - - OnigOptionType options = syntax->options; - if (flags & BFS_REGEX_ICASE) { - options |= ONIG_OPTION_IGNORECASE; - } - - OnigEncoding enc; - regex->err = bfs_onig_encoding(&enc); - if (regex->err != ONIG_NORMAL) { - return -1; - } - - const unsigned char *end = regex->pattern + strlen(pattern); - regex->err = onig_new(®ex->impl, regex->pattern, end, options, enc, syntax, ®ex->einfo); - if (regex->err != ONIG_NORMAL) { - return -1; - } -#else - int cflags = 0; - switch (type) { - case BFS_REGEX_POSIX_BASIC: - break; - case BFS_REGEX_POSIX_EXTENDED: - cflags |= REG_EXTENDED; - break; - default: - errno = EINVAL; - goto fail; - } - - if (flags & BFS_REGEX_ICASE) { - cflags |= REG_ICASE; - } - -#if BFS_HAS_FEATURE(memory_sanitizer, false) - // https://github.com/google/sanitizers/issues/1496 - memset(®ex->impl, 0, sizeof(regex->impl)); -#endif - - regex->err = regcomp(®ex->impl, pattern, cflags); - if (regex->err != 0) { - return -1; - } -#endif - - return 0; - -fail: - free(regex); - *preg = NULL; - return -1; -} - -int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { - size_t len = strlen(str); - -#if BFS_WITH_ONIGURUMA - const unsigned char *ustr = (const unsigned char *)str; - const unsigned char *end = ustr + len; - - // The docs for onig_{match,search}() say - // - // Do not pass invalid byte string in the regex character encoding. - if (!onigenc_is_valid_mbc_string(onig_get_encoding(regex->impl), ustr, end)) { - return 0; - } - - int ret; - if (flags & BFS_REGEX_ANCHOR) { - ret = onig_match(regex->impl, ustr, end, ustr, NULL, ONIG_OPTION_DEFAULT); - } else { - ret = onig_search(regex->impl, ustr, end, ustr, end, NULL, ONIG_OPTION_DEFAULT); - } - - if (ret >= 0) { - if (flags & BFS_REGEX_ANCHOR) { - return (size_t)ret == len; - } else { - return 1; - } - } else if (ret == ONIG_MISMATCH) { - return 0; - } else { - regex->err = ret; - return -1; - } -#else - regmatch_t match = { - .rm_so = 0, - .rm_eo = len, - }; - - int eflags = 0; -#ifdef REG_STARTEND - eflags |= REG_STARTEND; -#endif - - int ret = regexec(®ex->impl, str, 1, &match, eflags); - if (ret == 0) { - if (flags & BFS_REGEX_ANCHOR) { - return match.rm_so == 0 && (size_t)match.rm_eo == len; - } else { - return 1; - } - } else if (ret == REG_NOMATCH) { - return 0; - } else { - regex->err = ret; - return -1; - } -#endif -} - -void bfs_regfree(struct bfs_regex *regex) { - if (regex) { -#if BFS_WITH_ONIGURUMA - onig_free(regex->impl); - free(regex->pattern); -#else - regfree(®ex->impl); -#endif - free(regex); - } -} - -char *bfs_regerror(const struct bfs_regex *regex) { - if (!regex) { - return strdup(strerror(ENOMEM)); - } - -#if BFS_WITH_ONIGURUMA - unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); - if (str) { - onig_error_code_to_str(str, regex->err, ®ex->einfo); - } - return (char *)str; -#else - size_t len = regerror(regex->err, ®ex->impl, NULL, 0); - char *str = malloc(len); - if (str) { - regerror(regex->err, ®ex->impl, str, len); - } - return str; -#endif -} |