diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2024-05-07 15:42:46 -0400 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2024-05-07 15:42:46 -0400 |
commit | 452d6697e0f92326ab139eed4eadd9c2fd8b55ca (patch) | |
tree | 0feeb3722dcf6debb6c33c5175342bf1d70a1dba /src/xregex.c | |
parent | a4299f9bc1d3e60a7e628561e8d650c2a241e1c2 (diff) | |
parent | c5cf2cf90834f2f56b2940d2a499a1a614ebfd21 (diff) | |
download | bfs-find2fd.tar.xz |
Merge branch 'main' into find2fdfind2fd
Diffstat (limited to 'src/xregex.c')
-rw-r--r-- | src/xregex.c | 114 |
1 files changed, 54 insertions, 60 deletions
diff --git a/src/xregex.c b/src/xregex.c index 6f0e5a1..c2711bc 100644 --- a/src/xregex.c +++ b/src/xregex.c @@ -1,35 +1,27 @@ -/**************************************************************************** - * bfs * - * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> * - * * - * Permission to use, copy, modify, and/or distribute this software for any * - * purpose with or without fee is hereby granted. * - * * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * - ****************************************************************************/ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD +#include "prelude.h" #include "xregex.h" -#include "config.h" -#include <assert.h> +#include "alloc.h" +#include "bfstd.h" +#include "diag.h" +#include "sanity.h" +#include "thread.h" #include <errno.h> +#include <pthread.h> #include <stdlib.h> #include <string.h> -#if BFS_WITH_ONIGURUMA -# include <langinfo.h> -# include <oniguruma.h> +#if BFS_USE_ONIGURUMA +# include <langinfo.h> +# include <oniguruma.h> #else -# include <regex.h> +# include <regex.h> #endif struct bfs_regex { -#if BFS_WITH_ONIGURUMA +#if BFS_USE_ONIGURUMA unsigned char *pattern; OnigRegex impl; int err; @@ -40,36 +32,34 @@ struct bfs_regex { #endif }; -#if BFS_WITH_ONIGURUMA -/** Get (and initialize) the appropriate encoding for the current locale. */ -static int bfs_onig_encoding(OnigEncoding *penc) { - static OnigEncoding enc = NULL; - if (enc) { - *penc = enc; - return ONIG_NORMAL; - } +#if BFS_USE_ONIGURUMA + +static int bfs_onig_status; +static OnigEncoding bfs_onig_enc; +/** pthread_once() callback. */ +static void bfs_onig_once(void) { // Fall back to ASCII by default - enc = ONIG_ENCODING_ASCII; + bfs_onig_enc = ONIG_ENCODING_ASCII; // Oniguruma has no locale support, so try to guess the right encoding // from the current locale. const char *charmap = nl_langinfo(CODESET); if (charmap) { -#define BFS_MAP_ENCODING(name, value) \ - do { \ - if (strcmp(charmap, name) == 0) { \ - enc = value; \ - } \ +#define BFS_MAP_ENCODING(name, value) \ + do { \ + if (strcmp(charmap, name) == 0) { \ + bfs_onig_enc = value; \ + } \ } while (0) -#define BFS_MAP_ENCODING2(name1, name2, value) \ - do { \ - BFS_MAP_ENCODING(name1, value); \ - BFS_MAP_ENCODING(name2, value); \ +#define BFS_MAP_ENCODING2(name1, name2, value) \ + do { \ + BFS_MAP_ENCODING(name1, value); \ + BFS_MAP_ENCODING(name2, value); \ } while (0) // These names were found with locale -m on Linux and FreeBSD -#define BFS_MAP_ISO_8859(n) \ +#define BFS_MAP_ISO_8859(n) \ BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n) BFS_MAP_ISO_8859(1); @@ -91,7 +81,7 @@ static int bfs_onig_encoding(OnigEncoding *penc) { BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8); -#define BFS_MAP_EUC(name) \ +#define BFS_MAP_EUC(name) \ BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name) BFS_MAP_EUC(JP); @@ -109,22 +99,29 @@ static int bfs_onig_encoding(OnigEncoding *penc) { BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5); } - int ret = onig_initialize(&enc, 1); - if (ret != ONIG_NORMAL) { - enc = NULL; + bfs_onig_status = onig_initialize(&bfs_onig_enc, 1); + if (bfs_onig_status != ONIG_NORMAL) { + bfs_onig_enc = NULL; } - *penc = enc; - return ret; +} + +/** Initialize Oniguruma. */ +static int bfs_onig_initialize(OnigEncoding *enc) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + invoke_once(&once, bfs_onig_once); + + *enc = bfs_onig_enc; + return bfs_onig_status; } #endif int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) { - struct bfs_regex *regex = *preg = malloc(sizeof(*regex)); + struct bfs_regex *regex = *preg = ALLOC(struct bfs_regex); if (!regex) { return -1; } -#if BFS_WITH_ONIGURUMA +#if BFS_USE_ONIGURUMA // onig_error_code_to_str() says // // don't call this after the pattern argument of onig_new() is freed @@ -153,7 +150,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ syntax = ONIG_SYNTAX_GREP; break; } - assert(syntax); + bfs_assert(syntax, "Invalid regex type"); OnigOptionType options = syntax->options; if (flags & BFS_REGEX_ICASE) { @@ -161,7 +158,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ } OnigEncoding enc; - regex->err = bfs_onig_encoding(&enc); + regex->err = bfs_onig_initialize(&enc); if (regex->err != ONIG_NORMAL) { return -1; } @@ -188,13 +185,10 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ cflags |= REG_ICASE; } -#if __has_feature(memory_sanitizer) - // https://github.com/google/sanitizers/issues/1496 - memset(®ex->impl, 0, sizeof(regex->impl)); -#endif - regex->err = regcomp(®ex->impl, pattern, cflags); if (regex->err != 0) { + // https://github.com/google/sanitizers/issues/1496 + sanitize_init(®ex->impl); return -1; } #endif @@ -210,7 +204,7 @@ fail: int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) { size_t len = strlen(str); -#if BFS_WITH_ONIGURUMA +#if BFS_USE_ONIGURUMA const unsigned char *ustr = (const unsigned char *)str; const unsigned char *end = ustr + len; @@ -269,7 +263,7 @@ int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags void bfs_regfree(struct bfs_regex *regex) { if (regex) { -#if BFS_WITH_ONIGURUMA +#if BFS_USE_ONIGURUMA onig_free(regex->impl); free(regex->pattern); #else @@ -281,10 +275,10 @@ void bfs_regfree(struct bfs_regex *regex) { char *bfs_regerror(const struct bfs_regex *regex) { if (!regex) { - return strdup(strerror(ENOMEM)); + return strdup(xstrerror(ENOMEM)); } -#if BFS_WITH_ONIGURUMA +#if BFS_USE_ONIGURUMA unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN); if (str) { onig_error_code_to_str(str, regex->err, ®ex->einfo); |