summary | refs | log | tree | commit | diff | stats
path: root/src/xregex.c
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@tavianator.com> 2024-05-07 15:42:46 -0400
committer: Tavian Barnes <tavianator@tavianator.com> 2024-05-07 15:42:46 -0400
commit: 452d6697e0f92326ab139eed4eadd9c2fd8b55ca (patch)
tree: 0feeb3722dcf6debb6c33c5175342bf1d70a1dba /src/xregex.c
parent: a4299f9bc1d3e60a7e628561e8d650c2a241e1c2 (diff)
parent: c5cf2cf90834f2f56b2940d2a499a1a614ebfd21 (diff)
download: bfs-find2fd.tar.xz
Merge branch 'main' into find2fd [branch: find2fd]
Diffstat (limited to 'src/xregex.c')
-rw-r--r-- src/xregex.c 114
1 files changed, 54 insertions, 60 deletions
diff --git a/src/xregex.c b/src/xregex.c
index 6f0e5a1..c2711bc 100644
--- a/src/xregex.c
+++ b/src/xregex.c
@@ -1,35 +1,27 @@
-/****************************************************************************
- * bfs *
- * Copyright (C) 2022 Tavian Barnes <tavianator@tavianator.com> *
- * *
- * Permission to use, copy, modify, and/or distribute this software for any *
- * purpose with or without fee is hereby granted. *
- * *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES *
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF *
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR *
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES *
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN *
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF *
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. *
- ****************************************************************************/
+// Copyright © Tavian Barnes <tavianator@tavianator.com>
+// SPDX-License-Identifier: 0BSD
+#include "prelude.h"
#include "xregex.h"
-#include "config.h"
-#include <assert.h>
+#include "alloc.h"
+#include "bfstd.h"
+#include "diag.h"
+#include "sanity.h"
+#include "thread.h"
#include <errno.h>
+#include <pthread.h>
#include <stdlib.h>
#include <string.h>
-#if BFS_WITH_ONIGURUMA
-# include <langinfo.h>
-# include <oniguruma.h>
+#if BFS_USE_ONIGURUMA
+# include <langinfo.h>
+# include <oniguruma.h>
#else
-# include <regex.h>
+# include <regex.h>
#endif
struct bfs_regex {
-#if BFS_WITH_ONIGURUMA
+#if BFS_USE_ONIGURUMA
unsigned char *pattern;
OnigRegex impl;
int err;
@@ -40,36 +32,34 @@ struct bfs_regex {
#endif
};
-#if BFS_WITH_ONIGURUMA
-/** Get (and initialize) the appropriate encoding for the current locale. */
-static int bfs_onig_encoding(OnigEncoding *penc) {
- static OnigEncoding enc = NULL;
- if (enc) {
- *penc = enc;
- return ONIG_NORMAL;
- }
+#if BFS_USE_ONIGURUMA
+
+static int bfs_onig_status;
+static OnigEncoding bfs_onig_enc;
+/** pthread_once() callback. */
+static void bfs_onig_once(void) {
// Fall back to ASCII by default
- enc = ONIG_ENCODING_ASCII;
+ bfs_onig_enc = ONIG_ENCODING_ASCII;
// Oniguruma has no locale support, so try to guess the right encoding
// from the current locale.
const char *charmap = nl_langinfo(CODESET);
if (charmap) {
-#define BFS_MAP_ENCODING(name, value) \
- do { \
- if (strcmp(charmap, name) == 0) { \
- enc = value; \
- } \
+#define BFS_MAP_ENCODING(name, value) \
+ do { \
+ if (strcmp(charmap, name) == 0) { \
+ bfs_onig_enc = value; \
+ } \
} while (0)
-#define BFS_MAP_ENCODING2(name1, name2, value) \
- do { \
- BFS_MAP_ENCODING(name1, value); \
- BFS_MAP_ENCODING(name2, value); \
+#define BFS_MAP_ENCODING2(name1, name2, value) \
+ do { \
+ BFS_MAP_ENCODING(name1, value); \
+ BFS_MAP_ENCODING(name2, value); \
} while (0)
// These names were found with locale -m on Linux and FreeBSD
-#define BFS_MAP_ISO_8859(n) \
+#define BFS_MAP_ISO_8859(n) \
BFS_MAP_ENCODING2("ISO-8859-" #n, "ISO8859-" #n, ONIG_ENCODING_ISO_8859_ ## n)
BFS_MAP_ISO_8859(1);
@@ -91,7 +81,7 @@ static int bfs_onig_encoding(OnigEncoding *penc) {
BFS_MAP_ENCODING("UTF-8", ONIG_ENCODING_UTF8);
-#define BFS_MAP_EUC(name) \
+#define BFS_MAP_EUC(name) \
BFS_MAP_ENCODING2("EUC-" #name, "euc" #name, ONIG_ENCODING_EUC_ ## name)
BFS_MAP_EUC(JP);
@@ -109,22 +99,29 @@ static int bfs_onig_encoding(OnigEncoding *penc) {
BFS_MAP_ENCODING("GB18030", ONIG_ENCODING_BIG5);
}
- int ret = onig_initialize(&enc, 1);
- if (ret != ONIG_NORMAL) {
- enc = NULL;
+ bfs_onig_status = onig_initialize(&bfs_onig_enc, 1);
+ if (bfs_onig_status != ONIG_NORMAL) {
+ bfs_onig_enc = NULL;
}
- *penc = enc;
- return ret;
+}
+
+/** Initialize Oniguruma: run bfs_onig_once() via invoke_once(), then store bfs_onig_enc in *enc and return bfs_onig_status. */
+static int bfs_onig_initialize(OnigEncoding *enc) {
+ static pthread_once_t once = PTHREAD_ONCE_INIT;
+ invoke_once(&once, bfs_onig_once); // presumably a pthread_once() wrapper so the callback runs a single time -- confirm in thread.h
+
+ *enc = bfs_onig_enc; // NULL when onig_initialize() did not return ONIG_NORMAL
+ return bfs_onig_status; // the onig_initialize() result recorded by bfs_onig_once()
+}
#endif
int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_type type, enum bfs_regcomp_flags flags) {
- struct bfs_regex *regex = *preg = malloc(sizeof(*regex));
+ struct bfs_regex *regex = *preg = ALLOC(struct bfs_regex);
if (!regex) {
return -1;
}
-#if BFS_WITH_ONIGURUMA
+#if BFS_USE_ONIGURUMA
// onig_error_code_to_str() says
//
// don't call this after the pattern argument of onig_new() is freed
@@ -153,7 +150,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
syntax = ONIG_SYNTAX_GREP;
break;
}
- assert(syntax);
+ bfs_assert(syntax, "Invalid regex type");
OnigOptionType options = syntax->options;
if (flags & BFS_REGEX_ICASE) {
@@ -161,7 +158,7 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
}
OnigEncoding enc;
- regex->err = bfs_onig_encoding(&enc);
+ regex->err = bfs_onig_initialize(&enc);
if (regex->err != ONIG_NORMAL) {
return -1;
}
@@ -188,13 +185,10 @@ int bfs_regcomp(struct bfs_regex **preg, const char *pattern, enum bfs_regex_typ
cflags |= REG_ICASE;
}
-#if __has_feature(memory_sanitizer)
- // https://github.com/google/sanitizers/issues/1496
- memset(&regex->impl, 0, sizeof(regex->impl));
-#endif
-
regex->err = regcomp(&regex->impl, pattern, cflags);
if (regex->err != 0) {
+ // https://github.com/google/sanitizers/issues/1496
+ sanitize_init(&regex->impl);
return -1;
}
#endif
@@ -210,7 +204,7 @@ fail:
int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags flags) {
size_t len = strlen(str);
-#if BFS_WITH_ONIGURUMA
+#if BFS_USE_ONIGURUMA
const unsigned char *ustr = (const unsigned char *)str;
const unsigned char *end = ustr + len;
@@ -269,7 +263,7 @@ int bfs_regexec(struct bfs_regex *regex, const char *str, enum bfs_regexec_flags
void bfs_regfree(struct bfs_regex *regex) {
if (regex) {
-#if BFS_WITH_ONIGURUMA
+#if BFS_USE_ONIGURUMA
onig_free(regex->impl);
free(regex->pattern);
#else
@@ -281,10 +275,10 @@ void bfs_regfree(struct bfs_regex *regex) {
char *bfs_regerror(const struct bfs_regex *regex) {
if (!regex) {
- return strdup(strerror(ENOMEM));
+ return strdup(xstrerror(ENOMEM));
}
-#if BFS_WITH_ONIGURUMA
+#if BFS_USE_ONIGURUMA
unsigned char *str = malloc(ONIG_MAX_ERROR_MESSAGE_LEN);
if (str) {
onig_error_code_to_str(str, regex->err, &regex->einfo);