diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2024-05-07 15:42:46 -0400 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2024-05-07 15:42:46 -0400 |
commit | 452d6697e0f92326ab139eed4eadd9c2fd8b55ca (patch) | |
tree | 0feeb3722dcf6debb6c33c5175342bf1d70a1dba /src/bftw.c | |
parent | a4299f9bc1d3e60a7e628561e8d650c2a241e1c2 (diff) | |
parent | c5cf2cf90834f2f56b2940d2a499a1a614ebfd21 (diff) | |
download | bfs-find2fd.tar.xz |
Merge branch 'main' into find2fdfind2fd
Diffstat (limited to 'src/bftw.c')
-rw-r--r-- | src/bftw.c | 2453 |
1 files changed, 1643 insertions, 810 deletions
@@ -1,18 +1,5 @@ -/**************************************************************************** - * bfs * - * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * - * * - * Permission to use, copy, modify, and/or distribute this software for any * - * purpose with or without fee is hereby granted. * - * * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * - ****************************************************************************/ +// Copyright © Tavian Barnes <tavianator@tavianator.com> +// SPDX-License-Identifier: 0BSD /** * The bftw() implementation consists of the following components: @@ -20,31 +7,186 @@ * - struct bftw_file: A file that has been encountered during the traversal. * They have reference-counted links to their parents in the directory tree. * + * - struct bftw_list: A linked list of bftw_file's. + * + * - struct bftw_queue: A multi-stage queue of bftw_file's. + * * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, * used for openat() to minimize the amount of path re-traversals. * - * - struct bftw_queue: The queue of bftw_file's left to explore. Implemented - * as a simple circular buffer. - * * - struct bftw_state: Represents the current state of the traversal, allowing * various helper functions to take fewer parameters. */ +#include "prelude.h" #include "bftw.h" +#include "alloc.h" #include "bfstd.h" -#include "config.h" +#include "diag.h" #include "dir.h" #include "dstring.h" +#include "ioq.h" +#include "list.h" #include "mtab.h" #include "stat.h" #include "trie.h" -#include <assert.h> #include <errno.h> #include <fcntl.h> -#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> +#include <sys/types.h> + +/** Initialize a bftw_stat cache. */ +static void bftw_stat_init(struct bftw_stat *bufs, struct bfs_stat *stat_buf, struct bfs_stat *lstat_buf) { + bufs->stat_buf = stat_buf; + bufs->lstat_buf = lstat_buf; + bufs->stat_err = -1; + bufs->lstat_err = -1; +} + +/** Fill a bftw_stat cache from another one. */ +static void bftw_stat_fill(struct bftw_stat *dest, const struct bftw_stat *src) { + if (dest->stat_err < 0 && src->stat_err >= 0) { + dest->stat_buf = src->stat_buf; + dest->stat_err = src->stat_err; + } + + if (dest->lstat_err < 0 && src->lstat_err >= 0) { + dest->lstat_buf = src->lstat_buf; + dest->lstat_err = src->lstat_err; + } +} + +/** Cache a bfs_stat() result. */ +static void bftw_stat_cache(struct bftw_stat *bufs, enum bfs_stat_flags flags, const struct bfs_stat *buf, int err) { + if (flags & BFS_STAT_NOFOLLOW) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + if (err || !S_ISLNK(buf->mode)) { + // Non-link, so share stat info + bufs->stat_buf = buf; + bufs->stat_err = err; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (err) { + bufs->stat_err = err; + } else if (S_ISLNK(buf->mode)) { + bufs->lstat_buf = buf; + bufs->lstat_err = err; + bufs->stat_err = ENOENT; + } else { + bufs->stat_buf = buf; + bufs->stat_err = err; + } + } else { + bufs->stat_buf = buf; + bufs->stat_err = err; + } +} + +/** Caching bfs_stat(). */ +static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct bftw_stat *bufs = &ftwbuf->stat_bufs; + struct bfs_stat *buf; + + if (flags & BFS_STAT_NOFOLLOW) { + buf = (struct bfs_stat *)bufs->lstat_buf; + if (bufs->lstat_err == 0) { + return buf; + } else if (bufs->lstat_err > 0) { + errno = bufs->lstat_err; + return NULL; + } + } else { + buf = (struct bfs_stat *)bufs->stat_buf; + if (bufs->stat_err == 0) { + return buf; + } else if (bufs->stat_err > 0) { + errno = bufs->stat_err; + return NULL; + } + } + + struct bfs_stat *ret; + int err; + if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, buf) == 0) { + ret = buf; + err = 0; +#ifdef S_IFWHT + } else if (errno == ENOENT && ftwbuf->type == BFS_WHT) { + // This matches the behavior of FTS_WHITEOUT on BSD + ret = memset(buf, 0, sizeof(*buf)); + ret->mode = S_IFWHT; + err = 0; +#endif + } else { + ret = NULL; + err = errno; + } + + bftw_stat_cache(bufs, flags, ret, err); + return ret; +} + +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + const struct bfs_stat *ret; + + if (flags & BFS_STAT_TRYFOLLOW) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_FOLLOW); + if (!ret && errno_is_like(ENOENT)) { + ret = bftw_stat_impl(mutbuf, BFS_STAT_NOFOLLOW); + } + } else { + ret = bftw_stat_impl(mutbuf, flags); + } + + return ret; +} + +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + const struct bftw_stat *bufs = &ftwbuf->stat_bufs; + + if (flags & BFS_STAT_NOFOLLOW) { + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } + } else if (bufs->stat_err == 0) { + return bufs->stat_buf; + } else if ((flags & BFS_STAT_TRYFOLLOW) && error_is_like(bufs->stat_err, ENOENT)) { + if (bufs->lstat_err == 0) { + return bufs->lstat_buf; + } + } + + return NULL; +} + +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return ftwbuf->type; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { + return ftwbuf->type; + } + } else { + if (ftwbuf->type != BFS_LNK) { + return ftwbuf->type; + } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { + return BFS_ERROR; + } + } + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); + if (statbuf) { + return bfs_mode_to_type(statbuf->mode); + } else { + return BFS_ERROR; + } +} /** * A file. @@ -54,21 +196,45 @@ struct bftw_file { struct bftw_file *parent; /** The root under which this file was found. */ struct bftw_file *root; - /** The next file in the queue, if any. */ + + /** + * List node for: + * + * bftw_queue::buffer + * bftw_queue::waiting + * bftw_file_open()::parents + */ struct bftw_file *next; - /** The previous file in the LRU list. */ - struct bftw_file *lru_prev; - /** The next file in the LRU list. */ - struct bftw_file *lru_next; + /** + * List node for: + * + * bftw_queue::ready + * bftw_state::to_close + */ + struct { struct bftw_file *next; } ready; + + /** + * List node for bftw_cache. + */ + struct { + struct bftw_file *prev; + struct bftw_file *next; + } lru; /** This file's depth in the walk. */ size_t depth; - /** Reference count. */ + /** Reference count (for ->parent). */ size_t refcount; + /** Pin count (for ->fd). */ + size_t pincount; /** An open descriptor to this file, or -1. */ int fd; + /** Whether this file has a pending ioq request. */ + bool ioqueued; + /** An open directory for this file, if any. */ + struct bfs_dir *dir; /** This file's type, if known. */ enum bfs_type type; @@ -77,6 +243,9 @@ struct bftw_file { /** The inode number, for cycle detection. */ ino_t ino; + /** Cached bfs_stat() info. */ + struct bftw_stat stat_bufs; + /** The offset of this file in the full path. */ size_t nameoff; /** The length of the file's name. */ @@ -86,145 +255,445 @@ struct bftw_file { }; /** + * A linked list of bftw_file's. + */ +struct bftw_list { + struct bftw_file *head; + struct bftw_file **tail; +}; + +/** + * bftw_queue flags. + */ +enum bftw_qflags { + /** Track the sync/async service balance. */ + BFTW_QBALANCE = 1 << 0, + /** Buffer files before adding them to the queue. */ + BFTW_QBUFFER = 1 << 1, + /** Use LIFO (stack/DFS) ordering. */ + BFTW_QLIFO = 1 << 2, + /** Maintain a strict order. */ + BFTW_QORDER = 1 << 3, +}; + +/** + * A queue of bftw_file's that may be serviced asynchronously. + * + * A bftw_queue comprises three linked lists each tracking different stages. + * When BFTW_QBUFFER is set, files are initially pushed to the buffer: + * + * ╔═══╗ ╔═══╦═══╗ + * buffer: ║ 𝘩 ║ ║ 𝘩 ║ 𝘪 ║ + * ╠═══╬═══╦═══╗ ╠═══╬═══╬═══╗ + * waiting: ║ e ║ f ║ g ║ → ║ e ║ f ║ g ║ + * ╠═══╬═══╬═══╬═══╗ ╠═══╬═══╬═══╬═══╗ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ ╚═══╩═══╩═══╩═══╝ + * + * When bftw_queue_flush() is called, the files in the buffer are appended to + * the waiting list (or prepended, if BFTW_QLIFO is set): + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╦═══╦═══╗ + * waiting: ║ e ║ f ║ g ║ h ║ i ║ + * ╠═══╬═══╬═══╬═══╬═══╝ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * Using the buffer gives a more natural ordering for BFTW_QLIFO, and allows + * files to be sorted before adding them to the waiting list. If BFTW_QBUFFER + * is not set, files are pushed directly to the waiting list instead. + * + * Files on the waiting list are waiting to be "serviced" asynchronously by the + * ioq (for example, by an ioq_opendir() or ioq_stat() call). While they are + * being serviced, they are detached from the queue by bftw_queue_detach() and + * are not tracked by the queue at all: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┬───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ 𝓯 │ ⎟ + * ╠═══╬═══╬═══╬═══╗ ⎝ └───┴───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ + * ╚═══╩═══╩═══╩═══╝ + * + * When their async service is complete, files are reattached to the queue by + * bftw_queue_attach(), this time on the ready list: + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝕗 ║ + * ╚═══╩═══╩═══╩═══╩═══╝ + * + * Files are added to the ready list in the order they are finished by the ioq. + * bftw_queue_pop() pops a file from the ready list if possible. Otherwise, it + * pops from the waiting list, and the file must be serviced synchronously. + * + * However, if BFTW_QORDER is set, files must be popped in the exact order they + * are added to the waiting list (to maintain sorted order). In this case, + * files are added to the waiting and ready lists at the same time. The + * file->ioqueued flag is set while it is in-service, so that bftw() can wait + * for it to be truly ready before using it. + * + * ╔═╗ + * buffer: ║ ║ + * ╠═╩═╦═══╦═══╗ ⎛ ┌───┐ ⎞ + * waiting: ║ g ║ h ║ i ║ ⎜ ioq: │ 𝓮 │ ⎟ + * ╠═══╬═══╬═══╬═══╦═══╦═══╦═══╦═══╦═══╗ ⎝ └───┘ ⎠ + * ready: ║ 𝕒 ║ 𝕓 ║ 𝕔 ║ 𝕕 ║ 𝓮 ║ 𝕗 ║ g ║ h ║ i ║ + * ╚═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╩═══╝ + * + * If BFTW_QBALANCE is set, queue->imbalance tracks the delta between async + * service (negative) and synchronous service (positive). The queue is + * considered "balanced" when this number is non-negative. Only a balanced + * queue will perform any async service, ensuring work is fairly distributed + * between the main thread and the ioq. + * + * BFTW_QBALANCE is only set for single-threaded ioqs. When an ioq has multiple + * threads, it is faster to wait for the ioq to complete an operation than it is + * to perform it on the main thread. + */ +struct bftw_queue { + /** Queue flags. */ + enum bftw_qflags flags; + /** A buffer of files to be enqueued together. */ + struct bftw_list buffer; + /** A list of files which are waiting to be serviced. */ + struct bftw_list waiting; + /** A list of already-serviced files. */ + struct bftw_list ready; + /** The current size of the queue. */ + size_t size; + /** The number of files currently in-service. */ + size_t ioqueued; + /** Tracks the imbalance between synchronous and async service. */ + unsigned long imbalance; +}; + +/** Initialize a queue. */ +static void bftw_queue_init(struct bftw_queue *queue, enum bftw_qflags flags) { + queue->flags = flags; + SLIST_INIT(&queue->buffer); + SLIST_INIT(&queue->waiting); + SLIST_INIT(&queue->ready); + queue->size = 0; + queue->ioqueued = 0; + queue->imbalance = 0; +} + +/** Add a file to the queue. */ +static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { + if (queue->flags & BFTW_QBUFFER) { + SLIST_APPEND(&queue->buffer, file); + } else if (queue->flags & BFTW_QLIFO) { + SLIST_PREPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_PREPEND(&queue->ready, file, ready); + } + } else { + SLIST_APPEND(&queue->waiting, file); + if (queue->flags & BFTW_QORDER) { + SLIST_APPEND(&queue->ready, file, ready); + } + } + + ++queue->size; +} + +/** Add any buffered files to the queue. */ +static void bftw_queue_flush(struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return; + } + + if (queue->flags & BFTW_QORDER) { + // When sorting, add files to the ready list at the same time + // (and in the same order) as they are added to the waiting list + struct bftw_file **cursor = (queue->flags & BFTW_QLIFO) + ? &queue->ready.head + : queue->ready.tail; + for_slist (struct bftw_file, file, &queue->buffer) { + cursor = SLIST_INSERT(&queue->ready, cursor, file, ready); + } + } + + if (queue->flags & BFTW_QLIFO) { + SLIST_EXTEND(&queue->buffer, &queue->waiting); + } + + SLIST_EXTEND(&queue->waiting, &queue->buffer); +} + +/** Check if the queue is properly balanced for async work. */ +static bool bftw_queue_balanced(const struct bftw_queue *queue) { + if (queue->flags & BFTW_QBALANCE) { + return (long)queue->imbalance >= 0; + } else { + return true; + } +} + +/** Update the queue balance for (a)sync service. */ +static void bftw_queue_rebalance(struct bftw_queue *queue, bool async) { + if (async) { + --queue->imbalance; + } else { + ++queue->imbalance; + } +} + +/** Detatch the next waiting file. */ +static void bftw_queue_detach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + bfs_assert(!file->ioqueued); + + if (file == SLIST_HEAD(&queue->buffer)) { + // To maintain order, we can't detach any files until they're + // added to the waiting/ready lists + bfs_assert(!(queue->flags & BFTW_QORDER)); + SLIST_POP(&queue->buffer); + } else if (file == SLIST_HEAD(&queue->waiting)) { + SLIST_POP(&queue->waiting); + } else { + bfs_bug("Detached file was not buffered or waiting"); + } + + if (async) { + file->ioqueued = true; + ++queue->ioqueued; + bftw_queue_rebalance(queue, true); + } +} + +/** Reattach a serviced file to the queue. */ +static void bftw_queue_attach(struct bftw_queue *queue, struct bftw_file *file, bool async) { + if (async) { + bfs_assert(file->ioqueued); + file->ioqueued = false; + --queue->ioqueued; + } else { + bfs_assert(!file->ioqueued); + } + + if (!(queue->flags & BFTW_QORDER)) { + SLIST_APPEND(&queue->ready, file, ready); + } +} + +/** Make a file ready immediately. */ +static void bftw_queue_skip(struct bftw_queue *queue, struct bftw_file *file) { + bftw_queue_detach(queue, file, false); + bftw_queue_attach(queue, file, false); +} + +/** Get the next waiting file. */ +static struct bftw_file *bftw_queue_waiting(const struct bftw_queue *queue) { + if (!(queue->flags & BFTW_QBUFFER)) { + return SLIST_HEAD(&queue->waiting); + } + + if (queue->flags & BFTW_QORDER) { + // Don't detach files until they're on the waiting/ready lists + return SLIST_HEAD(&queue->waiting); + } + + const struct bftw_list *prefix = &queue->waiting; + const struct bftw_list *suffix = &queue->buffer; + if (queue->flags & BFTW_QLIFO) { + prefix = &queue->buffer; + suffix = &queue->waiting; + } + + struct bftw_file *file = SLIST_HEAD(prefix); + if (!file) { + file = SLIST_HEAD(suffix); + } + return file; +} + +/** Get the next ready file. */ +static struct bftw_file *bftw_queue_ready(const struct bftw_queue *queue) { + return SLIST_HEAD(&queue->ready); +} + +/** Pop a file from the queue. */ +static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { + // Don't pop until we've had a chance to sort the buffer + bfs_assert(SLIST_EMPTY(&queue->buffer)); + + struct bftw_file *file = SLIST_POP(&queue->ready, ready); + + if (!file || file == SLIST_HEAD(&queue->waiting)) { + // If no files are ready, try the waiting list. Or, if + // BFTW_QORDER is set, we may need to pop from both lists. + file = SLIST_POP(&queue->waiting); + } + + if (file) { + --queue->size; + } + + return file; +} + +/** * A cache of open directories. */ struct bftw_cache { /** The head of the LRU list. */ struct bftw_file *head; - /** The insertion target for the LRU list. */ - struct bftw_file *target; /** The tail of the LRU list. */ struct bftw_file *tail; + /** The insertion target for the LRU list. */ + struct bftw_file *target; /** The remaining capacity of the LRU list. */ size_t capacity; + + /** bftw_file arena. */ + struct varena files; + + /** bfs_dir arena. */ + struct arena dirs; + /** Remaining bfs_dir capacity. */ + int dir_limit; + + /** bfs_stat arena. */ + struct arena stat_bufs; }; /** Initialize a cache. */ static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { - cache->head = NULL; + LIST_INIT(cache); cache->target = NULL; - cache->tail = NULL; cache->capacity = capacity; -} -/** Destroy a cache. */ -static void bftw_cache_destroy(struct bftw_cache *cache) { - assert(!cache->tail); - assert(!cache->target); - assert(!cache->head); -} + VARENA_INIT(&cache->files, struct bftw_file, name); -/** Add a bftw_file to the cache. */ -static void bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { - assert(cache->capacity > 0); - assert(file->fd >= 0); - assert(!file->lru_prev); - assert(!file->lru_next); - - if (cache->target) { - file->lru_prev = cache->target; - file->lru_next = cache->target->lru_next; - } else { - file->lru_next = cache->head; - } + bfs_dir_arena(&cache->dirs); - if (file->lru_prev) { - file->lru_prev->lru_next = file; + if (cache->capacity > 1024) { + cache->dir_limit = 1024; } else { - cache->head = file; + cache->dir_limit = capacity - 1; } - if (file->lru_next) { - file->lru_next->lru_prev = file; - } else { - cache->tail = file; + ARENA_INIT(&cache->stat_bufs, struct bfs_stat); +} + +/** Allocate a directory. */ +static struct bfs_dir *bftw_allocdir(struct bftw_cache *cache, bool force) { + if (!force && cache->dir_limit <= 0) { + errno = ENOMEM; + return NULL; } - // Prefer to keep the root paths open by keeping them at the head of the list - if (file->depth == 0) { - cache->target = file; + struct bfs_dir *dir = arena_alloc(&cache->dirs); + if (dir) { + --cache->dir_limit; } + return dir; +} - --cache->capacity; +/** Free a directory. */ +static void bftw_freedir(struct bftw_cache *cache, struct bfs_dir *dir) { + ++cache->dir_limit; + arena_free(&cache->dirs, dir); } -/** Remove a bftw_file from the cache. */ -static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { +/** Remove a bftw_file from the LRU list. */ +static void bftw_lru_remove(struct bftw_cache *cache, struct bftw_file *file) { if (cache->target == file) { - cache->target = file->lru_prev; - } - - if (file->lru_prev) { - assert(cache->head != file); - file->lru_prev->lru_next = file->lru_next; - } else { - assert(cache->head == file); - cache->head = file->lru_next; - } - - if (file->lru_next) { - assert(cache->tail != file); - file->lru_next->lru_prev = file->lru_prev; - } else { - assert(cache->tail == file); - cache->tail = file->lru_prev; + cache->target = file->lru.prev; } - file->lru_prev = NULL; - file->lru_next = NULL; - ++cache->capacity; + LIST_REMOVE(cache, file, lru); } -/** Mark a cache entry as recently used. */ -static void bftw_cache_use(struct bftw_cache *cache, struct bftw_file *file) { - bftw_cache_remove(cache, file); - bftw_cache_add(cache, file); +/** Remove a bftw_file from the cache. */ +static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { + bftw_lru_remove(cache, file); + ++cache->capacity; } /** Close a bftw_file. */ static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { - assert(file->fd >= 0); + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount == 0); - bftw_cache_remove(cache, file); + if (file->dir) { + bfs_closedir(file->dir); + bftw_freedir(cache, file->dir); + file->dir = NULL; + } else { + xclose(file->fd); + } - xclose(file->fd); file->fd = -1; + bftw_cache_remove(cache, file); } -/** Pop a directory from the cache. */ -static void bftw_cache_pop(struct bftw_cache *cache) { - assert(cache->tail); - bftw_file_close(cache, cache->tail); -} - -/** - * Shrink the cache, to recover from EMFILE. - * - * @param cache - * The cache in question. - * @param saved - * A bftw_file that must be preserved. - * @return - * 0 if successfully shrunk, otherwise -1. - */ -static int bftw_cache_shrink(struct bftw_cache *cache, const struct bftw_file *saved) { +/** Pop the least recently used directory from the cache. */ +static int bftw_cache_pop(struct bftw_cache *cache) { struct bftw_file *file = cache->tail; if (!file) { return -1; } - if (file == saved) { - file = file->lru_prev; - if (!file) { - return -1; - } + bftw_file_close(cache, file); + return 0; +} + +/** Add a bftw_file to the LRU list. */ +static void bftw_lru_add(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + LIST_INSERT(cache, cache->target, file, lru); + + // Prefer to keep the root paths open by keeping them at the head of the list + if (file->depth == 0) { + cache->target = file; } +} - bftw_file_close(cache, file); - cache->capacity = 0; +/** Add a bftw_file to the cache. */ +static int bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + if (cache->capacity == 0 && bftw_cache_pop(cache) != 0) { + bftw_file_close(cache, file); + errno = EMFILE; + return -1; + } + + bfs_assert(cache->capacity > 0); + --cache->capacity; + + bftw_lru_add(cache, file); return 0; } +/** Pin a cache entry so it won't be closed. */ +static void bftw_cache_pin(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + + if (file->pincount++ == 0) { + bftw_lru_remove(cache, file); + } +} + +/** Unpin a cache entry. */ +static void bftw_cache_unpin(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount > 0); + + if (--file->pincount == 0) { + bftw_lru_add(cache, file); + } +} + /** Compute the name offset of a child path. */ static size_t bftw_child_nameoff(const struct bftw_file *parent) { size_t ret = parent->nameoff + parent->namelen; @@ -234,12 +703,20 @@ static size_t bftw_child_nameoff(const struct bftw_file *parent) { return ret; } +/** Destroy a cache. */ +static void bftw_cache_destroy(struct bftw_cache *cache) { + bfs_assert(LIST_EMPTY(cache)); + bfs_assert(!cache->target); + + arena_destroy(&cache->stat_bufs); + arena_destroy(&cache->dirs); + varena_destroy(&cache->files); +} + /** Create a new bftw_file. */ -static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *name) { +static struct bftw_file *bftw_file_new(struct bftw_cache *cache, struct bftw_file *parent, const char *name) { size_t namelen = strlen(name); - size_t size = BFS_FLEX_SIZEOF(struct bftw_file, name, namelen + 1); - - struct bftw_file *file = malloc(size); + struct bftw_file *file = varena_alloc(&cache->files, namelen + 1); if (!file) { return NULL; } @@ -257,63 +734,403 @@ static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *nam file->nameoff = 0; } - file->next = NULL; - - file->lru_prev = NULL; - file->lru_next = NULL; + SLIST_ITEM_INIT(file); + SLIST_ITEM_INIT(file, ready); + LIST_ITEM_INIT(file, lru); file->refcount = 1; + file->pincount = 0; file->fd = -1; + file->ioqueued = false; + file->dir = NULL; file->type = BFS_UNKNOWN; file->dev = -1; file->ino = -1; + bftw_stat_init(&file->stat_bufs, NULL, NULL); + file->namelen = namelen; memcpy(file->name, name, namelen + 1); return file; } +/** Associate an open directory with a bftw_file. */ +static void bftw_file_set_dir(struct bftw_cache *cache, struct bftw_file *file, struct bfs_dir *dir) { + bfs_assert(!file->dir); + file->dir = dir; + + if (file->fd >= 0) { + bfs_assert(file->fd == bfs_dirfd(dir)); + } else { + file->fd = bfs_dirfd(dir); + bftw_cache_add(cache, file); + } +} + +/** Free a file's cached stat() buffers. */ +static void bftw_stat_recycle(struct bftw_cache *cache, struct bftw_file *file) { + struct bftw_stat *bufs = &file->stat_bufs; + + struct bfs_stat *stat_buf = (struct bfs_stat *)bufs->stat_buf; + struct bfs_stat *lstat_buf = (struct bfs_stat *)bufs->lstat_buf; + if (stat_buf) { + arena_free(&cache->stat_bufs, stat_buf); + } else if (lstat_buf) { + arena_free(&cache->stat_bufs, lstat_buf); + } + + bftw_stat_init(bufs, NULL, NULL); +} + +/** Free a bftw_file. */ +static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { + bfs_assert(file->refcount == 0); + + if (file->fd >= 0) { + bftw_file_close(cache, file); + } + + bftw_stat_recycle(cache, file); + + varena_free(&cache->files, file, file->namelen + 1); +} + /** - * Open a bftw_file relative to another one. - * - * @param cache - * The cache to hold the file. - * @param file - * The file to open. - * @param base - * The base directory for the relative path (may be NULL). - * @param at_fd - * The base file descriptor, AT_FDCWD if base == NULL. - * @param at_path - * The relative path to the file. - * @return - * The opened file descriptor, or negative on error. + * Holds the current state of the bftw() traversal. */ -static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, struct bftw_file *base, const char *at_path) { - assert(file->fd < 0); +struct bftw_state { + /** The path(s) to start from. */ + const char **paths; + /** The number of starting paths. */ + size_t npaths; + /** bftw() callback. */ + bftw_callback *callback; + /** bftw() callback data. */ + void *ptr; + /** bftw() flags. */ + enum bftw_flags flags; + /** Search strategy. */ + enum bftw_strategy strategy; + /** The mount table. */ + const struct bfs_mtab *mtab; + /** bfs_opendir() flags. */ + enum bfs_dir_flags dir_flags; + + /** The appropriate errno value, if any. */ + int error; + + /** The cache of open directories. */ + struct bftw_cache cache; + + /** The async I/O queue. */ + struct ioq *ioq; + /** The number of I/O threads. */ + size_t nthreads; + + /** The queue of unpinned directories to unwrap. */ + struct bftw_list to_close; + /** The queue of files to visit. */ + struct bftw_queue fileq; + /** The queue of directories to open/read. */ + struct bftw_queue dirq; + + /** The current path. */ + dchar *path; + /** The current file. */ + struct bftw_file *file; + /** The previous file. */ + struct bftw_file *previous; + + /** The currently open directory. */ + struct bfs_dir *dir; + /** The current directory entry. */ + struct bfs_dirent *de; + /** Storage for the directory entry. */ + struct bfs_dirent de_storage; + /** Any error encountered while reading the directory. */ + int direrror; + + /** Extra data about the current file. */ + struct BFTW ftwbuf; + /** stat() buffer storage. */ + struct bfs_stat stat_buf; + /** lstat() buffer storage. */ + struct bfs_stat lstat_buf; +}; + +/** Check if we have to buffer files before visiting them. */ +static bool bftw_must_buffer(const struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + // Have to buffer the files to sort them + return true; + } + + if (state->strategy == BFTW_DFS && state->nthreads == 0) { + // Without buffering, we would get a not-quite-depth-first + // ordering: + // + // a + // b + // a/c + // a/c/d + // b/e + // b/e/f + // + // This is okay for iterative deepening, since the caller only + // sees files at the target depth. We also deem it okay for + // parallel searches, since the order is unpredictable anyway. + return true; + } + + if ((state->flags & BFTW_STAT) && state->nthreads > 1) { + // We will be buffering every file anyway for ioq_stat() + return true; + } + + return false; +} + +/** Initialize the bftw() state. */ +static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { + state->paths = args->paths; + state->npaths = args->npaths; + state->callback = args->callback; + state->ptr = args->ptr; + state->flags = args->flags; + state->strategy = args->strategy; + state->mtab = args->mtab; + state->dir_flags = 0; + state->error = 0; + + if (args->nopenfd < 2) { + errno = EMFILE; + return -1; + } + + size_t nopenfd = args->nopenfd; + size_t qdepth = 4096; + size_t nthreads = args->nthreads; + +#if BFS_USE_LIBURING + // io_uring uses one fd per ring, ioq uses one ring per thread + if (nthreads >= nopenfd - 1) { + nthreads = nopenfd - 2; + } + nopenfd -= nthreads; +#endif + + bftw_cache_init(&state->cache, nopenfd); + + if (nthreads > 0) { + state->ioq = ioq_create(qdepth, nthreads); + if (!state->ioq) { + return -1; + } + } else { + state->ioq = NULL; + } + state->nthreads = nthreads; + + if (bftw_must_buffer(state)) { + state->flags |= BFTW_BUFFER; + } + + if (state->flags & BFTW_WHITEOUTS) { + state->dir_flags |= BFS_DIR_WHITEOUTS; + } + + SLIST_INIT(&state->to_close); + + enum bftw_qflags qflags = 0; + if (state->strategy != BFTW_BFS) { + qflags |= BFTW_QBUFFER | BFTW_QLIFO; + } + if (state->flags & BFTW_BUFFER) { + qflags |= BFTW_QBUFFER; + } + if (state->flags & BFTW_SORT) { + qflags |= BFTW_QORDER; + } else if (nthreads == 1) { + qflags |= BFTW_QBALANCE; + } + bftw_queue_init(&state->fileq, qflags); + + if (state->strategy == BFTW_BFS || (state->flags & BFTW_BUFFER)) { + // In breadth-first mode, or if we're already buffering files, + // directories can be queued in FIFO order + qflags &= ~(BFTW_QBUFFER | BFTW_QLIFO); + } + bftw_queue_init(&state->dirq, qflags); + + state->path = NULL; + state->file = NULL; + state->previous = NULL; + + state->dir = NULL; + state->de = NULL; + state->direrror = 0; + + return 0; +} + +/** Queue a directory for unwrapping. */ +static void bftw_delayed_unwrap(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->dir); + + if (!SLIST_ATTACHED(&state->to_close, file, ready)) { + SLIST_APPEND(&state->to_close, file, ready); + } +} + +/** Unpin a file's parent. */ +static void bftw_unpin_parent(struct bftw_state *state, struct bftw_file *file, bool unwrap) { + struct bftw_file *parent = file->parent; + if (!parent) { + return; + } + + bftw_cache_unpin(&state->cache, parent); + + if (unwrap && parent->dir && parent->pincount == 0) { + bftw_delayed_unwrap(state, parent); + } +} + +/** Pop a response from the I/O queue. */ +static int bftw_ioq_pop(struct bftw_state *state, bool block) { + struct bftw_cache *cache = &state->cache; + struct ioq *ioq = state->ioq; + if (!ioq) { + return -1; + } + + struct ioq_ent *ent = ioq_pop(ioq, block); + if (!ent) { + return -1; + } + + struct bftw_file *file = ent->ptr; + if (file) { + bftw_unpin_parent(state, file, true); + } + + enum ioq_op op = ent->op; + switch (op) { + case IOQ_CLOSE: + ++cache->capacity; + break; + + case IOQ_CLOSEDIR: + ++cache->capacity; + bftw_freedir(cache, ent->closedir.dir); + break; + + case IOQ_OPENDIR: + ++cache->capacity; + + if (ent->result >= 0) { + bftw_file_set_dir(cache, file, ent->opendir.dir); + } else { + bftw_freedir(cache, ent->opendir.dir); + } + + bftw_queue_attach(&state->dirq, file, true); + break; + + case IOQ_STAT: + if (ent->result >= 0) { + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, ent->stat.buf, 0); + } else { + arena_free(&cache->stat_bufs, ent->stat.buf); + bftw_stat_cache(&file->stat_bufs, ent->stat.flags, NULL, -ent->result); + } + + bftw_queue_attach(&state->fileq, file, true); + break; + } + + ioq_free(ioq, ent); + return op; +} + +/** Try to reserve space in the I/O queue. */ +static int bftw_ioq_reserve(struct bftw_state *state) { + struct ioq *ioq = state->ioq; + if (!ioq) { + return -1; + } + + if (ioq_capacity(ioq) > 0) { + return 0; + } + + // With more than one background thread, it's faster to wait on + // background I/O than it is to do it on the main thread + bool block = state->nthreads > 1; + if (bftw_ioq_pop(state, block) < 0) { + return -1; + } + + return 0; +} + +/** Try to reserve space in the cache. */ +static int bftw_cache_reserve(struct bftw_state *state) { + struct bftw_cache *cache = &state->cache; + if (cache->capacity > 0) { + return 0; + } + + while (bftw_ioq_pop(state, true) >= 0) { + if (cache->capacity > 0) { + return 0; + } + } + + if (bftw_cache_pop(cache) != 0) { + errno = EMFILE; + return -1; + } + + bfs_assert(cache->capacity > 0); + return 0; +} + +/** Open a bftw_file relative to another one. */ +static int bftw_file_openat(struct bftw_state *state, struct bftw_file *file, struct bftw_file *base, const char *at_path) { + bfs_assert(file->fd < 0); + + struct bftw_cache *cache = &state->cache; int at_fd = AT_FDCWD; if (base) { - bftw_cache_use(cache, base); + bftw_cache_pin(cache, base); at_fd = base->fd; } + int fd = -1; + if (bftw_cache_reserve(state) != 0) { + goto unpin; + } + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; - int fd = openat(at_fd, at_path, flags); + fd = openat(at_fd, at_path, flags); if (fd < 0 && errno == EMFILE) { - if (bftw_cache_shrink(cache, base) == 0) { + if (bftw_cache_pop(cache) == 0) { fd = openat(at_fd, at_path, flags); } + cache->capacity = 1; } - if (fd >= 0) { - if (cache->capacity == 0) { - bftw_cache_pop(cache); - } +unpin: + if (base) { + bftw_cache_unpin(cache, base); + } + if (fd >= 0) { file->fd = fd; bftw_cache_add(cache, file); } @@ -321,19 +1138,8 @@ static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, st return fd; } -/** - * Open a bftw_file. - * - * @param cache - * The cache to hold the file. - * @param file - * The file to open. - * @param path - * The full path to the file. - * @return - * The opened file descriptor, or negative on error. - */ -static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { +/** Open a bftw_file. */ +static int bftw_file_open(struct bftw_state *state, struct bftw_file *file, const char *path) { // Find the nearest open ancestor struct bftw_file *base = file; do { @@ -345,335 +1151,424 @@ static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, cons at_path += bftw_child_nameoff(base); } - int fd = bftw_file_openat(cache, file, base, at_path); - if (fd >= 0 || errno != ENAMETOOLONG) { + int fd = bftw_file_openat(state, file, base, at_path); + if (fd >= 0 || !errno_is_like(ENAMETOOLONG)) { return fd; } // Handle ENAMETOOLONG by manually traversing the path component-by-component + struct bftw_list parents; + SLIST_INIT(&parents); - // Use the ->next linked list to temporarily hold the reversed parent - // chain between base and file struct bftw_file *cur; - for (cur = file; cur->parent != base; cur = cur->parent) { - cur->parent->next = cur; + for (cur = file; cur != base; cur = cur->parent) { + SLIST_PREPEND(&parents, cur); } - // Open the files in the chain one by one - for (base = cur; base; base = base->next) { - fd = bftw_file_openat(cache, base, base->parent, base->name); - if (fd < 0 || base == file) { - break; + while ((cur = SLIST_POP(&parents))) { + if (!cur->parent || cur->parent->fd >= 0) { + bftw_file_openat(state, cur, cur->parent, cur->name); } } - // Clear out the linked list - for (struct bftw_file *next = cur->next; cur != file; cur = next, next = next->next) { - cur->next = NULL; + return file->fd; +} + +/** Close a directory, asynchronously if possible. */ +static int bftw_ioq_closedir(struct bftw_state *state, struct bfs_dir *dir) { + if (bftw_ioq_reserve(state) == 0) { + if (ioq_closedir(state->ioq, dir, NULL) == 0) { + return 0; + } } - return fd; + struct bftw_cache *cache = &state->cache; + int ret = bfs_closedir(dir); + bftw_freedir(cache, dir); + ++cache->capacity; + return ret; } -/** - * Open a bftw_file as a directory. - * - * @param cache - * The cache to hold the file. - * @param file - * The directory to open. - * @param path - * The full path to the directory. - * @return - * The opened directory, or NULL on error. - */ -static struct bfs_dir *bftw_file_opendir(struct bftw_cache *cache, struct bftw_file *file, const char *path) { - int fd = bftw_file_open(cache, file, path); - if (fd < 0) { - return NULL; +/** Close a file descriptor, asynchronously if possible. */ +static int bftw_ioq_close(struct bftw_state *state, int fd) { + if (bftw_ioq_reserve(state) == 0) { + if (ioq_close(state->ioq, fd, NULL) == 0) { + return 0; + } } - return bfs_opendir(fd, NULL); + struct bftw_cache *cache = &state->cache; + int ret = xclose(fd); + ++cache->capacity; + return ret; } -/** Free a bftw_file. */ -static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { - assert(file->refcount == 0); +/** Close a file, asynchronously if possible. */ +static int bftw_close(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->fd >= 0); + bfs_assert(file->pincount == 0); - if (file->fd >= 0) { - bftw_file_close(cache, file); - } + struct bfs_dir *dir = file->dir; + int fd = file->fd; - free(file); + bftw_lru_remove(&state->cache, file); + file->dir = NULL; + file->fd = -1; + + if (dir) { + return bftw_ioq_closedir(state, dir); + } else { + return bftw_ioq_close(state, fd); + } } -/** - * A queue of bftw_file's to examine. - */ -struct bftw_queue { - /** The head of the queue. */ - struct bftw_file *head; - /** The insertion target. */ - struct bftw_file **target; -}; +/** Free an open directory. */ +static int bftw_unwrapdir(struct bftw_state *state, struct bftw_file *file) { + struct bfs_dir *dir = file->dir; + if (!dir) { + return 0; + } -/** Initialize a bftw_queue. */ -static void bftw_queue_init(struct bftw_queue *queue) { - queue->head = NULL; - queue->target = &queue->head; -} + struct bftw_cache *cache = &state->cache; -/** Add a file to a bftw_queue. */ -static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { - assert(file->next == NULL); + // Try to keep an open fd if any children exist + bool reffed = file->refcount > 1; + // Keep the fd the same if it's pinned + bool pinned = file->pincount > 0; - file->next = *queue->target; - *queue->target = file; - queue->target = &file->next; +#if BFS_USE_UNWRAPDIR + if (reffed || pinned) { + bfs_unwrapdir(dir); + bftw_freedir(cache, dir); + file->dir = NULL; + return 0; + } +#else + if (pinned) { + return -1; + } +#endif + + if (!reffed) { + return bftw_close(state, file); + } + + // Make room for dup() + bftw_cache_pin(cache, file); + int ret = bftw_cache_reserve(state); + bftw_cache_unpin(cache, file); + if (ret != 0) { + return ret; + } + + int fd = dup_cloexec(file->fd); + if (fd < 0) { + return -1; + } + --cache->capacity; + + file->dir = NULL; + file->fd = fd; + return bftw_ioq_closedir(state, dir); } -/** Pop the next file from the head of the queue. */ -static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { - struct bftw_file *file = queue->head; - queue->head = file->next; - file->next = NULL; - if (queue->target == &file->next) { - queue->target = &queue->head; +/** Try to pin a file's parent. */ +static int bftw_pin_parent(struct bftw_state *state, struct bftw_file *file) { + struct bftw_file *parent = file->parent; + if (!parent) { + return AT_FDCWD; } - return file; + + int fd = parent->fd; + if (fd < 0) { + bfs_static_assert(AT_FDCWD != -1); + return -1; + } + + bftw_cache_pin(&state->cache, parent); + return fd; } -/** The split phase of mergesort. */ -static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { - struct bftw_file **tortoise = head, **hare = head; +/** Open a directory asynchronously. */ +static int bftw_ioq_opendir(struct bftw_state *state, struct bftw_file *file) { + struct bftw_cache *cache = &state->cache; - while (*hare != *tail) { - tortoise = &(*tortoise)->next; - hare = &(*hare)->next; - if (*hare != *tail) { - hare = &(*hare)->next; - } + if (bftw_ioq_reserve(state) != 0) { + goto fail; + } + + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != AT_FDCWD) { + goto fail; + } + + if (bftw_cache_reserve(state) != 0) { + goto unpin; + } + + struct bfs_dir *dir = bftw_allocdir(cache, false); + if (!dir) { + goto unpin; + } + + if (ioq_opendir(state->ioq, dir, dfd, file->name, state->dir_flags, file) != 0) { + goto free; } - return tortoise; + --cache->capacity; + return 0; + +free: + bftw_freedir(cache, dir); +unpin: + bftw_unpin_parent(state, file, false); +fail: + return -1; } -/** The merge phase of mergesort. */ -static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { - struct bftw_file *left = *head, *right = *mid, *end = *tail; - *mid = NULL; - *tail = NULL; +/** Open a batch of directories asynchronously. */ +static void bftw_ioq_opendirs(struct bftw_state *state) { + while (bftw_queue_balanced(&state->dirq)) { + struct bftw_file *dir = bftw_queue_waiting(&state->dirq); + if (!dir) { + break; + } - while (left || right) { - struct bftw_file *next; - if (left && (!right || strcoll(left->name, right->name) <= 0)) { - next = left; - left = left->next; + if (bftw_ioq_opendir(state, dir) == 0) { + bftw_queue_detach(&state->dirq, dir, true); } else { - next = right; - right = right->next; + break; } - - *head = next; - head = &next->next; } +} - *head = end; - return head; +/** Push a directory onto the queue. */ +static void bftw_push_dir(struct bftw_state *state, struct bftw_file *file) { + bfs_assert(file->type == BFS_DIR); + bftw_queue_push(&state->dirq, file); + bftw_ioq_opendirs(state); } -/** - * Sort a (sub-)list of files. - * - * @param head - * The head of the (sub-)list to sort. - * @param tail - * The tail of the (sub-)list to sort. - * @return - * The new tail of the (sub-)list. - */ -static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { - struct bftw_file **mid = bftw_sort_split(head, tail); - if (*mid == *head || *mid == *tail) { - return tail; +/** Pop a file from a queue, then activate it. */ +static bool bftw_pop(struct bftw_state *state, struct bftw_queue *queue) { + if (queue->size == 0) { + return false; } - mid = bftw_sort_files(head, mid); - tail = bftw_sort_files(mid, tail); + while (!bftw_queue_ready(queue) && queue->ioqueued > 0) { + bool block = true; + if (bftw_queue_waiting(queue) && state->nthreads == 1) { + // With only one background thread, balance the work + // between it and the main thread + block = false; + } + + if (bftw_ioq_pop(state, block) < 0) { + break; + } + } - return bftw_sort_merge(head, mid, tail); + struct bftw_file *file = bftw_queue_pop(queue); + if (!file) { + return false; + } + + while (file->ioqueued) { + bftw_ioq_pop(state, true); + } + + state->file = file; + return true; } -/** - * Holds the current state of the bftw() traversal. - */ -struct bftw_state { - /** bftw() callback. */ - bftw_callback *callback; - /** bftw() callback data. */ - void *ptr; - /** bftw() flags. */ - enum bftw_flags flags; - /** Search strategy. */ - enum bftw_strategy strategy; - /** The mount table. */ - const struct bfs_mtab *mtab; +/** Pop a directory to read from the queue. */ +static bool bftw_pop_dir(struct bftw_state *state) { + bfs_assert(!state->file); - /** The appropriate errno value, if any. */ - int error; + if (state->flags & BFTW_SORT) { + // Keep strict breadth-first order when sorting + if (state->strategy == BFTW_BFS && bftw_queue_ready(&state->fileq)) { + return false; + } + } else if (!bftw_queue_ready(&state->dirq)) { + // Don't block if we have files ready to visit + if (bftw_queue_ready(&state->fileq)) { + return false; + } + } - /** The cache of open directories. */ - struct bftw_cache cache; - /** The queue of directories left to explore. */ - struct bftw_queue queue; - /** The start of the current batch of files. */ - struct bftw_file **batch; + return bftw_pop(state, &state->dirq); +} - /** The current path. */ - char *path; - /** The current file. */ - struct bftw_file *file; - /** The previous file. */ - struct bftw_file *previous; +/** Figure out bfs_stat() flags. */ +static enum bfs_stat_flags bftw_stat_flags(const struct bftw_state *state, size_t depth) { + enum bftw_flags mask = BFTW_FOLLOW_ALL; + if (depth == 0) { + mask |= BFTW_FOLLOW_ROOTS; + } - /** The currently open directory. */ - struct bfs_dir *dir; - /** The current directory entry. */ - struct bfs_dirent *de; - /** Storage for the directory entry. */ - struct bfs_dirent de_storage; - /** Any error encountered while reading the directory. */ - int direrror; + if (state->flags & mask) { + return BFS_STAT_TRYFOLLOW; + } else { + return BFS_STAT_NOFOLLOW; + } +} - /** Extra data about the current file. */ - struct BFTW ftwbuf; -}; +/** Check if a stat() call is necessary. */ +static bool bftw_must_stat(const struct bftw_state *state, size_t depth, enum bfs_type type, const char *name) { + if (state->flags & BFTW_STAT) { + return true; + } -/** - * Initialize the bftw() state. - */ -static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { - state->callback = args->callback; - state->ptr = args->ptr; - state->flags = args->flags; - state->strategy = args->strategy; - state->mtab = args->mtab; + switch (type) { + case BFS_UNKNOWN: + return true; - state->error = 0; + case BFS_DIR: + return state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS); - if (args->nopenfd < 1) { - errno = EMFILE; - return -1; + case BFS_LNK: + if (!(bftw_stat_flags(state, depth) & BFS_STAT_NOFOLLOW)) { + return true; + } + fallthru; + + default: +#if __linux__ + if (state->mtab && bfs_might_be_mount(state->mtab, name)) { + return true; + } +#endif + return false; } +} - state->path = dstralloc(0); - if (!state->path) { - return -1; +/** stat() a file asynchronously. */ +static int bftw_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + if (bftw_ioq_reserve(state) != 0) { + goto fail; } - bftw_cache_init(&state->cache, args->nopenfd); - bftw_queue_init(&state->queue); - state->batch = NULL; + int dfd = bftw_pin_parent(state, file); + if (dfd < 0 && dfd != AT_FDCWD) { + goto fail; + } - state->file = NULL; - state->previous = NULL; + struct bftw_cache *cache = &state->cache; + struct bfs_stat *buf = arena_alloc(&cache->stat_bufs); + if (!buf) { + goto unpin; + } - state->dir = NULL; - state->de = NULL; - state->direrror = 0; + enum bfs_stat_flags flags = bftw_stat_flags(state, file->depth); + if (ioq_stat(state->ioq, dfd, file->name, flags, buf, file) != 0) { + goto free; + } return 0; + +free: + arena_free(&cache->stat_bufs, buf); +unpin: + bftw_unpin_parent(state, file, false); +fail: + return -1; } -/** Cached bfs_stat(). */ -static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { - if (!cache->buf) { - if (cache->error) { - errno = cache->error; - } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { - cache->buf = &cache->storage; - } else { - cache->error = errno; - } +/** Check if we should stat() a file asynchronously. */ +static bool bftw_should_ioq_stat(struct bftw_state *state, struct bftw_file *file) { + // To avoid surprising users too much, process the roots in order + if (file->depth == 0) { + return false; } - return cache->buf; +#ifdef S_IFWHT + // ioq_stat() does not do whiteout emulation like bftw_stat_impl() + if (file->type == BFS_WHT) { + return false; + } +#endif + + return bftw_must_stat(state, file->depth, file->type, file->name); } -const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - struct BFTW *mutbuf = (struct BFTW *)ftwbuf; - const struct bfs_stat *ret; +/** Call stat() on files that need it. */ +static void bftw_stat_files(struct bftw_state *state) { + while (true) { + struct bftw_file *file = bftw_queue_waiting(&state->fileq); + if (!file) { + break; + } - if (flags & BFS_STAT_NOFOLLOW) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); - if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { - // Non-link, so share stat info - mutbuf->stat_cache.buf = ret; + if (!bftw_should_ioq_stat(state, file)) { + bftw_queue_skip(&state->fileq, file); + continue; } - } else { - ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); - if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); + + if (!bftw_queue_balanced(&state->fileq)) { + break; + } + + if (bftw_ioq_stat(state, file) == 0) { + bftw_queue_detach(&state->fileq, file, true); + } else { + break; } } +} - return ret; +/** Push a file onto the queue. */ +static void bftw_push_file(struct bftw_state *state, struct bftw_file *file) { + bftw_queue_push(&state->fileq, file); + bftw_stat_files(state); } -const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - if (flags & BFS_STAT_NOFOLLOW) { - return ftwbuf->lstat_cache.buf; - } else if (ftwbuf->stat_cache.buf) { - return ftwbuf->stat_cache.buf; - } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { - return ftwbuf->lstat_cache.buf; - } else { - return NULL; - } +/** Pop a file to visit from the queue. */ +static bool bftw_pop_file(struct bftw_state *state) { + bfs_assert(!state->file); + return bftw_pop(state, &state->fileq); } -enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - if (flags & BFS_STAT_NOFOLLOW) { - if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - return ftwbuf->type; - } - } else if (flags & BFS_STAT_TRYFOLLOW) { - if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { - return ftwbuf->type; - } - } else { - if (ftwbuf->type != BFS_LNK) { - return ftwbuf->type; - } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { - return BFS_ERROR; - } +/** Build the path to the current file. */ +static int bftw_build_path(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + + size_t pathlen = file ? file->nameoff + file->namelen : 0; + if (dstresize(&state->path, pathlen) != 0) { + state->error = errno; + return -1; } - const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); - if (statbuf) { - return bfs_mode_to_type(statbuf->mode); - } else { - return BFS_ERROR; + // Try to find a common ancestor with the existing path + const struct bftw_file *ancestor = state->previous; + while (ancestor && ancestor->depth > file->depth) { + ancestor = ancestor->parent; } -} -/** - * Update the path for the current file. - */ -static int bftw_update_path(struct bftw_state *state, const char *name) { - const struct bftw_file *file = state->file; - size_t length = file ? file->nameoff + file->namelen : 0; + // Build the path backwards + while (file && file != ancestor) { + if (file->nameoff > 0) { + state->path[file->nameoff - 1] = '/'; + } + memcpy(state->path + file->nameoff, file->name, file->namelen); - assert(dstrlen(state->path) >= length); - dstresize(&state->path, length); + if (ancestor && ancestor->depth == file->depth) { + ancestor = ancestor->parent; + } + file = file->parent; + } + + state->previous = state->file; if (name) { - if (length > 0 && state->path[length - 1] != '/') { + if (pathlen > 0 && state->path[pathlen - 1] != '/') { if (dstrapp(&state->path, '/') != 0) { + state->error = errno; return -1; } } if (dstrcat(&state->path, name) != 0) { + state->error = errno; return -1; } } @@ -681,58 +1576,79 @@ static int bftw_update_path(struct bftw_state *state, const char *name) { return 0; } -/** Check if a stat() call is needed for this visit. */ -static bool bftw_need_stat(const struct bftw_state *state) { - if (state->flags & BFTW_STAT) { - return true; +/** Open a bftw_file as a directory. */ +static struct bfs_dir *bftw_file_opendir(struct bftw_state *state, struct bftw_file *file, const char *path) { + int fd = bftw_file_open(state, file, path); + if (fd < 0) { + return NULL; } - const struct BFTW *ftwbuf = &state->ftwbuf; - if (ftwbuf->type == BFS_UNKNOWN) { - return true; + struct bftw_cache *cache = &state->cache; + struct bfs_dir *dir = bftw_allocdir(cache, true); + if (!dir) { + return NULL; } - if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - return true; + if (bfs_opendir(dir, fd, NULL, state->dir_flags) != 0) { + bftw_freedir(cache, dir); + return NULL; } - if (ftwbuf->type == BFS_DIR) { - if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { - return true; - } -#if __linux__ - } else if (state->mtab) { - // Linux fills in d_type from the underlying inode, even when - // the directory entry is a bind mount point. In that case, we - // need to stat() to get the correct type. We don't need to - // check for directories because they can only be mounted over - // by other directories. - if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { - return true; - } -#endif + bftw_file_set_dir(cache, file, dir); + return dir; +} + +/** Open the current directory. */ +static int bftw_opendir(struct bftw_state *state) { + bfs_assert(!state->dir); + bfs_assert(!state->de); + + state->direrror = 0; + + struct bftw_file *file = state->file; + state->dir = file->dir; + if (state->dir) { + goto pin; } - return false; + if (bftw_build_path(state, NULL) != 0) { + return -1; + } + + bftw_queue_rebalance(&state->dirq, false); + + state->dir = bftw_file_opendir(state, file, state->path); + if (!state->dir) { + state->direrror = errno; + return 0; + } + +pin: + bftw_cache_pin(&state->cache, file); + return 0; } -/** Initialize bftw_stat cache. */ -static void bftw_stat_init(struct bftw_stat *cache) { - cache->buf = NULL; - cache->error = 0; +/** Read an entry from the current directory. */ +static int bftw_readdir(struct bftw_state *state) { + if (!state->dir) { + return -1; + } + + int ret = bfs_readdir(state->dir, &state->de_storage); + if (ret > 0) { + state->de = &state->de_storage; + } else if (ret == 0) { + state->de = NULL; + } else { + state->de = NULL; + state->direrror = errno; + } + + return ret; } -/** - * Open a file if necessary. - * - * @param file - * The file to open. - * @param path - * The path to that file or one of its descendants. - * @return - * The opened file descriptor, or -1 on error. - */ -static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { +/** Open a file if necessary. */ +static int bftw_ensure_open(struct bftw_state *state, struct bftw_file *file, const char *path) { int ret = file->fd; if (ret < 0) { @@ -741,16 +1657,14 @@ static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, co return -1; } - ret = bftw_file_open(cache, file, copy); + ret = bftw_file_open(state, file, copy); free(copy); } return ret; } -/** - * Initialize the buffers with data about the current path. - */ +/** Initialize the buffers with data about the current path. */ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { struct bftw_file *file = state->file; const struct bfs_dirent *de = state->de; @@ -764,9 +1678,7 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { ftwbuf->error = state->direrror; ftwbuf->at_fd = AT_FDCWD; ftwbuf->at_path = ftwbuf->path; - ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; - bftw_stat_init(&ftwbuf->lstat_cache); - bftw_stat_init(&ftwbuf->stat_cache); + bftw_stat_init(&ftwbuf->stat_bufs, &state->stat_buf, &state->lstat_buf); struct bftw_file *parent = NULL; if (de) { @@ -779,11 +1691,12 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { ftwbuf->depth = file->depth; ftwbuf->type = file->type; ftwbuf->nameoff = file->nameoff; + bftw_stat_fill(&ftwbuf->stat_bufs, &file->stat_bufs); } if (parent) { // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG - if (bftw_ensure_open(&state->cache, parent, state->path) >= 0) { + if (bftw_ensure_open(state, parent, state->path) >= 0) { ftwbuf->at_fd = parent->fd; ftwbuf->at_path += ftwbuf->nameoff; } else { @@ -793,25 +1706,18 @@ static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { if (ftwbuf->depth == 0) { // Compute the name offset for root paths like "foo/bar" - ftwbuf->nameoff = xbasename(ftwbuf->path) - ftwbuf->path; + ftwbuf->nameoff = xbaseoff(ftwbuf->path); } + ftwbuf->stat_flags = bftw_stat_flags(state, ftwbuf->depth); + if (ftwbuf->error != 0) { ftwbuf->type = BFS_ERROR; return; } - int follow_flags = BFTW_FOLLOW_ALL; - if (ftwbuf->depth == 0) { - follow_flags |= BFTW_FOLLOW_ROOTS; - } - bool follow = state->flags & follow_flags; - if (follow) { - ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; - } - const struct bfs_stat *statbuf = NULL; - if (bftw_need_stat(state)) { + if (bftw_must_stat(state, ftwbuf->depth, ftwbuf->type, ftwbuf->path + ftwbuf->nameoff)) { statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); if (statbuf) { ftwbuf->type = bfs_mode_to_type(statbuf->mode); @@ -850,24 +1756,18 @@ static bool bftw_is_mount(struct bftw_state *state, const char *name) { return statbuf && statbuf->dev != parent->dev; } -/** Fill file identity information from an ftwbuf. */ -static void bftw_fill_id(struct bftw_file *file, const struct BFTW *ftwbuf) { - const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; - if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - statbuf = ftwbuf->lstat_cache.buf; - } - if (statbuf) { - file->dev = statbuf->dev; - file->ino = statbuf->ino; - } +/** Check if bfs_stat() was called from the main thread. */ +static bool bftw_stat_was_sync(const struct bftw_state *state, const struct bfs_stat *buf) { + return buf == &state->stat_buf || buf == &state->lstat_buf; } -/** - * Visit a path, invoking the callback. - */ -static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, enum bftw_visit visit) { - if (bftw_update_path(state, name) != 0) { - state->error = errno; +/** Invoke the callback. */ +static enum bftw_action bftw_call_back(struct bftw_state *state, const char *name, enum bftw_visit visit) { + if (visit == BFTW_POST && !(state->flags & BFTW_POST_ORDER)) { + return BFTW_PRUNE; + } + + if (bftw_build_path(state, name) != 0) { return BFTW_STOP; } @@ -880,246 +1780,278 @@ static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, e return BFTW_STOP; } + enum bftw_action ret = BFTW_PRUNE; if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { - return BFTW_PRUNE; + goto done; } - enum bftw_action ret = state->callback(ftwbuf, state->ptr); + ret = state->callback(ftwbuf, state->ptr); switch (ret) { case BFTW_CONTINUE: + if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { + ret = BFTW_PRUNE; + } else if (state->flags & BFTW_PRUNE_MOUNTS) { + if (bftw_is_mount(state, name)) { + ret = BFTW_PRUNE; + } + } break; + case BFTW_PRUNE: case BFTW_STOP: - goto done; + break; + default: state->error = EINVAL; return BFTW_STOP; } - if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { - ret = BFTW_PRUNE; - goto done; - } - - if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { - ret = BFTW_PRUNE; - goto done; - } - done: - if (state->file && !name) { - bftw_fill_id(state->file, ftwbuf); + if (state->fileq.flags & BFTW_QBALANCE) { + // Detect any main-thread stat() calls to rebalance the queue + const struct bfs_stat *buf = bftw_cached_stat(ftwbuf, BFS_STAT_FOLLOW); + const struct bfs_stat *lbuf = bftw_cached_stat(ftwbuf, BFS_STAT_NOFOLLOW); + if (bftw_stat_was_sync(state, buf) || bftw_stat_was_sync(state, lbuf)) { + bftw_queue_rebalance(&state->fileq, false); + } } return ret; } /** - * Push a new file onto the queue. + * Flags controlling which files get visited when done with a directory. */ -static int bftw_push(struct bftw_state *state, const char *name, bool fill_id) { - struct bftw_file *parent = state->file; - struct bftw_file *file = bftw_file_new(parent, name); - if (!file) { - state->error = errno; - return -1; - } +enum bftw_gc_flags { + /** Don't visit anything. */ + BFTW_VISIT_NONE = 0, + /** Report directory errors. */ + BFTW_VISIT_ERROR = 1 << 0, + /** Visit the file itself. */ + BFTW_VISIT_FILE = 1 << 1, + /** Visit the file's ancestors. */ + BFTW_VISIT_PARENTS = 1 << 2, + /** Visit both the file and its ancestors. */ + BFTW_VISIT_ALL = BFTW_VISIT_ERROR | BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, +}; - if (state->de) { - file->type = state->de->type; - } +/** Garbage collect the current file and its parents. */ +static int bftw_gc(struct bftw_state *state, enum bftw_gc_flags flags) { + int ret = 0; - if (fill_id) { - bftw_fill_id(file, &state->ftwbuf); + struct bftw_file *file = state->file; + if (file) { + if (state->dir) { + bftw_cache_unpin(&state->cache, file); + } + if (file->dir) { + bftw_delayed_unwrap(state, file); + } } + state->dir = NULL; + state->de = NULL; - bftw_queue_push(&state->queue, file); - - return 0; -} - -/** - * Build the path to the current file. - */ -static int bftw_build_path(struct bftw_state *state) { - const struct bftw_file *file = state->file; - - size_t pathlen = file->nameoff + file->namelen; - if (dstresize(&state->path, pathlen) != 0) { - state->error = errno; - return -1; + if (state->direrror != 0) { + if (flags & BFTW_VISIT_ERROR) { + if (bftw_call_back(state, NULL, BFTW_PRE) == BFTW_STOP) { + ret = -1; + flags = 0; + } + } else { + state->error = state->direrror; + } } + state->direrror = 0; - // Try to find a common ancestor with the existing path - const struct bftw_file *ancestor = state->previous; - while (ancestor && ancestor->depth > file->depth) { - ancestor = ancestor->parent; + while ((file = SLIST_POP(&state->to_close, ready))) { + bftw_unwrapdir(state, file); } - // Build the path backwards - while (file && file != ancestor) { - if (file->nameoff > 0) { - state->path[file->nameoff - 1] = '/'; + enum bftw_gc_flags visit = BFTW_VISIT_FILE; + while ((file = state->file)) { + if (--file->refcount > 0) { + state->file = NULL; + break; } - memcpy(state->path + file->nameoff, file->name, file->namelen); - if (ancestor && ancestor->depth == file->depth) { - ancestor = ancestor->parent; + if (flags & visit) { + if (bftw_call_back(state, NULL, BFTW_POST) == BFTW_STOP) { + ret = -1; + flags = 0; + } } - file = file->parent; + visit = BFTW_VISIT_PARENTS; + + struct bftw_file *parent = file->parent; + if (state->previous == file) { + state->previous = parent; + } + state->file = parent; + + if (file->fd >= 0) { + bftw_close(state, file); + } + bftw_file_free(&state->cache, file); } - state->previous = state->file; - return 0; + return ret; } -/** - * Pop the next file from the queue. - */ -static int bftw_pop(struct bftw_state *state) { - if (!state->queue.head) { - return 0; +/** Sort a bftw_list by filename. */ +static void bftw_list_sort(struct bftw_list *list) { + if (!list->head || !list->head->next) { + return; } - state->file = bftw_queue_pop(&state->queue); + struct bftw_list left, right; + SLIST_INIT(&left); + SLIST_INIT(&right); - if (bftw_build_path(state) != 0) { - return -1; + // Split + for (struct bftw_file *hare = list->head; hare && (hare = hare->next); hare = hare->next) { + struct bftw_file *tortoise = SLIST_POP(list); + SLIST_APPEND(&left, tortoise); } + SLIST_EXTEND(&right, list); - return 1; -} + // Recurse + bftw_list_sort(&left); + bftw_list_sort(&right); -/** - * Open the current directory. - */ -static void bftw_opendir(struct bftw_state *state) { - assert(!state->dir); - assert(!state->de); - - state->direrror = 0; + // Merge + while (!SLIST_EMPTY(&left) && !SLIST_EMPTY(&right)) { + struct bftw_file *lf = left.head; + struct bftw_file *rf = right.head; - state->dir = bftw_file_opendir(&state->cache, state->file, state->path); - if (!state->dir) { - state->direrror = errno; + if (strcoll(lf->name, rf->name) <= 0) { + SLIST_POP(&left); + SLIST_APPEND(list, lf); + } else { + SLIST_POP(&right); + SLIST_APPEND(list, rf); + } } + SLIST_EXTEND(list, &left); + SLIST_EXTEND(list, &right); } -/** - * Read an entry from the current directory. - */ -static int bftw_readdir(struct bftw_state *state) { - if (!state->dir) { - return -1; +/** Flush all the queue buffers. */ +static void bftw_flush(struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + bftw_list_sort(&state->fileq.buffer); } + bftw_queue_flush(&state->fileq); + bftw_stat_files(state); - int ret = bfs_readdir(state->dir, &state->de_storage); - if (ret > 0) { - state->de = &state->de_storage; - } else if (ret == 0) { - state->de = NULL; - } else { - state->de = NULL; - state->direrror = errno; + bftw_queue_flush(&state->dirq); + bftw_ioq_opendirs(state); +} + +/** Close the current directory. */ +static int bftw_closedir(struct bftw_state *state) { + if (bftw_gc(state, BFTW_VISIT_ALL) != 0) { + return -1; } - return ret; + bftw_flush(state); + return 0; } -/** - * Flags controlling which files get visited when done with a directory. - */ -enum bftw_gc_flags { - /** Don't visit anything. */ - BFTW_VISIT_NONE = 0, - /** Visit the file itself. */ - BFTW_VISIT_FILE = 1 << 0, - /** Visit the file's ancestors. */ - BFTW_VISIT_PARENTS = 1 << 1, - /** Visit both the file and its ancestors. */ - BFTW_VISIT_ALL = BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, -}; - -/** - * Close the current directory. - */ -static enum bftw_action bftw_closedir(struct bftw_state *state, enum bftw_gc_flags flags) { - struct bftw_file *file = state->file; - enum bftw_action ret = BFTW_CONTINUE; +/** Fill file identity information from an ftwbuf. */ +static void bftw_save_ftwbuf(struct bftw_file *file, const struct BFTW *ftwbuf) { + file->type = ftwbuf->type; - if (state->dir) { - assert(file->fd >= 0); + const struct bfs_stat *statbuf = bftw_cached_stat(ftwbuf, ftwbuf->stat_flags); + if (statbuf) { + file->dev = statbuf->dev; + file->ino = statbuf->ino; + } +} - if (file->refcount > 1) { - // Keep the fd around if any subdirectories exist - file->fd = bfs_freedir(state->dir); - } else { - bfs_closedir(state->dir); - file->fd = -1; - } +/** Check if we should buffer a file instead of visiting it. */ +static bool bftw_buffer_file(const struct bftw_state *state, const struct bftw_file *file, const char *name) { + if (!name) { + // Already buffered + return false; + } - if (file->fd < 0) { - bftw_cache_remove(&state->cache, file); - } + if (state->flags & BFTW_BUFFER) { + return true; } - state->de = NULL; - state->dir = NULL; + // If we need to call stat(), and can do it async, buffer this file + if (!state->ioq) { + return false; + } - if (state->direrror != 0) { - if (flags & BFTW_VISIT_FILE) { - ret = bftw_visit(state, NULL, BFTW_PRE); - } else { - state->error = state->direrror; - } - state->direrror = 0; + if (!bftw_queue_balanced(&state->fileq)) { + // stat() would run synchronously anyway + return false; } - return ret; + size_t depth = file ? file->depth + 1 : 1; + enum bfs_type type = state->de ? state->de->type : BFS_UNKNOWN; + return bftw_must_stat(state, depth, type, name); } -/** - * Finalize and free a file we're done with. - */ -static enum bftw_action bftw_gc_file(struct bftw_state *state, enum bftw_gc_flags flags) { - enum bftw_action ret = BFTW_CONTINUE; +/** Visit and/or enqueue the current file. */ +static int bftw_visit(struct bftw_state *state, const char *name) { + struct bftw_cache *cache = &state->cache; + struct bftw_file *file = state->file; + + if (bftw_buffer_file(state, file, name)) { + file = bftw_file_new(cache, file, name); + if (!file) { + state->error = errno; + return -1; + } - if (!(state->flags & BFTW_POST_ORDER)) { - flags = 0; + if (state->de) { + file->type = state->de->type; + } + + bftw_push_file(state, file); + return 0; } - bool visit = flags & BFTW_VISIT_FILE; - while (state->file) { - struct bftw_file *file = state->file; - if (--file->refcount > 0) { + switch (bftw_call_back(state, name, BFTW_PRE)) { + case BFTW_CONTINUE: + if (name) { + file = bftw_file_new(cache, state->file, name); + } else { state->file = NULL; - break; } + if (!file) { + state->error = errno; + return -1; + } + + bftw_save_ftwbuf(file, &state->ftwbuf); + bftw_stat_recycle(cache, file); + bftw_push_dir(state, file); + return 0; - if (visit && bftw_visit(state, NULL, BFTW_POST) == BFTW_STOP) { - ret = BFTW_STOP; - flags &= ~BFTW_VISIT_PARENTS; + case BFTW_PRUNE: + if (file && !name) { + return bftw_gc(state, BFTW_VISIT_PARENTS); + } else { + return 0; } - visit = flags & BFTW_VISIT_PARENTS; - struct bftw_file *parent = file->parent; - if (state->previous == file) { - state->previous = parent; + default: + if (file && !name) { + bftw_gc(state, BFTW_VISIT_NONE); } - bftw_file_free(&state->cache, file); - state->file = parent; + return -1; } - - return ret; } -/** - * Drain all the entries from a bftw_queue. - */ -static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) { - while (queue->head) { - state->file = bftw_queue_pop(queue); - bftw_gc_file(state, BFTW_VISIT_NONE); +/** Drain a bftw_queue. */ +static void bftw_drain(struct bftw_state *state, struct bftw_queue *queue) { + bftw_queue_flush(queue); + + while (bftw_pop(state, queue)) { + bftw_gc(state, BFTW_VISIT_NONE); } } @@ -1132,10 +2064,18 @@ static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) static int bftw_state_destroy(struct bftw_state *state) { dstrfree(state->path); - bftw_closedir(state, BFTW_VISIT_NONE); + struct ioq *ioq = state->ioq; + if (ioq) { + ioq_cancel(ioq); + while (bftw_ioq_pop(state, true) >= 0); + state->ioq = NULL; + } + + bftw_gc(state, BFTW_VISIT_NONE); + bftw_drain(state, &state->dirq); + bftw_drain(state, &state->fileq); - bftw_gc_file(state, BFTW_VISIT_NONE); - bftw_drain_queue(state, &state->queue); + ioq_destroy(ioq); bftw_cache_destroy(&state->cache); @@ -1143,152 +2083,63 @@ static int bftw_state_destroy(struct bftw_state *state) { return state->error ? -1 : 0; } -/** Start a batch of files. */ -static void bftw_batch_start(struct bftw_state *state) { - if (state->strategy == BFTW_DFS) { - state->queue.target = &state->queue.head; - } - state->batch = state->queue.target; -} - -/** Finish adding a batch of files. */ -static void bftw_batch_finish(struct bftw_state *state) { - if (state->flags & BFTW_SORT) { - state->queue.target = bftw_sort_files(state->batch, state->queue.target); - } -} - /** - * Streaming mode: visit files as they are encountered. + * Shared implementation for all search strategies. */ -static int bftw_stream(const struct bftw_args *args) { - struct bftw_state state; - if (bftw_state_init(&state, args) != 0) { - return -1; - } - - assert(!(state.flags & (BFTW_SORT | BFTW_BUFFER))); - - bftw_batch_start(&state); - for (size_t i = 0; i < args->npaths; ++i) { - const char *path = args->paths[i]; - - switch (bftw_visit(&state, path, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - continue; - case BFTW_STOP: - goto done; - } - - if (bftw_push(&state, path, true) != 0) { - goto done; +static int bftw_impl(struct bftw_state *state) { + for (size_t i = 0; i < state->npaths; ++i) { + if (bftw_visit(state, state->paths[i]) != 0) { + return -1; } } - bftw_batch_finish(&state); - - while (bftw_pop(&state) > 0) { - bftw_opendir(&state); + bftw_flush(state); - bftw_batch_start(&state); - while (bftw_readdir(&state) > 0) { - const char *name = state.de->name; - - switch (bftw_visit(&state, name, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - continue; - case BFTW_STOP: - goto done; + while (true) { + while (bftw_pop_dir(state)) { + if (bftw_opendir(state) != 0) { + return -1; } - - if (bftw_push(&state, name, true) != 0) { - goto done; + while (bftw_readdir(state) > 0) { + if (bftw_visit(state, state->de->name) != 0) { + return -1; + } + } + if (bftw_closedir(state) != 0) { + return -1; } } - bftw_batch_finish(&state); - if (bftw_closedir(&state, BFTW_VISIT_ALL) == BFTW_STOP) { - goto done; + if (!bftw_pop_file(state)) { + break; } - if (bftw_gc_file(&state, BFTW_VISIT_ALL) == BFTW_STOP) { - goto done; + if (bftw_visit(state, NULL) != 0) { + return -1; } + bftw_flush(state); } -done: - return bftw_state_destroy(&state); + return 0; } /** - * Batching mode: queue up all children before visiting them. + * bftw() implementation for simple breadth-/depth-first search. */ -static int bftw_batch(const struct bftw_args *args) { +static int bftw_walk(const struct bftw_args *args) { struct bftw_state state; if (bftw_state_init(&state, args) != 0) { return -1; } - bftw_batch_start(&state); - for (size_t i = 0; i < args->npaths; ++i) { - if (bftw_push(&state, args->paths[i], false) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - while (bftw_pop(&state) > 0) { - enum bftw_gc_flags gcflags = BFTW_VISIT_ALL; - - switch (bftw_visit(&state, NULL, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - gcflags &= ~BFTW_VISIT_FILE; - goto next; - case BFTW_STOP: - goto done; - } - - bftw_opendir(&state); - - bftw_batch_start(&state); - while (bftw_readdir(&state) > 0) { - if (bftw_push(&state, state.de->name, false) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - if (bftw_closedir(&state, gcflags) == BFTW_STOP) { - goto done; - } - - next: - if (bftw_gc_file(&state, gcflags) == BFTW_STOP) { - goto done; - } - } - -done: + bftw_impl(&state); return bftw_state_destroy(&state); } -/** Select bftw_stream() or bftw_batch() appropriately. */ -static int bftw_auto(const struct bftw_args *args) { - if (args->flags & (BFTW_SORT | BFTW_BUFFER)) { - return bftw_batch(args); - } else { - return bftw_stream(args); - } -} - /** * Iterative deepening search state. */ struct bftw_ids_state { + /** Nested walk state. */ + struct bftw_state nested; /** The wrapped callback. */ bftw_callback *delegate; /** The wrapped callback arguments. */ @@ -1303,12 +2154,8 @@ struct bftw_ids_state { size_t max_depth; /** The set of pruned paths. */ struct trie pruned; - /** An error code to report. */ - int error; /** Whether the bottom has been found. */ bool bottom; - /** Whether to quit the search. */ - bool quit; }; /** Iterative deepening callback function. */ @@ -1352,17 +2199,17 @@ static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) ret = BFTW_PRUNE; } break; + case BFTW_PRUNE: if (ftwbuf->type == BFS_DIR) { if (!trie_insert_str(&state->pruned, ftwbuf->path)) { - state->error = errno; - state->quit = true; + state->nested.error = errno; ret = BFTW_STOP; } } break; + case BFTW_STOP: - state->quit = true; break; } @@ -1370,7 +2217,7 @@ static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) } /** Initialize iterative deepening state. */ -static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *state, struct bftw_args *ids_args) { +static int bftw_ids_init(struct bftw_ids_state *state, const struct bftw_args *args) { state->delegate = args->callback; state->ptr = args->ptr; state->visit = BFTW_PRE; @@ -1378,31 +2225,19 @@ static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *s state->min_depth = 0; state->max_depth = 1; trie_init(&state->pruned); - state->error = 0; state->bottom = false; - state->quit = false; - *ids_args = *args; - ids_args->callback = bftw_ids_callback; - ids_args->ptr = state; - ids_args->flags &= ~BFTW_POST_ORDER; - ids_args->strategy = BFTW_DFS; + struct bftw_args ids_args = *args; + ids_args.callback = bftw_ids_callback; + ids_args.ptr = state; + ids_args.flags &= ~BFTW_POST_ORDER; + return bftw_state_init(&state->nested, &ids_args); } /** Finish an iterative deepening search. */ -static int bftw_ids_finish(struct bftw_ids_state *state) { - int ret = 0; - - if (state->error) { - ret = -1; - } else { - state->error = errno; - } - +static int bftw_ids_destroy(struct bftw_ids_state *state) { trie_destroy(&state->pruned); - - errno = state->error; - return ret; + return bftw_state_destroy(&state->nested); } /** @@ -1410,15 +2245,15 @@ static int bftw_ids_finish(struct bftw_ids_state *state) { */ static int bftw_ids(const struct bftw_args *args) { struct bftw_ids_state state; - struct bftw_args ids_args; - bftw_ids_init(args, &state, &ids_args); + if (bftw_ids_init(&state, args) != 0) { + return -1; + } - while (!state.quit && !state.bottom) { + while (!state.bottom) { state.bottom = true; - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; + if (bftw_impl(&state.nested) != 0) { + goto done; } ++state.min_depth; @@ -1429,18 +2264,18 @@ static int bftw_ids(const struct bftw_args *args) { state.visit = BFTW_POST; state.force_visit = true; - while (!state.quit && state.min_depth > 0) { + while (state.min_depth > 0) { --state.max_depth; --state.min_depth; - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; + if (bftw_impl(&state.nested) != 0) { + goto done; } } } - return bftw_ids_finish(&state); +done: + return bftw_ids_destroy(&state); } /** @@ -1448,40 +2283,38 @@ static int bftw_ids(const struct bftw_args *args) { */ static int bftw_eds(const struct bftw_args *args) { struct bftw_ids_state state; - struct bftw_args ids_args; - bftw_ids_init(args, &state, &ids_args); + if (bftw_ids_init(&state, args) != 0) { + return -1; + } - while (!state.quit && !state.bottom) { + while (!state.bottom) { state.bottom = true; - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; + if (bftw_impl(&state.nested) != 0) { + goto done; } state.min_depth = state.max_depth; state.max_depth *= 2; } - if (!state.quit && (args->flags & BFTW_POST_ORDER)) { + if (args->flags & BFTW_POST_ORDER) { state.visit = BFTW_POST; state.min_depth = 0; - ids_args.flags |= BFTW_POST_ORDER; + state.nested.flags |= BFTW_POST_ORDER; - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - } + bftw_impl(&state.nested); } - return bftw_ids_finish(&state); +done: + return bftw_ids_destroy(&state); } int bftw(const struct bftw_args *args) { switch (args->strategy) { case BFTW_BFS: - return bftw_auto(args); case BFTW_DFS: - return bftw_batch(args); + return bftw_walk(args); case BFTW_IDS: return bftw_ids(args); case BFTW_EDS: |