diff options
author | トトも <85485984+ElectronicsArchiver@users.noreply.github.com> | 2022-04-16 20:18:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-16 14:18:56 -0400 |
commit | 33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff (patch) | |
tree | 02fb808d19aee560ac9d381ca5a52509881cdd44 /src/bftw.c | |
parent | 8f5a73a6585bd425807430fd80ce1e3a737f4c5f (diff) | |
download | bfs-33cc3b9dd7bf3dae1c6cf86e46bb4923f96e7fff.tar.xz |
Source / Include Folder (#88)
Moved Source Files Into `src` Folder
Diffstat (limited to 'src/bftw.c')
-rw-r--r-- | src/bftw.c | 1494 |
1 files changed, 1494 insertions, 0 deletions
diff --git a/src/bftw.c b/src/bftw.c new file mode 100644 index 0000000..6f97bf6 --- /dev/null +++ b/src/bftw.c @@ -0,0 +1,1494 @@ +/**************************************************************************** + * bfs * + * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * + * * + * Permission to use, copy, modify, and/or distribute this software for any * + * purpose with or without fee is hereby granted. * + * * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * + ****************************************************************************/ + +/** + * The bftw() implementation consists of the following components: + * + * - struct bftw_file: A file that has been encountered during the traversal. + * They have reference-counted links to their parents in the directory tree. + * + * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, + * used for openat() to minimize the amount of path re-traversals. + * + * - struct bftw_queue: The queue of bftw_file's left to explore. Implemented + * as a simple circular buffer. + * + * - struct bftw_state: Represents the current state of the traversal, allowing + * various helper functions to take fewer parameters. + */ + +#include "bftw.h" +#include "dir.h" +#include "darray.h" +#include "dstring.h" +#include "mtab.h" +#include "stat.h" +#include "trie.h" +#include "util.h" +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +/** + * A file. + */ +struct bftw_file { + /** The parent directory, if any. */ + struct bftw_file *parent; + /** The root under which this file was found. */ + struct bftw_file *root; + /** The next file in the queue, if any. */ + struct bftw_file *next; + + /** The previous file in the LRU list. */ + struct bftw_file *lru_prev; + /** The next file in the LRU list. */ + struct bftw_file *lru_next; + + /** This file's depth in the walk. */ + size_t depth; + /** Reference count. */ + size_t refcount; + + /** An open descriptor to this file, or -1. */ + int fd; + + /** This file's type, if known. */ + enum bfs_type type; + /** The device number, for cycle detection. */ + dev_t dev; + /** The inode number, for cycle detection. */ + ino_t ino; + + /** The offset of this file in the full path. */ + size_t nameoff; + /** The length of the file's name. */ + size_t namelen; + /** The file's name. */ + char name[]; +}; + +/** + * A cache of open directories. + */ +struct bftw_cache { + /** The head of the LRU list. */ + struct bftw_file *head; + /** The insertion target for the LRU list. */ + struct bftw_file *target; + /** The tail of the LRU list. */ + struct bftw_file *tail; + /** The remaining capacity of the LRU list. */ + size_t capacity; +}; + +/** Initialize a cache. */ +static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { + cache->head = NULL; + cache->target = NULL; + cache->tail = NULL; + cache->capacity = capacity; +} + +/** Destroy a cache. */ +static void bftw_cache_destroy(struct bftw_cache *cache) { + assert(!cache->tail); + assert(!cache->target); + assert(!cache->head); +} + +/** Add a bftw_file to the cache. */ +static void bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { + assert(cache->capacity > 0); + assert(file->fd >= 0); + assert(!file->lru_prev); + assert(!file->lru_next); + + if (cache->target) { + file->lru_prev = cache->target; + file->lru_next = cache->target->lru_next; + } else { + file->lru_next = cache->head; + } + + if (file->lru_prev) { + file->lru_prev->lru_next = file; + } else { + cache->head = file; + } + + if (file->lru_next) { + file->lru_next->lru_prev = file; + } else { + cache->tail = file; + } + + // Prefer to keep the root paths open by keeping them at the head of the list + if (file->depth == 0) { + cache->target = file; + } + + --cache->capacity; +} + +/** Remove a bftw_file from the cache. */ +static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { + if (cache->target == file) { + cache->target = file->lru_prev; + } + + if (file->lru_prev) { + assert(cache->head != file); + file->lru_prev->lru_next = file->lru_next; + } else { + assert(cache->head == file); + cache->head = file->lru_next; + } + + if (file->lru_next) { + assert(cache->tail != file); + file->lru_next->lru_prev = file->lru_prev; + } else { + assert(cache->tail == file); + cache->tail = file->lru_prev; + } + + file->lru_prev = NULL; + file->lru_next = NULL; + ++cache->capacity; +} + +/** Mark a cache entry as recently used. */ +static void bftw_cache_use(struct bftw_cache *cache, struct bftw_file *file) { + bftw_cache_remove(cache, file); + bftw_cache_add(cache, file); +} + +/** Close a bftw_file. */ +static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { + assert(file->fd >= 0); + + bftw_cache_remove(cache, file); + + xclose(file->fd); + file->fd = -1; +} + +/** Pop a directory from the cache. */ +static void bftw_cache_pop(struct bftw_cache *cache) { + assert(cache->tail); + bftw_file_close(cache, cache->tail); +} + +/** + * Shrink the cache, to recover from EMFILE. + * + * @param cache + * The cache in question. + * @param saved + * A bftw_file that must be preserved. + * @return + * 0 if successfully shrunk, otherwise -1. + */ +static int bftw_cache_shrink(struct bftw_cache *cache, const struct bftw_file *saved) { + struct bftw_file *file = cache->tail; + if (!file) { + return -1; + } + + if (file == saved) { + file = file->lru_prev; + if (!file) { + return -1; + } + } + + bftw_file_close(cache, file); + cache->capacity = 0; + return 0; +} + +/** Compute the name offset of a child path. */ +static size_t bftw_child_nameoff(const struct bftw_file *parent) { + size_t ret = parent->nameoff + parent->namelen; + if (parent->name[parent->namelen - 1] != '/') { + ++ret; + } + return ret; +} + +/** Create a new bftw_file. */ +static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *name) { + size_t namelen = strlen(name); + size_t size = BFS_FLEX_SIZEOF(struct bftw_file, name, namelen + 1); + + struct bftw_file *file = malloc(size); + if (!file) { + return NULL; + } + + file->parent = parent; + + if (parent) { + file->root = parent->root; + file->depth = parent->depth + 1; + file->nameoff = bftw_child_nameoff(parent); + ++parent->refcount; + } else { + file->root = file; + file->depth = 0; + file->nameoff = 0; + } + + file->next = NULL; + + file->lru_prev = NULL; + file->lru_next = NULL; + + file->refcount = 1; + file->fd = -1; + + file->type = BFS_UNKNOWN; + file->dev = -1; + file->ino = -1; + + file->namelen = namelen; + memcpy(file->name, name, namelen + 1); + + return file; +} + +/** + * Open a bftw_file relative to another one. + * + * @param cache + * The cache to hold the file. + * @param file + * The file to open. + * @param base + * The base directory for the relative path (may be NULL). + * @param at_fd + * The base file descriptor, AT_FDCWD if base == NULL. + * @param at_path + * The relative path to the file. + * @return + * The opened file descriptor, or negative on error. + */ +static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, struct bftw_file *base, const char *at_path) { + assert(file->fd < 0); + + int at_fd = AT_FDCWD; + if (base) { + bftw_cache_use(cache, base); + at_fd = base->fd; + } + + int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; + int fd = openat(at_fd, at_path, flags); + + if (fd < 0 && errno == EMFILE) { + if (bftw_cache_shrink(cache, base) == 0) { + fd = openat(at_fd, at_path, flags); + } + } + + if (fd >= 0) { + if (cache->capacity == 0) { + bftw_cache_pop(cache); + } + + file->fd = fd; + bftw_cache_add(cache, file); + } + + return fd; +} + +/** + * Open a bftw_file. + * + * @param cache + * The cache to hold the file. + * @param file + * The file to open. + * @param path + * The full path to the file. + * @return + * The opened file descriptor, or negative on error. + */ +static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + // Find the nearest open ancestor + struct bftw_file *base = file; + do { + base = base->parent; + } while (base && base->fd < 0); + + const char *at_path = path; + if (base) { + at_path += bftw_child_nameoff(base); + } + + int fd = bftw_file_openat(cache, file, base, at_path); + if (fd >= 0 || errno != ENAMETOOLONG) { + return fd; + } + + // Handle ENAMETOOLONG by manually traversing the path component-by-component + + // Use the ->next linked list to temporarily hold the reversed parent + // chain between base and file + struct bftw_file *cur; + for (cur = file; cur->parent != base; cur = cur->parent) { + cur->parent->next = cur; + } + + // Open the files in the chain one by one + for (base = cur; base; base = base->next) { + fd = bftw_file_openat(cache, base, base->parent, base->name); + if (fd < 0 || base == file) { + break; + } + } + + // Clear out the linked list + for (struct bftw_file *next = cur->next; cur != file; cur = next, next = next->next) { + cur->next = NULL; + } + + return fd; +} + +/** + * Open a bftw_file as a directory. + * + * @param cache + * The cache to hold the file. + * @param file + * The directory to open. + * @param path + * The full path to the directory. + * @return + * The opened directory, or NULL on error. + */ +static struct bfs_dir *bftw_file_opendir(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + int fd = bftw_file_open(cache, file, path); + if (fd < 0) { + return NULL; + } + + return bfs_opendir(fd, NULL); +} + +/** Free a bftw_file. */ +static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { + assert(file->refcount == 0); + + if (file->fd >= 0) { + bftw_file_close(cache, file); + } + + free(file); +} + +/** + * A queue of bftw_file's to examine. + */ +struct bftw_queue { + /** The head of the queue. */ + struct bftw_file *head; + /** The insertion target. */ + struct bftw_file **target; +}; + +/** Initialize a bftw_queue. */ +static void bftw_queue_init(struct bftw_queue *queue) { + queue->head = NULL; + queue->target = &queue->head; +} + +/** Add a file to a bftw_queue. */ +static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { + assert(file->next == NULL); + + file->next = *queue->target; + *queue->target = file; + queue->target = &file->next; +} + +/** Pop the next file from the head of the queue. */ +static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { + struct bftw_file *file = queue->head; + queue->head = file->next; + file->next = NULL; + if (queue->target == &file->next) { + queue->target = &queue->head; + } + return file; +} + +/** The split phase of mergesort. */ +static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **tortoise = head, **hare = head; + + while (*hare != *tail) { + tortoise = &(*tortoise)->next; + hare = &(*hare)->next; + if (*hare != *tail) { + hare = &(*hare)->next; + } + } + + return tortoise; +} + +/** The merge phase of mergesort. */ +static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { + struct bftw_file *left = *head, *right = *mid, *end = *tail; + *mid = NULL; + *tail = NULL; + + while (left || right) { + struct bftw_file *next; + if (left && (!right || strcoll(left->name, right->name) <= 0)) { + next = left; + left = left->next; + } else { + next = right; + right = right->next; + } + + *head = next; + head = &next->next; + } + + *head = end; + return head; +} + +/** + * Sort a (sub-)list of files. + * + * @param head + * The head of the (sub-)list to sort. + * @param tail + * The tail of the (sub-)list to sort. + * @return + * The new tail of the (sub-)list. + */ +static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { + struct bftw_file **mid = bftw_sort_split(head, tail); + if (*mid == *head || *mid == *tail) { + return tail; + } + + mid = bftw_sort_files(head, mid); + tail = bftw_sort_files(mid, tail); + + return bftw_sort_merge(head, mid, tail); +} + +/** + * Holds the current state of the bftw() traversal. + */ +struct bftw_state { + /** bftw() callback. */ + bftw_callback *callback; + /** bftw() callback data. */ + void *ptr; + /** bftw() flags. */ + enum bftw_flags flags; + /** Search strategy. */ + enum bftw_strategy strategy; + /** The mount table. */ + const struct bfs_mtab *mtab; + + /** The appropriate errno value, if any. */ + int error; + + /** The cache of open directories. */ + struct bftw_cache cache; + /** The queue of directories left to explore. */ + struct bftw_queue queue; + /** The start of the current batch of files. */ + struct bftw_file **batch; + + /** The current path. */ + char *path; + /** The current file. */ + struct bftw_file *file; + /** The previous file. */ + struct bftw_file *previous; + + /** The currently open directory. */ + struct bfs_dir *dir; + /** The current directory entry. */ + struct bfs_dirent *de; + /** Storage for the directory entry. */ + struct bfs_dirent de_storage; + /** Any error encountered while reading the directory. */ + int direrror; + + /** Extra data about the current file. */ + struct BFTW ftwbuf; +}; + +/** + * Initialize the bftw() state. + */ +static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { + state->callback = args->callback; + state->ptr = args->ptr; + state->flags = args->flags; + state->strategy = args->strategy; + state->mtab = args->mtab; + + state->error = 0; + + if (args->nopenfd < 1) { + errno = EMFILE; + return -1; + } + + state->path = dstralloc(0); + if (!state->path) { + return -1; + } + + bftw_cache_init(&state->cache, args->nopenfd); + bftw_queue_init(&state->queue); + state->batch = NULL; + + state->file = NULL; + state->previous = NULL; + + state->dir = NULL; + state->de = NULL; + state->direrror = 0; + + return 0; +} + +/** Cached bfs_stat(). */ +static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { + if (!cache->buf) { + if (cache->error) { + errno = cache->error; + } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { + cache->buf = &cache->storage; + } else { + cache->error = errno; + } + } + + return cache->buf; +} + +const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + const struct bfs_stat *ret; + + if (flags & BFS_STAT_NOFOLLOW) { + ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); + if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { + // Non-link, so share stat info + mutbuf->stat_cache.buf = ret; + } + } else { + ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); + if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { + ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); + } + } + + return ret; +} + +const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + return ftwbuf->lstat_cache.buf; + } else if (ftwbuf->stat_cache.buf) { + return ftwbuf->stat_cache.buf; + } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { + return ftwbuf->lstat_cache.buf; + } else { + return NULL; + } +} + +enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { + if (flags & BFS_STAT_NOFOLLOW) { + if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return ftwbuf->type; + } + } else if (flags & BFS_STAT_TRYFOLLOW) { + if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { + return ftwbuf->type; + } + } else { + if (ftwbuf->type != BFS_LNK) { + return ftwbuf->type; + } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { + return BFS_ERROR; + } + } + + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); + if (statbuf) { + return bfs_mode_to_type(statbuf->mode); + } else { + return BFS_ERROR; + } +} + +/** + * Update the path for the current file. + */ +static int bftw_update_path(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + size_t length = file ? file->nameoff + file->namelen : 0; + + assert(dstrlen(state->path) >= length); + dstresize(&state->path, length); + + if (name) { + if (length > 0 && state->path[length - 1] != '/') { + if (dstrapp(&state->path, '/') != 0) { + return -1; + } + } + if (dstrcat(&state->path, name) != 0) { + return -1; + } + } + + return 0; +} + +/** Check if a stat() call is needed for this visit. */ +static bool bftw_need_stat(const struct bftw_state *state) { + if (state->flags & BFTW_STAT) { + return true; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + if (ftwbuf->type == BFS_UNKNOWN) { + return true; + } + + if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + return true; + } + + if (ftwbuf->type == BFS_DIR) { + if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { + return true; + } +#if __linux__ + } else if (state->mtab) { + // Linux fills in d_type from the underlying inode, even when + // the directory entry is a bind mount point. In that case, we + // need to stat() to get the correct type. We don't need to + // check for directories because they can only be mounted over + // by other directories. + if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { + return true; + } +#endif + } + + return false; +} + +/** Initialize bftw_stat cache. */ +static void bftw_stat_init(struct bftw_stat *cache) { + cache->buf = NULL; + cache->error = 0; +} + +/** + * Open a file if necessary. + * + * @param file + * The file to open. + * @param path + * The path to that file or one of its descendants. + * @return + * The opened file descriptor, or -1 on error. + */ +static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { + int ret = file->fd; + + if (ret < 0) { + char *copy = strndup(path, file->nameoff + file->namelen); + if (!copy) { + return -1; + } + + ret = bftw_file_open(cache, file, copy); + free(copy); + } + + return ret; +} + +/** + * Initialize the buffers with data about the current path. + */ +static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { + struct bftw_file *file = state->file; + const struct bfs_dirent *de = state->de; + + struct BFTW *ftwbuf = &state->ftwbuf; + ftwbuf->path = state->path; + ftwbuf->root = file ? file->root->name : ftwbuf->path; + ftwbuf->depth = 0; + ftwbuf->visit = visit; + ftwbuf->type = BFS_UNKNOWN; + ftwbuf->error = state->direrror; + ftwbuf->at_fd = AT_FDCWD; + ftwbuf->at_path = ftwbuf->path; + ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; + bftw_stat_init(&ftwbuf->lstat_cache); + bftw_stat_init(&ftwbuf->stat_cache); + + struct bftw_file *parent = NULL; + if (de) { + parent = file; + ftwbuf->depth = file->depth + 1; + ftwbuf->type = de->type; + ftwbuf->nameoff = bftw_child_nameoff(file); + } else if (file) { + parent = file->parent; + ftwbuf->depth = file->depth; + ftwbuf->type = file->type; + ftwbuf->nameoff = file->nameoff; + } + + if (parent) { + // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG + if (bftw_ensure_open(&state->cache, parent, state->path) >= 0) { + ftwbuf->at_fd = parent->fd; + ftwbuf->at_path += ftwbuf->nameoff; + } else { + ftwbuf->error = errno; + } + } + + if (ftwbuf->depth == 0) { + // Compute the name offset for root paths like "foo/bar" + ftwbuf->nameoff = xbasename(ftwbuf->path) - ftwbuf->path; + } + + if (ftwbuf->error != 0) { + ftwbuf->type = BFS_ERROR; + return; + } + + int follow_flags = BFTW_FOLLOW_ALL; + if (ftwbuf->depth == 0) { + follow_flags |= BFTW_FOLLOW_ROOTS; + } + bool follow = state->flags & follow_flags; + if (follow) { + ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; + } + + const struct bfs_stat *statbuf = NULL; + if (bftw_need_stat(state)) { + statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + if (statbuf) { + ftwbuf->type = bfs_mode_to_type(statbuf->mode); + } else { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = errno; + return; + } + } + + if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { + for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) { + if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { + ftwbuf->type = BFS_ERROR; + ftwbuf->error = ELOOP; + return; + } + } + } +} + +/** Check if the current file is a mount point. */ +static bool bftw_is_mount(struct bftw_state *state, const char *name) { + const struct bftw_file *file = state->file; + if (!file) { + return false; + } + + const struct bftw_file *parent = name ? file : file->parent; + if (!parent) { + return false; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); + return statbuf && statbuf->dev != parent->dev; +} + +/** Fill file identity information from an ftwbuf. */ +static void bftw_fill_id(struct bftw_file *file, const struct BFTW *ftwbuf) { + const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; + if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { + statbuf = ftwbuf->lstat_cache.buf; + } + if (statbuf) { + file->dev = statbuf->dev; + file->ino = statbuf->ino; + } +} + +/** + * Visit a path, invoking the callback. + */ +static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, enum bftw_visit visit) { + if (bftw_update_path(state, name) != 0) { + state->error = errno; + return BFTW_STOP; + } + + const struct BFTW *ftwbuf = &state->ftwbuf; + bftw_init_ftwbuf(state, visit); + + // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified + if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) { + state->error = ftwbuf->error; + return BFTW_STOP; + } + + if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { + return BFTW_PRUNE; + } + + enum bftw_action ret = state->callback(ftwbuf, state->ptr); + switch (ret) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + case BFTW_STOP: + goto done; + default: + state->error = EINVAL; + return BFTW_STOP; + } + + if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { + ret = BFTW_PRUNE; + goto done; + } + + if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { + ret = BFTW_PRUNE; + goto done; + } + +done: + if (state->file && !name) { + bftw_fill_id(state->file, ftwbuf); + } + + return ret; +} + +/** + * Push a new file onto the queue. + */ +static int bftw_push(struct bftw_state *state, const char *name, bool fill_id) { + struct bftw_file *parent = state->file; + struct bftw_file *file = bftw_file_new(parent, name); + if (!file) { + state->error = errno; + return -1; + } + + if (state->de) { + file->type = state->de->type; + } + + if (fill_id) { + bftw_fill_id(file, &state->ftwbuf); + } + + bftw_queue_push(&state->queue, file); + + return 0; +} + +/** + * Build the path to the current file. + */ +static int bftw_build_path(struct bftw_state *state) { + const struct bftw_file *file = state->file; + + size_t pathlen = file->nameoff + file->namelen; + if (dstresize(&state->path, pathlen) != 0) { + state->error = errno; + return -1; + } + + // Try to find a common ancestor with the existing path + const struct bftw_file *ancestor = state->previous; + while (ancestor && ancestor->depth > file->depth) { + ancestor = ancestor->parent; + } + + // Build the path backwards + while (file && file != ancestor) { + if (file->nameoff > 0) { + state->path[file->nameoff - 1] = '/'; + } + memcpy(state->path + file->nameoff, file->name, file->namelen); + + if (ancestor && ancestor->depth == file->depth) { + ancestor = ancestor->parent; + } + file = file->parent; + } + + state->previous = state->file; + return 0; +} + +/** + * Pop the next file from the queue. + */ +static int bftw_pop(struct bftw_state *state) { + if (!state->queue.head) { + return 0; + } + + state->file = bftw_queue_pop(&state->queue); + + if (bftw_build_path(state) != 0) { + return -1; + } + + return 1; +} + +/** + * Open the current directory. + */ +static void bftw_opendir(struct bftw_state *state) { + assert(!state->dir); + assert(!state->de); + + state->direrror = 0; + + state->dir = bftw_file_opendir(&state->cache, state->file, state->path); + if (!state->dir) { + state->direrror = errno; + } +} + +/** + * Read an entry from the current directory. + */ +static int bftw_readdir(struct bftw_state *state) { + if (!state->dir) { + return -1; + } + + int ret = bfs_readdir(state->dir, &state->de_storage); + if (ret > 0) { + state->de = &state->de_storage; + } else if (ret == 0) { + state->de = NULL; + } else { + state->de = NULL; + state->direrror = errno; + } + + return ret; +} + +/** + * Flags controlling which files get visited when done with a directory. + */ +enum bftw_gc_flags { + /** Don't visit anything. */ + BFTW_VISIT_NONE = 0, + /** Visit the file itself. */ + BFTW_VISIT_FILE = 1 << 0, + /** Visit the file's ancestors. */ + BFTW_VISIT_PARENTS = 1 << 1, + /** Visit both the file and its ancestors. */ + BFTW_VISIT_ALL = BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, +}; + +/** + * Close the current directory. + */ +static enum bftw_action bftw_closedir(struct bftw_state *state, enum bftw_gc_flags flags) { + struct bftw_file *file = state->file; + enum bftw_action ret = BFTW_CONTINUE; + + if (state->dir) { + assert(file->fd >= 0); + + if (file->refcount > 1) { + // Keep the fd around if any subdirectories exist + file->fd = bfs_freedir(state->dir); + } else { + bfs_closedir(state->dir); + file->fd = -1; + } + + if (file->fd < 0) { + bftw_cache_remove(&state->cache, file); + } + } + + state->de = NULL; + state->dir = NULL; + + if (state->direrror != 0) { + if (flags & BFTW_VISIT_FILE) { + ret = bftw_visit(state, NULL, BFTW_PRE); + } else { + state->error = state->direrror; + } + state->direrror = 0; + } + + return ret; +} + +/** + * Finalize and free a file we're done with. + */ +static enum bftw_action bftw_gc_file(struct bftw_state *state, enum bftw_gc_flags flags) { + enum bftw_action ret = BFTW_CONTINUE; + + if (!(state->flags & BFTW_POST_ORDER)) { + flags = 0; + } + bool visit = flags & BFTW_VISIT_FILE; + + while (state->file) { + struct bftw_file *file = state->file; + if (--file->refcount > 0) { + state->file = NULL; + break; + } + + if (visit && bftw_visit(state, NULL, BFTW_POST) == BFTW_STOP) { + ret = BFTW_STOP; + flags &= ~BFTW_VISIT_PARENTS; + } + visit = flags & BFTW_VISIT_PARENTS; + + struct bftw_file *parent = file->parent; + if (state->previous == file) { + state->previous = parent; + } + bftw_file_free(&state->cache, file); + state->file = parent; + } + + return ret; +} + +/** + * Drain all the entries from a bftw_queue. + */ +static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) { + while (queue->head) { + state->file = bftw_queue_pop(queue); + bftw_gc_file(state, BFTW_VISIT_NONE); + } +} + +/** + * Dispose of the bftw() state. + * + * @return + * The bftw() return value. + */ +static int bftw_state_destroy(struct bftw_state *state) { + dstrfree(state->path); + + bftw_closedir(state, BFTW_VISIT_NONE); + + bftw_gc_file(state, BFTW_VISIT_NONE); + bftw_drain_queue(state, &state->queue); + + bftw_cache_destroy(&state->cache); + + errno = state->error; + return state->error ? -1 : 0; +} + +/** Start a batch of files. */ +static void bftw_batch_start(struct bftw_state *state) { + if (state->strategy == BFTW_DFS) { + state->queue.target = &state->queue.head; + } + state->batch = state->queue.target; +} + +/** Finish adding a batch of files. */ +static void bftw_batch_finish(struct bftw_state *state) { + if (state->flags & BFTW_SORT) { + state->queue.target = bftw_sort_files(state->batch, state->queue.target); + } +} + +/** + * Streaming mode: visit files as they are encountered. + */ +static int bftw_stream(const struct bftw_args *args) { + struct bftw_state state; + if (bftw_state_init(&state, args) != 0) { + return -1; + } + + assert(!(state.flags & (BFTW_SORT | BFTW_BUFFER))); + + bftw_batch_start(&state); + for (size_t i = 0; i < args->npaths; ++i) { + const char *path = args->paths[i]; + + switch (bftw_visit(&state, path, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + continue; + case BFTW_STOP: + goto done; + } + + if (bftw_push(&state, path, true) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + while (bftw_pop(&state) > 0) { + bftw_opendir(&state); + + bftw_batch_start(&state); + while (bftw_readdir(&state) > 0) { + const char *name = state.de->name; + + switch (bftw_visit(&state, name, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + continue; + case BFTW_STOP: + goto done; + } + + if (bftw_push(&state, name, true) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + if (bftw_closedir(&state, BFTW_VISIT_ALL) == BFTW_STOP) { + goto done; + } + if (bftw_gc_file(&state, BFTW_VISIT_ALL) == BFTW_STOP) { + goto done; + } + } + +done: + return bftw_state_destroy(&state); +} + +/** + * Batching mode: queue up all children before visiting them. + */ +static int bftw_batch(const struct bftw_args *args) { + struct bftw_state state; + if (bftw_state_init(&state, args) != 0) { + return -1; + } + + bftw_batch_start(&state); + for (size_t i = 0; i < args->npaths; ++i) { + if (bftw_push(&state, args->paths[i], false) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + while (bftw_pop(&state) > 0) { + enum bftw_gc_flags gcflags = BFTW_VISIT_ALL; + + switch (bftw_visit(&state, NULL, BFTW_PRE)) { + case BFTW_CONTINUE: + break; + case BFTW_PRUNE: + gcflags &= ~BFTW_VISIT_FILE; + goto next; + case BFTW_STOP: + goto done; + } + + bftw_opendir(&state); + + bftw_batch_start(&state); + while (bftw_readdir(&state) > 0) { + if (bftw_push(&state, state.de->name, false) != 0) { + goto done; + } + } + bftw_batch_finish(&state); + + if (bftw_closedir(&state, gcflags) == BFTW_STOP) { + goto done; + } + + next: + if (bftw_gc_file(&state, gcflags) == BFTW_STOP) { + goto done; + } + } + +done: + return bftw_state_destroy(&state); +} + +/** Select bftw_stream() or bftw_batch() appropriately. */ +static int bftw_auto(const struct bftw_args *args) { + if (args->flags & (BFTW_SORT | BFTW_BUFFER)) { + return bftw_batch(args); + } else { + return bftw_stream(args); + } +} + +/** + * Iterative deepening search state. + */ +struct bftw_ids_state { + /** The wrapped callback. */ + bftw_callback *delegate; + /** The wrapped callback arguments. */ + void *ptr; + /** Which visit this search corresponds to. */ + enum bftw_visit visit; + /** Whether to override the bftw_visit. */ + bool force_visit; + /** The current minimum depth (inclusive). */ + size_t min_depth; + /** The current maximum depth (exclusive). */ + size_t max_depth; + /** The set of pruned paths. */ + struct trie pruned; + /** An error code to report. */ + int error; + /** Whether the bottom has been found. */ + bool bottom; + /** Whether to quit the search. */ + bool quit; +}; + +/** Iterative deepening callback function. */ +static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) { + struct bftw_ids_state *state = ptr; + + if (state->force_visit) { + struct BFTW *mutbuf = (struct BFTW *)ftwbuf; + mutbuf->visit = state->visit; + } + + if (ftwbuf->type == BFS_ERROR) { + if (ftwbuf->depth + 1 >= state->min_depth) { + return state->delegate(ftwbuf, state->ptr); + } else { + return BFTW_PRUNE; + } + } + + if (ftwbuf->depth < state->min_depth) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } else { + return BFTW_CONTINUE; + } + } else if (state->visit == BFTW_POST) { + if (trie_find_str(&state->pruned, ftwbuf->path)) { + return BFTW_PRUNE; + } + } + + enum bftw_action ret = BFTW_CONTINUE; + if (ftwbuf->visit == state->visit) { + ret = state->delegate(ftwbuf, state->ptr); + } + + switch (ret) { + case BFTW_CONTINUE: + if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) { + state->bottom = false; + ret = BFTW_PRUNE; + } + break; + case BFTW_PRUNE: + if (ftwbuf->type == BFS_DIR) { + if (!trie_insert_str(&state->pruned, ftwbuf->path)) { + state->error = errno; + state->quit = true; + ret = BFTW_STOP; + } + } + break; + case BFTW_STOP: + state->quit = true; + break; + } + + return ret; +} + +/** Initialize iterative deepening state. */ +static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *state, struct bftw_args *ids_args) { + state->delegate = args->callback; + state->ptr = args->ptr; + state->visit = BFTW_PRE; + state->force_visit = false; + state->min_depth = 0; + state->max_depth = 1; + trie_init(&state->pruned); + state->error = 0; + state->bottom = false; + state->quit = false; + + *ids_args = *args; + ids_args->callback = bftw_ids_callback; + ids_args->ptr = state; + ids_args->flags &= ~BFTW_POST_ORDER; + ids_args->strategy = BFTW_DFS; +} + +/** Finish an iterative deepening search. */ +static int bftw_ids_finish(struct bftw_ids_state *state) { + int ret = 0; + + if (state->error) { + ret = -1; + } else { + state->error = errno; + } + + trie_destroy(&state->pruned); + + errno = state->error; + return ret; +} + +/** + * Iterative deepening bftw() wrapper. + */ +static int bftw_ids(const struct bftw_args *args) { + struct bftw_ids_state state; + struct bftw_args ids_args; + bftw_ids_init(args, &state, &ids_args); + + while (!state.quit && !state.bottom) { + state.bottom = true; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + + ++state.min_depth; + ++state.max_depth; + } + + if (args->flags & BFTW_POST_ORDER) { + state.visit = BFTW_POST; + state.force_visit = true; + + while (!state.quit && state.min_depth > 0) { + --state.max_depth; + --state.min_depth; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + } + } + + return bftw_ids_finish(&state); +} + +/** + * Exponential deepening bftw() wrapper. + */ +static int bftw_eds(const struct bftw_args *args) { + struct bftw_ids_state state; + struct bftw_args ids_args; + bftw_ids_init(args, &state, &ids_args); + + while (!state.quit && !state.bottom) { + state.bottom = true; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + state.quit = true; + } + + state.min_depth = state.max_depth; + state.max_depth *= 2; + } + + if (!state.quit && (args->flags & BFTW_POST_ORDER)) { + state.visit = BFTW_POST; + state.min_depth = 0; + ids_args.flags |= BFTW_POST_ORDER; + + if (bftw_auto(&ids_args) != 0) { + state.error = errno; + } + } + + return bftw_ids_finish(&state); +} + +int bftw(const struct bftw_args *args) { + switch (args->strategy) { + case BFTW_BFS: + return bftw_auto(args); + case BFTW_DFS: + return bftw_batch(args); + case BFTW_IDS: + return bftw_ids(args); + case BFTW_EDS: + return bftw_eds(args); + } + + errno = EINVAL; + return -1; +} |