diff options
Diffstat (limited to 'bftw.c')
-rw-r--r-- | bftw.c | 1494 |
1 files changed, 0 insertions, 1494 deletions
@@ -1,1494 +0,0 @@ -/**************************************************************************** - * bfs * - * Copyright (C) 2015-2022 Tavian Barnes <tavianator@tavianator.com> * - * * - * Permission to use, copy, modify, and/or distribute this software for any * - * purpose with or without fee is hereby granted. * - * * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * - ****************************************************************************/ - -/** - * The bftw() implementation consists of the following components: - * - * - struct bftw_file: A file that has been encountered during the traversal. - * They have reference-counted links to their parents in the directory tree. - * - * - struct bftw_cache: An LRU list of bftw_file's with open file descriptors, - * used for openat() to minimize the amount of path re-traversals. - * - * - struct bftw_queue: The queue of bftw_file's left to explore. Implemented - * as a simple circular buffer. - * - * - struct bftw_state: Represents the current state of the traversal, allowing - * various helper functions to take fewer parameters. - */ - -#include "bftw.h" -#include "dir.h" -#include "darray.h" -#include "dstring.h" -#include "mtab.h" -#include "stat.h" -#include "trie.h" -#include "util.h" -#include <assert.h> -#include <errno.h> -#include <fcntl.h> -#include <stdbool.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <unistd.h> - -/** - * A file. - */ -struct bftw_file { - /** The parent directory, if any. */ - struct bftw_file *parent; - /** The root under which this file was found. */ - struct bftw_file *root; - /** The next file in the queue, if any. */ - struct bftw_file *next; - - /** The previous file in the LRU list. */ - struct bftw_file *lru_prev; - /** The next file in the LRU list. */ - struct bftw_file *lru_next; - - /** This file's depth in the walk. */ - size_t depth; - /** Reference count. */ - size_t refcount; - - /** An open descriptor to this file, or -1. */ - int fd; - - /** This file's type, if known. */ - enum bfs_type type; - /** The device number, for cycle detection. */ - dev_t dev; - /** The inode number, for cycle detection. */ - ino_t ino; - - /** The offset of this file in the full path. */ - size_t nameoff; - /** The length of the file's name. */ - size_t namelen; - /** The file's name. */ - char name[]; -}; - -/** - * A cache of open directories. - */ -struct bftw_cache { - /** The head of the LRU list. */ - struct bftw_file *head; - /** The insertion target for the LRU list. */ - struct bftw_file *target; - /** The tail of the LRU list. */ - struct bftw_file *tail; - /** The remaining capacity of the LRU list. */ - size_t capacity; -}; - -/** Initialize a cache. */ -static void bftw_cache_init(struct bftw_cache *cache, size_t capacity) { - cache->head = NULL; - cache->target = NULL; - cache->tail = NULL; - cache->capacity = capacity; -} - -/** Destroy a cache. */ -static void bftw_cache_destroy(struct bftw_cache *cache) { - assert(!cache->tail); - assert(!cache->target); - assert(!cache->head); -} - -/** Add a bftw_file to the cache. */ -static void bftw_cache_add(struct bftw_cache *cache, struct bftw_file *file) { - assert(cache->capacity > 0); - assert(file->fd >= 0); - assert(!file->lru_prev); - assert(!file->lru_next); - - if (cache->target) { - file->lru_prev = cache->target; - file->lru_next = cache->target->lru_next; - } else { - file->lru_next = cache->head; - } - - if (file->lru_prev) { - file->lru_prev->lru_next = file; - } else { - cache->head = file; - } - - if (file->lru_next) { - file->lru_next->lru_prev = file; - } else { - cache->tail = file; - } - - // Prefer to keep the root paths open by keeping them at the head of the list - if (file->depth == 0) { - cache->target = file; - } - - --cache->capacity; -} - -/** Remove a bftw_file from the cache. */ -static void bftw_cache_remove(struct bftw_cache *cache, struct bftw_file *file) { - if (cache->target == file) { - cache->target = file->lru_prev; - } - - if (file->lru_prev) { - assert(cache->head != file); - file->lru_prev->lru_next = file->lru_next; - } else { - assert(cache->head == file); - cache->head = file->lru_next; - } - - if (file->lru_next) { - assert(cache->tail != file); - file->lru_next->lru_prev = file->lru_prev; - } else { - assert(cache->tail == file); - cache->tail = file->lru_prev; - } - - file->lru_prev = NULL; - file->lru_next = NULL; - ++cache->capacity; -} - -/** Mark a cache entry as recently used. */ -static void bftw_cache_use(struct bftw_cache *cache, struct bftw_file *file) { - bftw_cache_remove(cache, file); - bftw_cache_add(cache, file); -} - -/** Close a bftw_file. */ -static void bftw_file_close(struct bftw_cache *cache, struct bftw_file *file) { - assert(file->fd >= 0); - - bftw_cache_remove(cache, file); - - xclose(file->fd); - file->fd = -1; -} - -/** Pop a directory from the cache. */ -static void bftw_cache_pop(struct bftw_cache *cache) { - assert(cache->tail); - bftw_file_close(cache, cache->tail); -} - -/** - * Shrink the cache, to recover from EMFILE. - * - * @param cache - * The cache in question. - * @param saved - * A bftw_file that must be preserved. - * @return - * 0 if successfully shrunk, otherwise -1. - */ -static int bftw_cache_shrink(struct bftw_cache *cache, const struct bftw_file *saved) { - struct bftw_file *file = cache->tail; - if (!file) { - return -1; - } - - if (file == saved) { - file = file->lru_prev; - if (!file) { - return -1; - } - } - - bftw_file_close(cache, file); - cache->capacity = 0; - return 0; -} - -/** Compute the name offset of a child path. */ -static size_t bftw_child_nameoff(const struct bftw_file *parent) { - size_t ret = parent->nameoff + parent->namelen; - if (parent->name[parent->namelen - 1] != '/') { - ++ret; - } - return ret; -} - -/** Create a new bftw_file. */ -static struct bftw_file *bftw_file_new(struct bftw_file *parent, const char *name) { - size_t namelen = strlen(name); - size_t size = BFS_FLEX_SIZEOF(struct bftw_file, name, namelen + 1); - - struct bftw_file *file = malloc(size); - if (!file) { - return NULL; - } - - file->parent = parent; - - if (parent) { - file->root = parent->root; - file->depth = parent->depth + 1; - file->nameoff = bftw_child_nameoff(parent); - ++parent->refcount; - } else { - file->root = file; - file->depth = 0; - file->nameoff = 0; - } - - file->next = NULL; - - file->lru_prev = NULL; - file->lru_next = NULL; - - file->refcount = 1; - file->fd = -1; - - file->type = BFS_UNKNOWN; - file->dev = -1; - file->ino = -1; - - file->namelen = namelen; - memcpy(file->name, name, namelen + 1); - - return file; -} - -/** - * Open a bftw_file relative to another one. - * - * @param cache - * The cache to hold the file. - * @param file - * The file to open. - * @param base - * The base directory for the relative path (may be NULL). - * @param at_fd - * The base file descriptor, AT_FDCWD if base == NULL. - * @param at_path - * The relative path to the file. - * @return - * The opened file descriptor, or negative on error. - */ -static int bftw_file_openat(struct bftw_cache *cache, struct bftw_file *file, struct bftw_file *base, const char *at_path) { - assert(file->fd < 0); - - int at_fd = AT_FDCWD; - if (base) { - bftw_cache_use(cache, base); - at_fd = base->fd; - } - - int flags = O_RDONLY | O_CLOEXEC | O_DIRECTORY; - int fd = openat(at_fd, at_path, flags); - - if (fd < 0 && errno == EMFILE) { - if (bftw_cache_shrink(cache, base) == 0) { - fd = openat(at_fd, at_path, flags); - } - } - - if (fd >= 0) { - if (cache->capacity == 0) { - bftw_cache_pop(cache); - } - - file->fd = fd; - bftw_cache_add(cache, file); - } - - return fd; -} - -/** - * Open a bftw_file. - * - * @param cache - * The cache to hold the file. - * @param file - * The file to open. - * @param path - * The full path to the file. - * @return - * The opened file descriptor, or negative on error. - */ -static int bftw_file_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { - // Find the nearest open ancestor - struct bftw_file *base = file; - do { - base = base->parent; - } while (base && base->fd < 0); - - const char *at_path = path; - if (base) { - at_path += bftw_child_nameoff(base); - } - - int fd = bftw_file_openat(cache, file, base, at_path); - if (fd >= 0 || errno != ENAMETOOLONG) { - return fd; - } - - // Handle ENAMETOOLONG by manually traversing the path component-by-component - - // Use the ->next linked list to temporarily hold the reversed parent - // chain between base and file - struct bftw_file *cur; - for (cur = file; cur->parent != base; cur = cur->parent) { - cur->parent->next = cur; - } - - // Open the files in the chain one by one - for (base = cur; base; base = base->next) { - fd = bftw_file_openat(cache, base, base->parent, base->name); - if (fd < 0 || base == file) { - break; - } - } - - // Clear out the linked list - for (struct bftw_file *next = cur->next; cur != file; cur = next, next = next->next) { - cur->next = NULL; - } - - return fd; -} - -/** - * Open a bftw_file as a directory. - * - * @param cache - * The cache to hold the file. - * @param file - * The directory to open. - * @param path - * The full path to the directory. - * @return - * The opened directory, or NULL on error. - */ -static struct bfs_dir *bftw_file_opendir(struct bftw_cache *cache, struct bftw_file *file, const char *path) { - int fd = bftw_file_open(cache, file, path); - if (fd < 0) { - return NULL; - } - - return bfs_opendir(fd, NULL); -} - -/** Free a bftw_file. */ -static void bftw_file_free(struct bftw_cache *cache, struct bftw_file *file) { - assert(file->refcount == 0); - - if (file->fd >= 0) { - bftw_file_close(cache, file); - } - - free(file); -} - -/** - * A queue of bftw_file's to examine. - */ -struct bftw_queue { - /** The head of the queue. */ - struct bftw_file *head; - /** The insertion target. */ - struct bftw_file **target; -}; - -/** Initialize a bftw_queue. */ -static void bftw_queue_init(struct bftw_queue *queue) { - queue->head = NULL; - queue->target = &queue->head; -} - -/** Add a file to a bftw_queue. */ -static void bftw_queue_push(struct bftw_queue *queue, struct bftw_file *file) { - assert(file->next == NULL); - - file->next = *queue->target; - *queue->target = file; - queue->target = &file->next; -} - -/** Pop the next file from the head of the queue. */ -static struct bftw_file *bftw_queue_pop(struct bftw_queue *queue) { - struct bftw_file *file = queue->head; - queue->head = file->next; - file->next = NULL; - if (queue->target == &file->next) { - queue->target = &queue->head; - } - return file; -} - -/** The split phase of mergesort. */ -static struct bftw_file **bftw_sort_split(struct bftw_file **head, struct bftw_file **tail) { - struct bftw_file **tortoise = head, **hare = head; - - while (*hare != *tail) { - tortoise = &(*tortoise)->next; - hare = &(*hare)->next; - if (*hare != *tail) { - hare = &(*hare)->next; - } - } - - return tortoise; -} - -/** The merge phase of mergesort. */ -static struct bftw_file **bftw_sort_merge(struct bftw_file **head, struct bftw_file **mid, struct bftw_file **tail) { - struct bftw_file *left = *head, *right = *mid, *end = *tail; - *mid = NULL; - *tail = NULL; - - while (left || right) { - struct bftw_file *next; - if (left && (!right || strcoll(left->name, right->name) <= 0)) { - next = left; - left = left->next; - } else { - next = right; - right = right->next; - } - - *head = next; - head = &next->next; - } - - *head = end; - return head; -} - -/** - * Sort a (sub-)list of files. - * - * @param head - * The head of the (sub-)list to sort. - * @param tail - * The tail of the (sub-)list to sort. - * @return - * The new tail of the (sub-)list. - */ -static struct bftw_file **bftw_sort_files(struct bftw_file **head, struct bftw_file **tail) { - struct bftw_file **mid = bftw_sort_split(head, tail); - if (*mid == *head || *mid == *tail) { - return tail; - } - - mid = bftw_sort_files(head, mid); - tail = bftw_sort_files(mid, tail); - - return bftw_sort_merge(head, mid, tail); -} - -/** - * Holds the current state of the bftw() traversal. - */ -struct bftw_state { - /** bftw() callback. */ - bftw_callback *callback; - /** bftw() callback data. */ - void *ptr; - /** bftw() flags. */ - enum bftw_flags flags; - /** Search strategy. */ - enum bftw_strategy strategy; - /** The mount table. */ - const struct bfs_mtab *mtab; - - /** The appropriate errno value, if any. */ - int error; - - /** The cache of open directories. */ - struct bftw_cache cache; - /** The queue of directories left to explore. */ - struct bftw_queue queue; - /** The start of the current batch of files. */ - struct bftw_file **batch; - - /** The current path. */ - char *path; - /** The current file. */ - struct bftw_file *file; - /** The previous file. */ - struct bftw_file *previous; - - /** The currently open directory. */ - struct bfs_dir *dir; - /** The current directory entry. */ - struct bfs_dirent *de; - /** Storage for the directory entry. */ - struct bfs_dirent de_storage; - /** Any error encountered while reading the directory. */ - int direrror; - - /** Extra data about the current file. */ - struct BFTW ftwbuf; -}; - -/** - * Initialize the bftw() state. - */ -static int bftw_state_init(struct bftw_state *state, const struct bftw_args *args) { - state->callback = args->callback; - state->ptr = args->ptr; - state->flags = args->flags; - state->strategy = args->strategy; - state->mtab = args->mtab; - - state->error = 0; - - if (args->nopenfd < 1) { - errno = EMFILE; - return -1; - } - - state->path = dstralloc(0); - if (!state->path) { - return -1; - } - - bftw_cache_init(&state->cache, args->nopenfd); - bftw_queue_init(&state->queue); - state->batch = NULL; - - state->file = NULL; - state->previous = NULL; - - state->dir = NULL; - state->de = NULL; - state->direrror = 0; - - return 0; -} - -/** Cached bfs_stat(). */ -static const struct bfs_stat *bftw_stat_impl(struct BFTW *ftwbuf, struct bftw_stat *cache, enum bfs_stat_flags flags) { - if (!cache->buf) { - if (cache->error) { - errno = cache->error; - } else if (bfs_stat(ftwbuf->at_fd, ftwbuf->at_path, flags, &cache->storage) == 0) { - cache->buf = &cache->storage; - } else { - cache->error = errno; - } - } - - return cache->buf; -} - -const struct bfs_stat *bftw_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - struct BFTW *mutbuf = (struct BFTW *)ftwbuf; - const struct bfs_stat *ret; - - if (flags & BFS_STAT_NOFOLLOW) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); - if (ret && !S_ISLNK(ret->mode) && !mutbuf->stat_cache.buf) { - // Non-link, so share stat info - mutbuf->stat_cache.buf = ret; - } - } else { - ret = bftw_stat_impl(mutbuf, &mutbuf->stat_cache, BFS_STAT_FOLLOW); - if (!ret && (flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(errno)) { - ret = bftw_stat_impl(mutbuf, &mutbuf->lstat_cache, BFS_STAT_NOFOLLOW); - } - } - - return ret; -} - -const struct bfs_stat *bftw_cached_stat(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - if (flags & BFS_STAT_NOFOLLOW) { - return ftwbuf->lstat_cache.buf; - } else if (ftwbuf->stat_cache.buf) { - return ftwbuf->stat_cache.buf; - } else if ((flags & BFS_STAT_TRYFOLLOW) && is_nonexistence_error(ftwbuf->stat_cache.error)) { - return ftwbuf->lstat_cache.buf; - } else { - return NULL; - } -} - -enum bfs_type bftw_type(const struct BFTW *ftwbuf, enum bfs_stat_flags flags) { - if (flags & BFS_STAT_NOFOLLOW) { - if (ftwbuf->type == BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - return ftwbuf->type; - } - } else if (flags & BFS_STAT_TRYFOLLOW) { - if (ftwbuf->type != BFS_LNK || (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW)) { - return ftwbuf->type; - } - } else { - if (ftwbuf->type != BFS_LNK) { - return ftwbuf->type; - } else if (ftwbuf->stat_flags & BFS_STAT_TRYFOLLOW) { - return BFS_ERROR; - } - } - - const struct bfs_stat *statbuf = bftw_stat(ftwbuf, flags); - if (statbuf) { - return bfs_mode_to_type(statbuf->mode); - } else { - return BFS_ERROR; - } -} - -/** - * Update the path for the current file. - */ -static int bftw_update_path(struct bftw_state *state, const char *name) { - const struct bftw_file *file = state->file; - size_t length = file ? file->nameoff + file->namelen : 0; - - assert(dstrlen(state->path) >= length); - dstresize(&state->path, length); - - if (name) { - if (length > 0 && state->path[length - 1] != '/') { - if (dstrapp(&state->path, '/') != 0) { - return -1; - } - } - if (dstrcat(&state->path, name) != 0) { - return -1; - } - } - - return 0; -} - -/** Check if a stat() call is needed for this visit. */ -static bool bftw_need_stat(const struct bftw_state *state) { - if (state->flags & BFTW_STAT) { - return true; - } - - const struct BFTW *ftwbuf = &state->ftwbuf; - if (ftwbuf->type == BFS_UNKNOWN) { - return true; - } - - if (ftwbuf->type == BFS_LNK && !(ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - return true; - } - - if (ftwbuf->type == BFS_DIR) { - if (state->flags & (BFTW_DETECT_CYCLES | BFTW_SKIP_MOUNTS | BFTW_PRUNE_MOUNTS)) { - return true; - } -#if __linux__ - } else if (state->mtab) { - // Linux fills in d_type from the underlying inode, even when - // the directory entry is a bind mount point. In that case, we - // need to stat() to get the correct type. We don't need to - // check for directories because they can only be mounted over - // by other directories. - if (bfs_might_be_mount(state->mtab, ftwbuf->path)) { - return true; - } -#endif - } - - return false; -} - -/** Initialize bftw_stat cache. */ -static void bftw_stat_init(struct bftw_stat *cache) { - cache->buf = NULL; - cache->error = 0; -} - -/** - * Open a file if necessary. - * - * @param file - * The file to open. - * @param path - * The path to that file or one of its descendants. - * @return - * The opened file descriptor, or -1 on error. - */ -static int bftw_ensure_open(struct bftw_cache *cache, struct bftw_file *file, const char *path) { - int ret = file->fd; - - if (ret < 0) { - char *copy = strndup(path, file->nameoff + file->namelen); - if (!copy) { - return -1; - } - - ret = bftw_file_open(cache, file, copy); - free(copy); - } - - return ret; -} - -/** - * Initialize the buffers with data about the current path. - */ -static void bftw_init_ftwbuf(struct bftw_state *state, enum bftw_visit visit) { - struct bftw_file *file = state->file; - const struct bfs_dirent *de = state->de; - - struct BFTW *ftwbuf = &state->ftwbuf; - ftwbuf->path = state->path; - ftwbuf->root = file ? file->root->name : ftwbuf->path; - ftwbuf->depth = 0; - ftwbuf->visit = visit; - ftwbuf->type = BFS_UNKNOWN; - ftwbuf->error = state->direrror; - ftwbuf->at_fd = AT_FDCWD; - ftwbuf->at_path = ftwbuf->path; - ftwbuf->stat_flags = BFS_STAT_NOFOLLOW; - bftw_stat_init(&ftwbuf->lstat_cache); - bftw_stat_init(&ftwbuf->stat_cache); - - struct bftw_file *parent = NULL; - if (de) { - parent = file; - ftwbuf->depth = file->depth + 1; - ftwbuf->type = de->type; - ftwbuf->nameoff = bftw_child_nameoff(file); - } else if (file) { - parent = file->parent; - ftwbuf->depth = file->depth; - ftwbuf->type = file->type; - ftwbuf->nameoff = file->nameoff; - } - - if (parent) { - // Try to ensure the immediate parent is open, to avoid ENAMETOOLONG - if (bftw_ensure_open(&state->cache, parent, state->path) >= 0) { - ftwbuf->at_fd = parent->fd; - ftwbuf->at_path += ftwbuf->nameoff; - } else { - ftwbuf->error = errno; - } - } - - if (ftwbuf->depth == 0) { - // Compute the name offset for root paths like "foo/bar" - ftwbuf->nameoff = xbasename(ftwbuf->path) - ftwbuf->path; - } - - if (ftwbuf->error != 0) { - ftwbuf->type = BFS_ERROR; - return; - } - - int follow_flags = BFTW_FOLLOW_ALL; - if (ftwbuf->depth == 0) { - follow_flags |= BFTW_FOLLOW_ROOTS; - } - bool follow = state->flags & follow_flags; - if (follow) { - ftwbuf->stat_flags = BFS_STAT_TRYFOLLOW; - } - - const struct bfs_stat *statbuf = NULL; - if (bftw_need_stat(state)) { - statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); - if (statbuf) { - ftwbuf->type = bfs_mode_to_type(statbuf->mode); - } else { - ftwbuf->type = BFS_ERROR; - ftwbuf->error = errno; - return; - } - } - - if (ftwbuf->type == BFS_DIR && (state->flags & BFTW_DETECT_CYCLES)) { - for (const struct bftw_file *ancestor = parent; ancestor; ancestor = ancestor->parent) { - if (ancestor->dev == statbuf->dev && ancestor->ino == statbuf->ino) { - ftwbuf->type = BFS_ERROR; - ftwbuf->error = ELOOP; - return; - } - } - } -} - -/** Check if the current file is a mount point. */ -static bool bftw_is_mount(struct bftw_state *state, const char *name) { - const struct bftw_file *file = state->file; - if (!file) { - return false; - } - - const struct bftw_file *parent = name ? file : file->parent; - if (!parent) { - return false; - } - - const struct BFTW *ftwbuf = &state->ftwbuf; - const struct bfs_stat *statbuf = bftw_stat(ftwbuf, ftwbuf->stat_flags); - return statbuf && statbuf->dev != parent->dev; -} - -/** Fill file identity information from an ftwbuf. */ -static void bftw_fill_id(struct bftw_file *file, const struct BFTW *ftwbuf) { - const struct bfs_stat *statbuf = ftwbuf->stat_cache.buf; - if (!statbuf || (ftwbuf->stat_flags & BFS_STAT_NOFOLLOW)) { - statbuf = ftwbuf->lstat_cache.buf; - } - if (statbuf) { - file->dev = statbuf->dev; - file->ino = statbuf->ino; - } -} - -/** - * Visit a path, invoking the callback. - */ -static enum bftw_action bftw_visit(struct bftw_state *state, const char *name, enum bftw_visit visit) { - if (bftw_update_path(state, name) != 0) { - state->error = errno; - return BFTW_STOP; - } - - const struct BFTW *ftwbuf = &state->ftwbuf; - bftw_init_ftwbuf(state, visit); - - // Never give the callback BFS_ERROR unless BFTW_RECOVER is specified - if (ftwbuf->type == BFS_ERROR && !(state->flags & BFTW_RECOVER)) { - state->error = ftwbuf->error; - return BFTW_STOP; - } - - if ((state->flags & BFTW_SKIP_MOUNTS) && bftw_is_mount(state, name)) { - return BFTW_PRUNE; - } - - enum bftw_action ret = state->callback(ftwbuf, state->ptr); - switch (ret) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - case BFTW_STOP: - goto done; - default: - state->error = EINVAL; - return BFTW_STOP; - } - - if (visit != BFTW_PRE || ftwbuf->type != BFS_DIR) { - ret = BFTW_PRUNE; - goto done; - } - - if ((state->flags & BFTW_PRUNE_MOUNTS) && bftw_is_mount(state, name)) { - ret = BFTW_PRUNE; - goto done; - } - -done: - if (state->file && !name) { - bftw_fill_id(state->file, ftwbuf); - } - - return ret; -} - -/** - * Push a new file onto the queue. - */ -static int bftw_push(struct bftw_state *state, const char *name, bool fill_id) { - struct bftw_file *parent = state->file; - struct bftw_file *file = bftw_file_new(parent, name); - if (!file) { - state->error = errno; - return -1; - } - - if (state->de) { - file->type = state->de->type; - } - - if (fill_id) { - bftw_fill_id(file, &state->ftwbuf); - } - - bftw_queue_push(&state->queue, file); - - return 0; -} - -/** - * Build the path to the current file. - */ -static int bftw_build_path(struct bftw_state *state) { - const struct bftw_file *file = state->file; - - size_t pathlen = file->nameoff + file->namelen; - if (dstresize(&state->path, pathlen) != 0) { - state->error = errno; - return -1; - } - - // Try to find a common ancestor with the existing path - const struct bftw_file *ancestor = state->previous; - while (ancestor && ancestor->depth > file->depth) { - ancestor = ancestor->parent; - } - - // Build the path backwards - while (file && file != ancestor) { - if (file->nameoff > 0) { - state->path[file->nameoff - 1] = '/'; - } - memcpy(state->path + file->nameoff, file->name, file->namelen); - - if (ancestor && ancestor->depth == file->depth) { - ancestor = ancestor->parent; - } - file = file->parent; - } - - state->previous = state->file; - return 0; -} - -/** - * Pop the next file from the queue. - */ -static int bftw_pop(struct bftw_state *state) { - if (!state->queue.head) { - return 0; - } - - state->file = bftw_queue_pop(&state->queue); - - if (bftw_build_path(state) != 0) { - return -1; - } - - return 1; -} - -/** - * Open the current directory. - */ -static void bftw_opendir(struct bftw_state *state) { - assert(!state->dir); - assert(!state->de); - - state->direrror = 0; - - state->dir = bftw_file_opendir(&state->cache, state->file, state->path); - if (!state->dir) { - state->direrror = errno; - } -} - -/** - * Read an entry from the current directory. - */ -static int bftw_readdir(struct bftw_state *state) { - if (!state->dir) { - return -1; - } - - int ret = bfs_readdir(state->dir, &state->de_storage); - if (ret > 0) { - state->de = &state->de_storage; - } else if (ret == 0) { - state->de = NULL; - } else { - state->de = NULL; - state->direrror = errno; - } - - return ret; -} - -/** - * Flags controlling which files get visited when done with a directory. - */ -enum bftw_gc_flags { - /** Don't visit anything. */ - BFTW_VISIT_NONE = 0, - /** Visit the file itself. */ - BFTW_VISIT_FILE = 1 << 0, - /** Visit the file's ancestors. */ - BFTW_VISIT_PARENTS = 1 << 1, - /** Visit both the file and its ancestors. */ - BFTW_VISIT_ALL = BFTW_VISIT_FILE | BFTW_VISIT_PARENTS, -}; - -/** - * Close the current directory. - */ -static enum bftw_action bftw_closedir(struct bftw_state *state, enum bftw_gc_flags flags) { - struct bftw_file *file = state->file; - enum bftw_action ret = BFTW_CONTINUE; - - if (state->dir) { - assert(file->fd >= 0); - - if (file->refcount > 1) { - // Keep the fd around if any subdirectories exist - file->fd = bfs_freedir(state->dir); - } else { - bfs_closedir(state->dir); - file->fd = -1; - } - - if (file->fd < 0) { - bftw_cache_remove(&state->cache, file); - } - } - - state->de = NULL; - state->dir = NULL; - - if (state->direrror != 0) { - if (flags & BFTW_VISIT_FILE) { - ret = bftw_visit(state, NULL, BFTW_PRE); - } else { - state->error = state->direrror; - } - state->direrror = 0; - } - - return ret; -} - -/** - * Finalize and free a file we're done with. - */ -static enum bftw_action bftw_gc_file(struct bftw_state *state, enum bftw_gc_flags flags) { - enum bftw_action ret = BFTW_CONTINUE; - - if (!(state->flags & BFTW_POST_ORDER)) { - flags = 0; - } - bool visit = flags & BFTW_VISIT_FILE; - - while (state->file) { - struct bftw_file *file = state->file; - if (--file->refcount > 0) { - state->file = NULL; - break; - } - - if (visit && bftw_visit(state, NULL, BFTW_POST) == BFTW_STOP) { - ret = BFTW_STOP; - flags &= ~BFTW_VISIT_PARENTS; - } - visit = flags & BFTW_VISIT_PARENTS; - - struct bftw_file *parent = file->parent; - if (state->previous == file) { - state->previous = parent; - } - bftw_file_free(&state->cache, file); - state->file = parent; - } - - return ret; -} - -/** - * Drain all the entries from a bftw_queue. - */ -static void bftw_drain_queue(struct bftw_state *state, struct bftw_queue *queue) { - while (queue->head) { - state->file = bftw_queue_pop(queue); - bftw_gc_file(state, BFTW_VISIT_NONE); - } -} - -/** - * Dispose of the bftw() state. - * - * @return - * The bftw() return value. - */ -static int bftw_state_destroy(struct bftw_state *state) { - dstrfree(state->path); - - bftw_closedir(state, BFTW_VISIT_NONE); - - bftw_gc_file(state, BFTW_VISIT_NONE); - bftw_drain_queue(state, &state->queue); - - bftw_cache_destroy(&state->cache); - - errno = state->error; - return state->error ? -1 : 0; -} - -/** Start a batch of files. */ -static void bftw_batch_start(struct bftw_state *state) { - if (state->strategy == BFTW_DFS) { - state->queue.target = &state->queue.head; - } - state->batch = state->queue.target; -} - -/** Finish adding a batch of files. */ -static void bftw_batch_finish(struct bftw_state *state) { - if (state->flags & BFTW_SORT) { - state->queue.target = bftw_sort_files(state->batch, state->queue.target); - } -} - -/** - * Streaming mode: visit files as they are encountered. - */ -static int bftw_stream(const struct bftw_args *args) { - struct bftw_state state; - if (bftw_state_init(&state, args) != 0) { - return -1; - } - - assert(!(state.flags & (BFTW_SORT | BFTW_BUFFER))); - - bftw_batch_start(&state); - for (size_t i = 0; i < args->npaths; ++i) { - const char *path = args->paths[i]; - - switch (bftw_visit(&state, path, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - continue; - case BFTW_STOP: - goto done; - } - - if (bftw_push(&state, path, true) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - while (bftw_pop(&state) > 0) { - bftw_opendir(&state); - - bftw_batch_start(&state); - while (bftw_readdir(&state) > 0) { - const char *name = state.de->name; - - switch (bftw_visit(&state, name, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - continue; - case BFTW_STOP: - goto done; - } - - if (bftw_push(&state, name, true) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - if (bftw_closedir(&state, BFTW_VISIT_ALL) == BFTW_STOP) { - goto done; - } - if (bftw_gc_file(&state, BFTW_VISIT_ALL) == BFTW_STOP) { - goto done; - } - } - -done: - return bftw_state_destroy(&state); -} - -/** - * Batching mode: queue up all children before visiting them. - */ -static int bftw_batch(const struct bftw_args *args) { - struct bftw_state state; - if (bftw_state_init(&state, args) != 0) { - return -1; - } - - bftw_batch_start(&state); - for (size_t i = 0; i < args->npaths; ++i) { - if (bftw_push(&state, args->paths[i], false) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - while (bftw_pop(&state) > 0) { - enum bftw_gc_flags gcflags = BFTW_VISIT_ALL; - - switch (bftw_visit(&state, NULL, BFTW_PRE)) { - case BFTW_CONTINUE: - break; - case BFTW_PRUNE: - gcflags &= ~BFTW_VISIT_FILE; - goto next; - case BFTW_STOP: - goto done; - } - - bftw_opendir(&state); - - bftw_batch_start(&state); - while (bftw_readdir(&state) > 0) { - if (bftw_push(&state, state.de->name, false) != 0) { - goto done; - } - } - bftw_batch_finish(&state); - - if (bftw_closedir(&state, gcflags) == BFTW_STOP) { - goto done; - } - - next: - if (bftw_gc_file(&state, gcflags) == BFTW_STOP) { - goto done; - } - } - -done: - return bftw_state_destroy(&state); -} - -/** Select bftw_stream() or bftw_batch() appropriately. */ -static int bftw_auto(const struct bftw_args *args) { - if (args->flags & (BFTW_SORT | BFTW_BUFFER)) { - return bftw_batch(args); - } else { - return bftw_stream(args); - } -} - -/** - * Iterative deepening search state. - */ -struct bftw_ids_state { - /** The wrapped callback. */ - bftw_callback *delegate; - /** The wrapped callback arguments. */ - void *ptr; - /** Which visit this search corresponds to. */ - enum bftw_visit visit; - /** Whether to override the bftw_visit. */ - bool force_visit; - /** The current minimum depth (inclusive). */ - size_t min_depth; - /** The current maximum depth (exclusive). */ - size_t max_depth; - /** The set of pruned paths. */ - struct trie pruned; - /** An error code to report. */ - int error; - /** Whether the bottom has been found. */ - bool bottom; - /** Whether to quit the search. */ - bool quit; -}; - -/** Iterative deepening callback function. */ -static enum bftw_action bftw_ids_callback(const struct BFTW *ftwbuf, void *ptr) { - struct bftw_ids_state *state = ptr; - - if (state->force_visit) { - struct BFTW *mutbuf = (struct BFTW *)ftwbuf; - mutbuf->visit = state->visit; - } - - if (ftwbuf->type == BFS_ERROR) { - if (ftwbuf->depth + 1 >= state->min_depth) { - return state->delegate(ftwbuf, state->ptr); - } else { - return BFTW_PRUNE; - } - } - - if (ftwbuf->depth < state->min_depth) { - if (trie_find_str(&state->pruned, ftwbuf->path)) { - return BFTW_PRUNE; - } else { - return BFTW_CONTINUE; - } - } else if (state->visit == BFTW_POST) { - if (trie_find_str(&state->pruned, ftwbuf->path)) { - return BFTW_PRUNE; - } - } - - enum bftw_action ret = BFTW_CONTINUE; - if (ftwbuf->visit == state->visit) { - ret = state->delegate(ftwbuf, state->ptr); - } - - switch (ret) { - case BFTW_CONTINUE: - if (ftwbuf->type == BFS_DIR && ftwbuf->depth + 1 >= state->max_depth) { - state->bottom = false; - ret = BFTW_PRUNE; - } - break; - case BFTW_PRUNE: - if (ftwbuf->type == BFS_DIR) { - if (!trie_insert_str(&state->pruned, ftwbuf->path)) { - state->error = errno; - state->quit = true; - ret = BFTW_STOP; - } - } - break; - case BFTW_STOP: - state->quit = true; - break; - } - - return ret; -} - -/** Initialize iterative deepening state. */ -static void bftw_ids_init(const struct bftw_args *args, struct bftw_ids_state *state, struct bftw_args *ids_args) { - state->delegate = args->callback; - state->ptr = args->ptr; - state->visit = BFTW_PRE; - state->force_visit = false; - state->min_depth = 0; - state->max_depth = 1; - trie_init(&state->pruned); - state->error = 0; - state->bottom = false; - state->quit = false; - - *ids_args = *args; - ids_args->callback = bftw_ids_callback; - ids_args->ptr = state; - ids_args->flags &= ~BFTW_POST_ORDER; - ids_args->strategy = BFTW_DFS; -} - -/** Finish an iterative deepening search. */ -static int bftw_ids_finish(struct bftw_ids_state *state) { - int ret = 0; - - if (state->error) { - ret = -1; - } else { - state->error = errno; - } - - trie_destroy(&state->pruned); - - errno = state->error; - return ret; -} - -/** - * Iterative deepening bftw() wrapper. - */ -static int bftw_ids(const struct bftw_args *args) { - struct bftw_ids_state state; - struct bftw_args ids_args; - bftw_ids_init(args, &state, &ids_args); - - while (!state.quit && !state.bottom) { - state.bottom = true; - - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; - } - - ++state.min_depth; - ++state.max_depth; - } - - if (args->flags & BFTW_POST_ORDER) { - state.visit = BFTW_POST; - state.force_visit = true; - - while (!state.quit && state.min_depth > 0) { - --state.max_depth; - --state.min_depth; - - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; - } - } - } - - return bftw_ids_finish(&state); -} - -/** - * Exponential deepening bftw() wrapper. - */ -static int bftw_eds(const struct bftw_args *args) { - struct bftw_ids_state state; - struct bftw_args ids_args; - bftw_ids_init(args, &state, &ids_args); - - while (!state.quit && !state.bottom) { - state.bottom = true; - - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - state.quit = true; - } - - state.min_depth = state.max_depth; - state.max_depth *= 2; - } - - if (!state.quit && (args->flags & BFTW_POST_ORDER)) { - state.visit = BFTW_POST; - state.min_depth = 0; - ids_args.flags |= BFTW_POST_ORDER; - - if (bftw_auto(&ids_args) != 0) { - state.error = errno; - } - } - - return bftw_ids_finish(&state); -} - -int bftw(const struct bftw_args *args) { - switch (args->strategy) { - case BFTW_BFS: - return bftw_auto(args); - case BFTW_DFS: - return bftw_batch(args); - case BFTW_IDS: - return bftw_ids(args); - case BFTW_EDS: - return bftw_eds(args); - } - - errno = EINVAL; - return -1; -} |