diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/bench.sh | 41 | ||||
-rw-r--r-- | bench/ioq.c | 238 |
2 files changed, 225 insertions, 54 deletions
diff --git a/bench/bench.sh b/bench/bench.sh index f249ffc..c9ed978 100644 --- a/bench/bench.sh +++ b/bench/bench.sh @@ -22,6 +22,7 @@ PRINT_DEFAULT=(linux) STRATEGIES_DEFAULT=(rust) JOBS_DEFAULT=(rust) EXEC_DEFAULT=(linux) +SORTED_DEFAULT=(chromium) usage() { printf 'Usage: tailfin run %s\n' "${BASH_SOURCE[0]}" @@ -60,6 +61,10 @@ usage() { printf ' Process spawning benchmark.\n' printf ' Default corpus is --exec=%s\n\n' "${EXEC_DEFAULT[*]}" + printf ' --sorted[=CORPUS]\n' + printf ' Sorted traversal benchmark.\n' + printf ' Default corpus is --sorted=%s\n\n' "${SORTED_DEFAULT[*]}" + printf ' --build=COMMIT\n' printf ' Build this bfs commit and benchmark it. Specify multiple times to\n' printf ' compare, e.g. --build=3.0.1 --build=3.0.2\n\n' @@ -121,6 +126,7 @@ setup() { STRATEGIES=() JOBS=() EXEC=() + SORTED=() for arg; do case "$arg" in @@ -195,6 +201,12 @@ setup() { --exec=*) read -ra EXEC <<<"${arg#*=}" ;; + --sorted) + SORTED=("${SORTED_DEFAULT[@]}") + ;; + --sorted=*) + read -ra SORTED <<<"${arg#*=}" + ;; --default) COMPLETE=("${COMPLETE_DEFAULT[@]}") EARLY_QUIT=("${EARLY_QUIT_DEFAULT[@]}") @@ -203,6 +215,7 @@ setup() { STRATEGIES=("${STRATEGIES_DEFAULT[@]}") JOBS=("${JOBS_DEFAULT[@]}") EXEC=("${EXEC_DEFAULT[@]}") + SORTED=("${SORTED_DEFAULT[@]}") ;; --help) usage @@ -227,7 +240,7 @@ setup() { as-user mkdir -p bench/corpus declare -A cloned=() - for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}"; do + for corpus in "${COMPLETE[@]}" "${EARLY_QUIT[@]}" "${STAT[@]}" "${PRINT[@]}" "${STRATEGIES[@]}" "${JOBS[@]}" "${EXEC[@]}" "${SORTED[@]}"; do if ((cloned["$corpus"])); then continue fi @@ -283,6 +296,7 @@ setup() { export_array STRATEGIES export_array JOBS export_array EXEC + export_array SORTED if ((UID == 0)); then turbo-off @@ -650,6 +664,29 @@ bench-exec() { fi } +# Benchmark sorted traversal +bench-sorted-corpus() { + subgroup '%s' "$1" + + cmds=() + for bfs in "${BFS[@]}"; do + cmds+=("$bfs -s $2 -false") + done + + do-hyperfine "${cmds[@]}" +} + +# All sorted traversal benchmarks +bench-sorted() { + if (($#)); then + group "Sorted traversal" + + for corpus; do + bench-sorted-corpus "$corpus ${TAGS[$corpus]}" "bench/corpus/$corpus" + done + fi +} + # Print benchmarked versions bench-versions() { subgroup "Versions" @@ -698,6 +735,7 @@ bench() { import_array STRATEGIES import_array JOBS import_array EXEC + import_array SORTED bench-complete "${COMPLETE[@]}" bench-early-quit "${EARLY_QUIT[@]}" @@ -706,5 +744,6 @@ bench() { bench-strategies "${STRATEGIES[@]}" bench-jobs "${JOBS[@]}" bench-exec "${EXEC[@]}" + bench-sorted "${SORTED[@]}" bench-details } diff --git a/bench/ioq.c b/bench/ioq.c index 5db585a..fb9edbc 100644 --- a/bench/ioq.c +++ b/bench/ioq.c @@ -17,6 +17,149 @@ #include <time.h> #include <unistd.h> +/** A latency sample. */ +struct lat { + /** The sampled latency. */ + struct timespec time; + /** A random integer, for reservoir sampling. */ + long key; +}; + +/** Number of latency samples to keep. */ +#define SAMPLES 1000 +/** Latency sampling period. */ +#define PERIOD 128 + +/** Latency measurements. */ +struct lats { + /** Lowest observed latency. */ + struct timespec min; + /** Highest observed latency. */ + struct timespec max; + /** Total latency. */ + struct timespec sum; + /** Number of measured requests. */ + size_t count; + + /** Priority queue for reservoir sampling. */ + struct lat heap[SAMPLES]; + /** Current size of the heap. */ + size_t heap_size; +}; + +/** Initialize a latency reservoir. */ +static void lats_init(struct lats *lats) { + lats->min = (struct timespec) { .tv_sec = 1000 }; + lats->max = (struct timespec) { 0 }; + lats->sum = (struct timespec) { 0 }; + lats->count = 0; + lats->heap_size = 0; +} + +/** Binary heap parent. */ +static size_t heap_parent(size_t i) { + return (i - 1) / 2; +} + +/** Binary heap left child. */ +static size_t heap_child(size_t i) { + return 2 * i + 1; +} + +/** Binary heap smallest child. */ +static size_t heap_min_child(const struct lats *lats, size_t i) { + size_t j = heap_child(i); + size_t k = j + 1; + if (k < lats->heap_size && lats->heap[k].key < lats->heap[j].key) { + return k; + } else { + return j; + } +} + +/** Check if the heap property is met. */ +static bool heap_check(const struct lat *parent, const struct lat *child) { + return parent->key <= child->key; +} + +/** Reservoir sampling. */ +static void heap_push(struct lats *lats, const struct lat *lat) { + size_t i; + + if (lats->heap_size < SAMPLES) { + // Heapify up + i = lats->heap_size++; + while (i > 0) { + size_t j = heap_parent(i); + if (heap_check(&lats->heap[j], lat)) { + break; + } + lats->heap[i] = lats->heap[j]; + i = j; + } + } else if (lat->key > lats->heap[0].key) { + // Heapify down + i = 0; + while (true) { + size_t j = heap_min_child(lats, i); + if (j >= SAMPLES || heap_check(lat, &lats->heap[j])) { + break; + } + lats->heap[i] = lats->heap[j]; + i = j; + } + } else { + // Reject + return; + } + + lats->heap[i] = *lat; +} + +/** Add a latency sample. */ +static void lats_push(struct lats *lats, const struct timespec *ts) { + timespec_min(&lats->min, ts); + timespec_max(&lats->max, ts); + timespec_add(&lats->sum, ts); + ++lats->count; + + struct lat lat = { + .time = *ts, + .key = lrand48(), + }; + heap_push(lats, &lat); +} + +/** Merge two latency reservoirs. */ +static void lats_merge(struct lats *into, const struct lats *from) { + timespec_min(&into->min, &from->min); + timespec_max(&into->max, &from->max); + timespec_add(&into->sum, &from->sum); + into->count += from->count; + + for (size_t i = 0; i < from->heap_size; ++i) { + heap_push(into, &from->heap[i]); + } +} + +/** Latency qsort() comparator. */ +static int lat_cmp(const void *a, const void *b) { + const struct lat *la = a; + const struct lat *lb = b; + return timespec_cmp(&la->time, &lb->time); +} + +/** Sort the latency reservoir. */ +static void lats_sort(struct lats *lats) { + qsort(lats->heap, lats->heap_size, sizeof(lats->heap[0]), lat_cmp); +} + +/** Get the nth percentile. */ +static const struct timespec *lats_percentile(const struct lats *lats, int percent) { + size_t i = lats->heap_size * percent / 100; + return &lats->heap[i].time; +} + /** Which clock to use for benchmarking. */ static clockid_t clockid = CLOCK_REALTIME; @@ -38,86 +181,71 @@ struct times { /** Total requests finished. */ size_t popped; - /** Number of timed requests (latency). */ - size_t timed_reqs; /** The start time for the currently tracked request. */ struct timespec req_start; /** Whether a timed request is currently in flight. */ bool timing; /** Latency measurements. */ - struct { - struct timespec min; - struct timespec max; - struct timespec sum; - } latency; + struct lats lats; }; /** Initialize a timer. */ static void times_init(struct times *times) { - *times = (struct times) { - .latency = { - .min = { .tv_sec = 1000 }, - }, - }; gettime(×->start); -} - -/** Start timing a single request. */ -static void start_request(struct times *times) { - gettime(×->req_start); - times->timing = true; + times->pushed = 0; + times->popped = 0; + bfs_assert(!times->timing); + lats_init(×->lats); } /** Finish timing a request. */ -static void finish_request(struct times *times) { +static void track_latency(struct times *times) { struct timespec elapsed; gettime(&elapsed); timespec_sub(&elapsed, ×->req_start); - - timespec_min(×->latency.min, &elapsed); - timespec_max(×->latency.max, &elapsed); - timespec_add(×->latency.sum, &elapsed); + lats_push(×->lats, &elapsed); bfs_assert(times->timing); times->timing = false; - ++times->timed_reqs; } /** Add times to the totals, and reset the lap times. */ static void times_lap(struct times *total, struct times *lap) { total->pushed += lap->pushed; total->popped += lap->popped; - total->timed_reqs += lap->timed_reqs; - - timespec_min(&total->latency.min, &lap->latency.min); - timespec_max(&total->latency.max, &lap->latency.max); - timespec_add(&total->latency.sum, &lap->latency.sum); + lats_merge(&total->lats, &lap->lats); times_init(lap); } /** Print some times. */ -static void times_print(const struct times *times, long seconds) { +static void times_print(struct times *times, long seconds) { struct timespec elapsed; gettime(&elapsed); timespec_sub(&elapsed, ×->start); double fsec = timespec_ns(&elapsed) / 1.0e9; - double iops = times->popped / fsec; - double mean = timespec_ns(×->latency.sum) / times->timed_reqs; - double min = timespec_ns(×->latency.min); - double max = timespec_ns(×->latency.max); if (seconds > 0) { - printf("%9ld", seconds); + printf("%5ld", seconds); } else if (elapsed.tv_nsec >= 10 * 1000 * 1000) { - printf("%9.2f", fsec); + printf("%5.2f", fsec); } else { - printf("%9.0f", fsec); + printf("%5.0f", fsec); } - printf(" │ %'17.0f │ %'15.0f ∈ [%'6.0f .. %'7.0f]\n", iops, mean, min, max); + double iops = times->popped / fsec; + double mean = timespec_ns(×->lats.sum) / times->lats.count; + double min = timespec_ns(×->lats.min); + double max = timespec_ns(×->lats.max); + + lats_sort(×->lats); + double n50 = timespec_ns(lats_percentile(×->lats, 50)); + double n90 = timespec_ns(lats_percentile(×->lats, 90)); + double n99 = timespec_ns(lats_percentile(×->lats, 99)); + + printf(" │ %'12.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f │ %'7.0f\n", iops, mean, min, n50, n90, n99, max); fflush(stdout); } @@ -126,9 +254,9 @@ static bool push(struct ioq *ioq, enum ioq_nop_type type, struct times *lap) { void *ptr = NULL; // Track latency for a small fraction of requests - if (!lap->timing && (lap->pushed + 1) % 128 == 0) { - start_request(lap); + if (!lap->timing && (lap->pushed + 1) % PERIOD == 0) { ptr = lap; + gettime(&lap->req_start); } int ret = ioq_nop(ioq, type, ptr); @@ -138,6 +266,9 @@ static bool push(struct ioq *ioq, enum ioq_nop_type type, struct times *lap) { } ++lap->pushed; + if (ptr) { + lap->timing = true; + } return true; } @@ -149,7 +280,7 @@ static bool pop(struct ioq *ioq, struct times *lap, bool block) { } if (ent->ptr) { - finish_request(lap); + track_latency(lap); } ioq_free(ioq, ent); @@ -177,9 +308,9 @@ int main(int argc, char *argv[]) { setlocale(LC_ALL, ""); // -d: queue depth - long depth = 4096; + unsigned int depth = 4096; // -j: threads - long threads = 0; + unsigned int threads = 0; // -t: timeout double timeout = 5.0; // -L|-H: ioq_nop() type @@ -190,13 +321,13 @@ int main(int argc, char *argv[]) { while (c = getopt(argc, argv, ":d:j:t:LH"), c != -1) { switch (c) { case 'd': - if (xstrtol(optarg, NULL, 10, &depth) != 0) { + if (xstrtoui(optarg, NULL, 10, &depth) != 0) { fprintf(stderr, "%s: Bad depth '%s': %s\n", cmd, optarg, errstr()); return EXIT_FAILURE; } break; case 'j': - if (xstrtol(optarg, NULL, 10, &threads) != 0) { + if (xstrtoui(optarg, NULL, 10, &threads) != 0) { fprintf(stderr, "%s: Bad thread count '%s': %s\n", cmd, optarg, errstr()); return EXIT_FAILURE; } @@ -222,7 +353,7 @@ int main(int argc, char *argv[]) { } } - if (threads <= 0) { + if (!threads) { threads = nproc(); if (threads > 8) { threads = 8; @@ -238,8 +369,8 @@ int main(int argc, char *argv[]) { printf("I/O queue benchmark (%s)\n\n", bfs_version); - printf("[-d] depth: %ld\n", depth); - printf("[-j] threads: %ld (including main)\n", threads + 1); + printf("[-d] depth: %u\n", depth); + printf("[-j] threads: %u (including main)\n", threads + 1); if (type == IOQ_NOP_HEAVY) { printf("[-H] type: heavy (with syscalls)\n"); } else { @@ -247,12 +378,13 @@ int main(int argc, char *argv[]) { } printf("\n"); - printf(" Time (s) │ Throughput (IO/s) │ Latency (ns/IO)\n"); - printf("══════════╪═══════════════════╪═════════════════\n"); + printf(" Time │ Throughput │ Latency │ min │ 50%% │ 90%% │ 99%% │ max\n"); + printf(" (s) │ (IO/s) │ (ns/IO) │ │ │ │ │\n"); + printf("══════╪══════════════╪═════════╪═════════╪═════════╪═════════╪═════════╪═════════\n"); fflush(stdout); struct ioq *ioq = ioq_create(depth, threads); - bfs_everify(ioq, "ioq_create(%ld, %ld)", depth, threads); + bfs_everify(ioq, "ioq_create(%u, %u)", depth, threads); // Pre-allocate all the requests while (ioq_capacity(ioq) > 0) { @@ -311,9 +443,9 @@ int main(int argc, char *argv[]) { times_lap(&total, &lap); if (load(&quit, relaxed)) { - printf("\r────^C────┼───────────────────┼─────────────────\n"); + printf("\r──^C──┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n"); } else { - printf("──────────┼───────────────────┼─────────────────\n"); + printf("──────┼──────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────\n"); } times_print(&total, 0); |