summary | refs | log | tree | commit | diff | stats
path: root/src/ioq.c
diff options
context:
space:
mode:
author: Tavian Barnes <tavianator@tavianator.com> 2024-11-25 15:24:16 -0500
committer: Tavian Barnes <tavianator@tavianator.com> 2024-12-03 14:42:05 -0500
commit: cf197cada461d1d442458cbebdd2bb8ba314692e (patch)
tree: 6f7f2a94231abdc6b9944de9d737b256293b5c5b /src/ioq.c
parent: 3678c2ee7c11d67f4ea97c85d8564cd386a32bd1 (diff)
download: bfs-cf197cada461d1d442458cbebdd2bb8ba314692e.tar.xz
ioq: Prefetch pointers before popping them
Also, cache-align struct ioq_ent to avoid false sharing when two workers are handling neighbouring requests.
Diffstat (limited to 'src/ioq.c')
-rw-r--r-- src/ioq.c 8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/ioq.c b/src/ioq.c
index 5668a83..017b6c1 100644
--- a/src/ioq.c
+++ b/src/ioq.c
@@ -356,6 +356,14 @@ static bool ioq_slot_push(struct ioqq *ioqq, ioq_slot *slot, struct ioq_ent *ent
static struct ioq_ent *ioq_slot_pop(struct ioqq *ioqq, ioq_slot *slot, bool block) {
uintptr_t prev = load(slot, relaxed);
while (true) {
+#if __has_builtin(__builtin_prefetch)
+ // Optimistically prefetch the pointer in this slot. If this
+ // slot is not full, this will prefetch an invalid address, but
+ // experimentally this is worth it on both Intel (Alder Lake)
+ // and AMD (Zen 2).
+ __builtin_prefetch((void *)(prev << 1));
+#endif
+
// empty → skip(1)
// skip(n) → skip(n + 1)
// full(ptr) → full(ptr - 1)