From af3bfcccc484ace662c78f053f633bc883c73152 Mon Sep 17 00:00:00 2001
From: Tavian Barnes <tavianator@tavianator.com>
Date: Fri, 21 Feb 2025 13:38:16 -0500
Subject: Micro-optimize word-at-a-time loop tails

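Compilers apparently aren't smart enough to infer that the odd tail
length fixup "loops" run at most once and can therefore be converted to
ifs, so do that manually.  (Once the widest-word loop has finished,
fewer bytes than that word size remain, so each narrower chunk size can
fit at most once.)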
---
 src/trie.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'src/trie.c')

diff --git a/src/trie.c b/src/trie.c
index c4bf4ba..4e0944a 100644
--- a/src/trie.c
+++ b/src/trie.c
@@ -408,7 +408,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 
 #define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n)
 #define CHUNK_(type, load8) \
-	while (length - i >= sizeof(type)) { \
+	(length - i >= sizeof(type)) { \
 		type rep_chunk = load8(rep_bytes + i); \
 		type key_chunk = load8(key_bytes + i); \
 		type diff = rep_chunk ^ key_chunk; \
@@ -420,11 +420,13 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 	}
 
 #if SIZE_WIDTH >= 64
-	CHUNK(64);
+	while CHUNK(64);
+	if CHUNK(32);
+#else
+	while CHUNK(32);
 #endif
-	CHUNK(32);
-	CHUNK(16);
-	CHUNK(8);
+	if CHUNK(16);
+	if CHUNK(8);
 
 #undef CHUNK_
 #undef CHUNK
-- 
cgit v1.2.3