diff options
author | Tavian Barnes <tavianator@tavianator.com> | 2025-02-21 13:38:16 -0500 |
---|---|---|
committer | Tavian Barnes <tavianator@tavianator.com> | 2025-02-21 13:38:16 -0500 |
commit | af3bfcccc484ace662c78f053f633bc883c73152 (patch) | |
tree | 386ebca53ab56b405103c2c7c519ca2dad2e7c5f | |
parent | 504dd8e9a551c75d6e83ab0d328a2d482d8b48d5 (diff) | |
download | bfs-af3bfcccc484ace662c78f053f633bc883c73152.tar.xz |
Micro-optimize word-at-a-time loop tails
Compilers apparently aren't smart enough to infer that the odd tail
length fixup "loops" run at most once, and could be converted to ifs, so
do that manually.
-rw-r--r-- | src/bfstd.c | 12 | ||||
-rw-r--r-- | src/trie.c | 12 |
2 files changed, 14 insertions, 10 deletions
```diff
diff --git a/src/bfstd.c b/src/bfstd.c
index f2938ad..219b8d0 100644
--- a/src/bfstd.c
+++ b/src/bfstd.c
@@ -786,7 +786,7 @@ size_t asciinlen(const char *str, size_t n) {
 	// Word-at-a-time isascii()
 #define CHUNK(n) CHUNK_(uint##n##_t, load8_leu##n)
 #define CHUNK_(type, load8) \
-	while (n - i >= sizeof(type)) { \
+	(n - i >= sizeof(type)) { \
 		type word = load8(ustr + i); \
 		type mask = (((type)-1) / 0xFF) << 7; /* 0x808080.. */ \
 		word &= mask; \
@@ -797,11 +797,13 @@ size_t asciinlen(const char *str, size_t n) {
 	}
 
 #if SIZE_WIDTH >= 64
-	CHUNK(64);
+	while CHUNK(64);
+	if CHUNK(32);
+#else
+	while CHUNK(32);
 #endif
-	CHUNK(32);
-	CHUNK(16);
-	CHUNK(8);
+	if CHUNK(16);
+	if CHUNK(8);
 
 #undef CHUNK_
 #undef CHUNK
diff --git a/src/trie.c b/src/trie.c
--- a/src/trie.c
+++ b/src/trie.c
@@ -408,7 +408,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 
 #define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n)
 #define CHUNK_(type, load8) \
-	while (length - i >= sizeof(type)) { \
+	(length - i >= sizeof(type)) { \
 		type rep_chunk = load8(rep_bytes + i); \
 		type key_chunk = load8(key_bytes + i); \
 		type diff = rep_chunk ^ key_chunk; \
@@ -420,11 +420,13 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t
 	}
 
 #if SIZE_WIDTH >= 64
-	CHUNK(64);
+	while CHUNK(64);
+	if CHUNK(32);
+#else
+	while CHUNK(32);
 #endif
-	CHUNK(32);
-	CHUNK(16);
-	CHUNK(8);
+	if CHUNK(16);
+	if CHUNK(8);
 
 #undef CHUNK_
 #undef CHUNK
```