diff options
Diffstat (limited to 'src/trie.c')
-rw-r--r-- | src/trie.c | 123 |
1 files changed, 78 insertions, 45 deletions
@@ -129,7 +129,7 @@ struct trie_node { * tag to distinguish internal nodes from leaves. This is safe as long * as all dynamic allocations are aligned to more than a single byte. */ - uintptr_t children[]; + uintptr_t children[]; // _counted_by(count_ones(bitmap)) }; /** Check if an encoded pointer is to an internal node. */ @@ -173,16 +173,16 @@ void trie_init(struct trie *trie) { /** Extract the nibble at a certain offset from a byte sequence. */ static unsigned char trie_key_nibble(const void *key, size_t length, size_t offset) { const unsigned char *bytes = key; - size_t byte = offset >> 1; + size_t byte = offset / 2; bfs_assert(byte < length); // A branchless version of // if (offset & 1) { - // return bytes[byte] >> 4; - // } else { // return bytes[byte] & 0xF; + // } else { + // return bytes[byte] >> 4; // } - unsigned int shift = (offset & 1) << 2; + unsigned int shift = 4 * ((offset + 1) % 2); return (bytes[byte] >> shift) & 0xF; } @@ -191,6 +191,12 @@ static unsigned char trie_leaf_nibble(const struct trie_leaf *leaf, size_t offse return trie_key_nibble(leaf->key, leaf->length, offset); } +/** Get the number of children of an internal node. */ +_trie_clones +static unsigned int trie_node_size(const struct trie_node *node) { + return count_ones((unsigned int)node->bitmap); +} + /** * Finds a leaf in the trie that matches the key at every branch. If the key * exists in the trie, the representative will match the searched key. But @@ -202,19 +208,20 @@ _trie_clones static struct trie_leaf *trie_representative(const struct trie *trie, const void *key, size_t length) { uintptr_t ptr = trie->root; - size_t offset = 0; + size_t offset = 0, limit = 2 * length; while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; unsigned int index = 0; - if ((offset >> 1) < length) { + if (offset < limit) { unsigned char nibble = trie_key_nibble(key, length, offset); unsigned int bit = 1U << nibble; - // bits = bitmap & bit ? bitmap & (bit - 1) : 0 - unsigned int mask = -!!(node->bitmap & bit); - unsigned int bits = node->bitmap & (bit - 1) & mask; - index = count_ones(bits); + unsigned int map = node->bitmap; + unsigned int bits = map & (bit - 1); + unsigned int mask = -!!(map & bit); + // index = (map & bit) ? count_ones(bits) : 0; + index = count_ones(bits) & mask; } ptr = node->children[index]; } @@ -240,6 +247,16 @@ struct trie_leaf *trie_find_mem(const struct trie *trie, const void *key, size_t return trie_find_mem_impl(trie, key, length); } +void *trie_get_str(const struct trie *trie, const char *key) { + const struct trie_leaf *leaf = trie_find_str(trie, key); + return leaf ? leaf->value : NULL; +} + +void *trie_get_mem(const struct trie *trie, const void *key, size_t length) { + const struct trie_leaf *leaf = trie_find_mem(trie, key, length); + return leaf ? leaf->value : NULL; +} + _trie_clones static struct trie_leaf *trie_find_postfix_impl(const struct trie *trie, const char *key) { size_t length = strlen(key); @@ -296,18 +313,18 @@ static struct trie_leaf *trie_find_prefix_impl(const struct trie *trie, const ch size_t skip = 0; size_t length = strlen(key) + 1; - size_t offset = 0; + size_t offset = 0, limit = 2 * length; while (trie_is_node(ptr)) { struct trie_node *node = trie_decode_node(ptr); offset += node->offset; - if ((offset >> 1) >= length) { + if (offset >= limit) { return best; } struct trie_leaf *leaf = trie_terminal_leaf(node); if (trie_check_prefix(leaf, skip, key, length)) { best = leaf; - skip = offset >> 1; + skip = offset / 2; } unsigned char nibble = trie_key_nibble(key, length, offset); @@ -370,16 +387,10 @@ static struct trie_node *trie_node_realloc(struct trie *trie, struct trie_node * /** Free a node. */ static void trie_node_free(struct trie *trie, struct trie_node *node, size_t size) { - bfs_assert(size == (size_t)count_ones(node->bitmap)); + bfs_assert(size == trie_node_size(node)); varena_free(&trie->nodes, node, size); } -#if ENDIAN_NATIVE == ENDIAN_LITTLE -# define TRIE_BSWAP(n) (n) -#elif ENDIAN_NATIVE == ENDIAN_BIG -# define TRIE_BSWAP(n) bswap(n) -#endif - /** Find the offset of the first nibble that differs between two keys. */ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t length) { if (!rep) { @@ -393,32 +404,34 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t const char *rep_bytes = rep->key; const char *key_bytes = key; - size_t i = 0; - for (size_t chunk = sizeof(chunk); i + chunk <= length; i += chunk) { - size_t rep_chunk, key_chunk; - memcpy(&rep_chunk, rep_bytes + i, sizeof(rep_chunk)); - memcpy(&key_chunk, key_bytes + i, sizeof(key_chunk)); - - if (rep_chunk != key_chunk) { -#ifdef TRIE_BSWAP - size_t diff = TRIE_BSWAP(rep_chunk ^ key_chunk); - i *= 2; - i += trailing_zeros(diff) / 4; - return i; + size_t ret = 0, i = 0; + +#define CHUNK(n) CHUNK_(uint##n##_t, load8_beu##n) +#define CHUNK_(type, load8) \ + (length - i >= sizeof(type)) { \ + type rep_chunk = load8(rep_bytes + i); \ + type key_chunk = load8(key_bytes + i); \ + type diff = rep_chunk ^ key_chunk; \ + ret += leading_zeros(diff) / 4; \ + if (diff) { \ + return ret; \ + } \ + i += sizeof(type); \ + } + +#if SIZE_WIDTH >= 64 + while CHUNK(64); + if CHUNK(32); #else - break; + while CHUNK(32); #endif - } - } + if CHUNK(16); + if CHUNK(8); - for (; i < length; ++i) { - unsigned char diff = rep_bytes[i] ^ key_bytes[i]; - if (diff) { - return 2 * i + !(diff & 0xF); - } - } +#undef CHUNK_ +#undef CHUNK - return 2 * i; + return ret; } /** @@ -446,7 +459,7 @@ static size_t trie_mismatch(const struct trie_leaf *rep, const void *key, size_t _trie_clones static struct trie_leaf *trie_node_insert(struct trie *trie, uintptr_t *ptr, struct trie_leaf *leaf, unsigned char nibble) { struct trie_node *node = trie_decode_node(*ptr); - unsigned int size = count_ones(node->bitmap); + unsigned int size = trie_node_size(node); // Double the capacity every power of two if (has_single_bit(size)) { @@ -626,6 +639,26 @@ struct trie_leaf *trie_insert_mem(struct trie *trie, const void *key, size_t len return trie_insert_mem_impl(trie, key, length); } +int trie_set_str(struct trie *trie, const char *key, const void *value) { + struct trie_leaf *leaf = trie_insert_str(trie, key); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } +} + +int trie_set_mem(struct trie *trie, const void *key, size_t length, const void *value) { + struct trie_leaf *leaf = trie_insert_mem(trie, key, length); + if (leaf) { + leaf->value = (void *)value; + return 0; + } else { + return -1; + } +} + /** Free a chain of singleton nodes. */ static void trie_free_singletons(struct trie *trie, uintptr_t ptr) { while (trie_is_node(ptr)) { @@ -711,7 +744,7 @@ static void trie_remove_impl(struct trie *trie, struct trie_leaf *leaf) { struct trie_node *node = trie_decode_node(*parent); trie_free_singletons(trie, node->children[child_index]); - unsigned int parent_size = count_ones(node->bitmap); + unsigned int parent_size = trie_node_size(node); bfs_assert(parent_size > 1); if (parent_size == 2 && trie_collapse_node(trie, parent, node, child_index) == 0) { return; |