fusion/third_party/hashmap/map.c

432 lines
9.5 KiB
C
Executable File

//
// map.h
//
// Created by Mashpoe on 1/15/21.
//
#include "map.h"
#include <stdlib.h>
#include <string.h>
#include "hlog.h"
#define HASHMAP_DEFAULT_CAPACITY 20
#define HASHMAP_MAX_LOAD 0.75f
#define HASHMAP_RESIZE_FACTOR 2
struct bucket
{
// `next` must be the first struct element.
// changing the order will break multiple functions
struct bucket* next;
// key, key size, key hash, and associated value
const void* key;
size_t ksize;
uint32_t hash;
uintptr_t value;
};
struct hashmap
{
struct bucket* buckets;
int capacity;
int count;
#ifdef __HASHMAP_REMOVABLE
// "tombstones" are empty buckets from removing elements
int tombstone_count;
#endif
// a linked list of all valid entries, in order
struct bucket* first;
// lets us know where to add the next element
struct bucket* last;
};
hashmap* hashmap_create(void)
{
hashmap* m = malloc(sizeof(hashmap));
if (m == NULL)
{
return NULL;
}
m->capacity = HASHMAP_DEFAULT_CAPACITY;
m->count = 0;
#ifdef __HASHMAP_REMOVABLE
m->tombstone_count = 0;
#endif
m->buckets = calloc(HASHMAP_DEFAULT_CAPACITY, sizeof(struct bucket));
if (m->buckets == NULL) {
free(m);
return NULL;
}
m->first = NULL;
// this prevents branching in hashmap_set.
// m->first will be treated as the "next" pointer in an imaginary bucket.
// when the first item is added, m->first will be set to the correct address.
m->last = (struct bucket*)&m->first;
return m;
}
void hashmap_free(hashmap* m)
{
free(m->buckets);
free(m);
}
// puts an old bucket into a resized hashmap
static struct bucket* resize_entry(hashmap* m, struct bucket* old_entry)
{
uint32_t index = old_entry->hash % m->capacity;
for (;;)
{
struct bucket* entry = &m->buckets[index];
if (entry->key == NULL)
{
*entry = *old_entry; // copy data from old entry
return entry;
}
index = (index + 1) % m->capacity;
}
}
static int hashmap_resize(hashmap* m)
{
int old_capacity = m->capacity;
struct bucket* old_buckets = m->buckets;
m->capacity *= HASHMAP_RESIZE_FACTOR;
// initializes all bucket fields to null
m->buckets = calloc(m->capacity, sizeof(struct bucket));
if (m->buckets == NULL) {
m->capacity = old_capacity;
m->buckets = old_buckets;
return -1;
}
// same trick; avoids branching
m->last = (struct bucket*)&m->first;
#ifdef __HASHMAP_REMOVABLE
m->count -= m->tombstone_count;
m->tombstone_count = 0;
#endif
// assumes that an empty map won't be resized
do
{
#ifdef __HASHMAP_REMOVABLE
// skip entry if it's a "tombstone"
struct bucket* current = m->last->next;
if (current->key == NULL)
{
m->last->next = current->next;
// skip to loop condition
continue;
}
#endif
m->last->next = resize_entry(m, m->last->next);
m->last = m->last->next;
} while (m->last->next != NULL);
free(old_buckets);
return 0;
}
#define HASHMAP_HASH_INIT 2166136261u
// FNV-1a hash function
static inline uint32_t hash_data(const unsigned char* data, size_t size)
{
size_t nblocks = size / 8;
uint64_t hash = HASHMAP_HASH_INIT;
for (size_t i = 0; i < nblocks; ++i)
{
hash ^= (uint64_t)data[0] << 0 | (uint64_t)data[1] << 8 |
(uint64_t)data[2] << 16 | (uint64_t)data[3] << 24 |
(uint64_t)data[4] << 32 | (uint64_t)data[5] << 40 |
(uint64_t)data[6] << 48 | (uint64_t)data[7] << 56;
hash *= 0xbf58476d1ce4e5b9;
data += 8;
}
uint64_t last = size & 0xff;
switch (size % 8)
{
case 7:
last |= (uint64_t)data[6] << 56; /* fallthrough */
case 6:
last |= (uint64_t)data[5] << 48; /* fallthrough */
case 5:
last |= (uint64_t)data[4] << 40; /* fallthrough */
case 4:
last |= (uint64_t)data[3] << 32; /* fallthrough */
case 3:
last |= (uint64_t)data[2] << 24; /* fallthrough */
case 2:
last |= (uint64_t)data[1] << 16; /* fallthrough */
case 1:
last |= (uint64_t)data[0] << 8;
hash ^= last;
hash *= 0xd6e8feb86659fd93;
}
// compress to a 32-bit result.
// also serves as a finalizer.
return (uint32_t)(hash ^ hash >> 32);
}
static struct bucket* find_entry(hashmap* m, const void* key, size_t ksize, uint32_t hash)
{
uint32_t index = hash % m->capacity;
for (;;)
{
struct bucket* entry = &m->buckets[index];
#ifdef __HASHMAP_REMOVABLE
// compare sizes, then hashes, then key data as a last resort.
// check for tombstone
if ((entry->key == NULL && entry->value == 0) ||
// check for valid matching entry
(entry->key != NULL &&
entry->ksize == ksize &&
entry->hash == hash &&
memcmp(entry->key, key, ksize) == 0))
{
// return the entry if a match or an empty bucket is found
return entry;
}
#else
// kind of a thicc condition;
// I didn't want this to span multiple if statements or functions.
if (entry->key == NULL ||
// compare sizes, then hashes, then key data as a last resort.
(entry->ksize == ksize &&
entry->hash == hash &&
memcmp(entry->key, key, ksize) == 0))
{
// return the entry if a match or an empty bucket is found
return entry;
}
#endif
//printf("collision\n");
index = (index + 1) % m->capacity;
}
}
int hashmap_set(hashmap* m, const void* key, size_t ksize, uintptr_t val)
{
if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
if (hashmap_resize(m) == -1)
return -1;
}
uint32_t hash = hash_data(key, ksize);
struct bucket* entry = find_entry(m, key, ksize, hash);
if (entry->key == NULL)
{
m->last->next = entry;
m->last = entry;
entry->next = NULL;
++m->count;
entry->key = key;
entry->ksize = ksize;
entry->hash = hash;
}
entry->value = val;
return 0;
}
int hashmap_get_set(hashmap* m, const void* key, size_t ksize, uintptr_t* out_in)
{
if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
if (hashmap_resize(m) == -1)
return -1;
}
uint32_t hash = hash_data(key, ksize);
struct bucket* entry = find_entry(m, key, ksize, hash);
if (entry->key == NULL)
{
m->last->next = entry;
m->last = entry;
entry->next = NULL;
++m->count;
entry->value = *out_in;
entry->key = key;
entry->ksize = ksize;
entry->hash = hash;
return 0;
}
*out_in = entry->value;
return 1;
}
int hashmap_set_free(hashmap* m, const void* key, size_t ksize, uintptr_t val, hashmap_callback c, void* usr)
{
if (m->count + 1 > HASHMAP_MAX_LOAD * m->capacity) {
if (hashmap_resize(m) == -1)
return -1;
}
uint32_t hash = hash_data(key, ksize);
struct bucket *entry = find_entry(m, key, ksize, hash);
if (entry->key == NULL)
{
m->last->next = entry;
m->last = entry;
entry->next = NULL;
++m->count;
entry->key = key;
entry->ksize = ksize;
entry->hash = hash;
entry->value = val;
// there was no overwrite, exit the function.
return 0;
}
// allow the callback to free entry data.
// use old key and value so the callback can free them.
// the old key and value will be overwritten after this call.
int error = c(entry->key, ksize, entry->value, usr);
// overwrite the old key pointer in case the callback frees it.
entry->key = key;
entry->value = val;
return error;
}
int hashmap_get(hashmap* m, const void* key, size_t ksize, uintptr_t* out_val)
{
uint32_t hash = hash_data(key, ksize);
struct bucket* entry = find_entry(m, key, ksize, hash);
// if there is no match, output val will just be NULL
*out_val = entry->value;
return entry->key != NULL ? 1 : 0;
}
#ifdef __HASHMAP_REMOVABLE
// doesn't "remove" the element per se, but it will be ignored.
// the element will eventually be removed when the map is resized.
void hashmap_remove(hashmap* m, const void* key, size_t ksize)
{
uint32_t hash = hash_data(key, ksize);
struct bucket* entry = find_entry(m, key, ksize, hash);
if (entry->key != NULL)
{
// "tombstone" entry is signified by a NULL key with a nonzero value
// element removal is optional because of the overhead of tombstone checks
entry->key = NULL;
entry->value = 0xDEAD; // I mean, it's a tombstone...
++m->tombstone_count;
}
}
void hashmap_remove_free(hashmap* m, const void* key, size_t ksize, hashmap_callback c, void* usr)
{
uint32_t hash = hash_data(key, ksize);
struct bucket* entry = find_entry(m, key, ksize, hash);
if (entry->key != NULL)
{
c(entry->key, entry->ksize, entry->value, usr);
// "tombstone" entry is signified by a NULL key with a nonzero value
// element removal is optional because of the overhead of tombstone checks
entry->key = NULL;
entry->value = 0xDEAD; // I mean, it's a tombstone...
++m->tombstone_count;
}
}
#endif
int hashmap_size(hashmap* m)
{
#ifdef __HASHMAP_REMOVABLE
return m->count - m->tombstone_count;
#else
return m->count;
#endif
}
int hashmap_iterate(hashmap* m, hashmap_callback c, void* user_ptr)
{
// loop through the linked list of valid entries
// this way we can skip over empty buckets
struct bucket* current = m->first;
int error = 0;
while (current != NULL)
{
#ifdef __HASHMAP_REMOVABLE
// "tombstone" check
if (current->key != NULL)
#endif
error = c(current->key, current->ksize, current->value, user_ptr);
if (error == -1)
break;
current = current->next;
}
return error;
}
void hashmap_test(){
#define HASHMAP_TEST_KEY "TEST_KEY"
uintptr_t ivalue = 0;
hashmap* map = hashmap_create();
hashmap_set(map, hashmap_str_lit(HASHMAP_TEST_KEY), (uintptr_t)map);
hashmap_get_set(map, hashmap_str_lit(HASHMAP_TEST_KEY), &ivalue);
hlogd("%s ivalue:%llx map:%p",__func__, ivalue, map);
}
/*void bucket_dump(hashmap* m)
{
for (int i = 0; i < m->capacity; i++)
{
if (m->buckets[i].key == NULL)
{
if (m->buckets[i].value != 0)
{
printf("x");
}
else
{
printf("0");
}
}
else
{
printf("1");
}
}
printf("\n");
fflush(stdout);
}*/