summaryrefslogtreecommitdiffstats
path: root/Hashtable.c
diff options
context:
space:
mode:
authorChristian Göttsche <cgzones@googlemail.com>2020-10-22 15:43:26 +0200
committerBenBE <BenBE@geshi.org>2020-11-17 02:01:02 +0100
commit307c34b028d353154aa268eceb38e0331c8275cf (patch)
tree25ef4307e553d0bfcda195e66f1171646641a738 /Hashtable.c
parent7914ec201ef19fa0c0caed99dc150a953eb9bc19 (diff)
Hashtable: use dynamic growth and use primes as size
Dynamically increase the hashmap size to not exceed the load factor and avoid too long chains. Switch from Separate Chaining to Robin Hood linear probing to improve cache locality. Use primes as size to further avoid collisions. E.g. on a standard kde system the number of entries in the ProcessTable might be around 650.
Diffstat (limited to 'Hashtable.c')
-rw-r--r--Hashtable.c274
1 files changed, 204 insertions, 70 deletions
diff --git a/Hashtable.c b/Hashtable.c
index 606dfd60..cdfb4ec3 100644
--- a/Hashtable.c
+++ b/Hashtable.c
@@ -8,32 +8,55 @@ in the source distribution for its full text.
#include "Hashtable.h"
#include "XUtils.h"
-#include <stdlib.h>
#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
#ifndef NDEBUG
+static void Hashtable_dump(const Hashtable* this) {
+ fprintf(stderr, "Hashtable %p: size=%u items=%u owner=%s\n",
+ (const void*)this,
+ this->size,
+ this->items,
+ this->owner ? "yes" : "no");
+
+ unsigned int items = 0;
+ for (unsigned int i = 0; i < this->size; i++) {
+ fprintf(stderr, " item %5u: key = %5u probe = %2u value = %p\n",
+ i,
+ this->buckets[i].key,
+ this->buckets[i].probe,
+ this->buckets[i].value ? (const void*)this->buckets[i].value : "(nil)");
+
+ if (this->buckets[i].value)
+ items++;
+ }
+
+ fprintf(stderr, "Hashtable %p: items=%u counted=%u\n",
+ (const void*)this,
+ this->items,
+ items);
+}
+
static bool Hashtable_isConsistent(const Hashtable* this) {
unsigned int items = 0;
for (unsigned int i = 0; i < this->size; i++) {
- HashtableItem* bucket = this->buckets[i];
- while (bucket) {
+ if (this->buckets[i].value)
items++;
- bucket = bucket->next;
- }
}
- return items == this->items;
+ bool res = items == this->items;
+ if (!res)
+ Hashtable_dump(this);
+ return res;
}
unsigned int Hashtable_count(const Hashtable* this) {
unsigned int items = 0;
for (unsigned int i = 0; i < this->size; i++) {
- HashtableItem* bucket = this->buckets[i];
- while (bucket) {
+ if (this->buckets[i].value)
items++;
- bucket = bucket->next;
- }
}
assert(items == this->items);
return items;
@@ -41,14 +64,25 @@ unsigned int Hashtable_count(const Hashtable* this) {
#endif /* NDEBUG */
-static HashtableItem* HashtableItem_new(hkey_t key, void* value) {
- HashtableItem* this;
+/* https://oeis.org/A014234 */
+static const uint64_t OEISprimes[] = {
+ 2, 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191,
+ 16381, 32749, 65521, 131071, 262139, 524287, 1048573,
+ 2097143, 4194301, 8388593, 16777213, 33554393,
+ 67108859, 134217689, 268435399, 536870909, 1073741789,
+ 2147483647, 4294967291, 8589934583, 17179869143,
+ 34359738337, 68719476731, 137438953447
+};
- this = xMalloc(sizeof(HashtableItem));
- this->key = key;
- this->value = value;
- this->next = NULL;
- return this;
+static uint64_t nextPrime(unsigned int n) {
+ assert(n <= OEISprimes[ARRAYSIZE(OEISprimes) - 1]);
+
+ for (unsigned int i = 0; i < ARRAYSIZE(OEISprimes); i++) {
+ if (n <= OEISprimes[i])
+ return OEISprimes[i];
+ }
+
+ return OEISprimes[ARRAYSIZE(OEISprimes) - 1];
}
Hashtable* Hashtable_new(unsigned int size, bool owner) {
@@ -56,102 +90,202 @@ Hashtable* Hashtable_new(unsigned int size, bool owner) {
this = xMalloc(sizeof(Hashtable));
this->items = 0;
- this->size = size;
- this->buckets = (HashtableItem**) xCalloc(size, sizeof(HashtableItem*));
+ this->size = size ? nextPrime(size) : 13;
+ this->buckets = (HashtableItem*) xCalloc(this->size, sizeof(HashtableItem));
this->owner = owner;
assert(Hashtable_isConsistent(this));
return this;
}
void Hashtable_delete(Hashtable* this) {
+
assert(Hashtable_isConsistent(this));
- for (unsigned int i = 0; i < this->size; i++) {
- HashtableItem* walk = this->buckets[i];
- while (walk != NULL) {
- if (this->owner)
- free(walk->value);
-
- HashtableItem* savedWalk = walk;
- walk = savedWalk->next;
- free(savedWalk);
- }
+
+ if (this->owner) {
+ for (unsigned int i = 0; i < this->size; i++)
+ free(this->buckets[i].value);
}
+
free(this->buckets);
free(this);
}
-void Hashtable_put(Hashtable* this, hkey_t key, void* value) {
+static void insert(Hashtable* this, hkey_t key, void* value) {
unsigned int index = key % this->size;
- HashtableItem** bucketPtr = &(this->buckets[index]);
- while (true)
- if (*bucketPtr == NULL) {
- *bucketPtr = HashtableItem_new(key, value);
+ unsigned int probe = 0;
+#ifndef NDEBUG
+ unsigned int origIndex = index;
+#endif
+
+ for (;;) {
+ if (!this->buckets[index].value) {
this->items++;
- break;
- } else if ((*bucketPtr)->key == key) {
- if (this->owner && (*bucketPtr)->value != value)
- free((*bucketPtr)->value);
+ this->buckets[index].key = key;
+ this->buckets[index].probe = probe;
+ this->buckets[index].value = value;
+ return;
+ }
- (*bucketPtr)->value = value;
- break;
- } else {
- bucketPtr = &((*bucketPtr)->next);
+ if (this->buckets[index].key == key) {
+ if (this->owner && this->buckets[index].value != value)
+ free(this->buckets[index].value);
+ this->buckets[index].value = value;
+ return;
}
+ /* Robin Hood swap */
+ if (probe > this->buckets[index].probe) {
+ HashtableItem tmp = this->buckets[index];
+
+ this->buckets[index].key = key;
+ this->buckets[index].probe = probe;
+ this->buckets[index].value = value;
+
+ key = tmp.key;
+ probe = tmp.probe;
+ value = tmp.value;
+ }
+
+ index = (index + 1) % this->size;
+ probe++;
+
+ assert(index != origIndex);
+ }
+}
+
+void Hashtable_setSize(Hashtable* this, unsigned int size) {
+
+ assert(Hashtable_isConsistent(this));
+
+ if (size <= this->items)
+ return;
+
+ HashtableItem* oldBuckets = this->buckets;
+ unsigned int oldSize = this->size;
+
+ this->size = nextPrime(size);
+ this->buckets = (HashtableItem*) xCalloc(this->size, sizeof(HashtableItem));
+ this->items = 0;
+
+ /* rehash */
+ for (unsigned int i = 0; i < oldSize; i++) {
+ if (!oldBuckets[i].value)
+ continue;
+
+ insert(this, oldBuckets[i].key, oldBuckets[i].value);
+ }
+
+ free(oldBuckets);
+
assert(Hashtable_isConsistent(this));
}
+void Hashtable_put(Hashtable* this, hkey_t key, void* value) {
+
+ assert(Hashtable_isConsistent(this));
+ assert(this->size > 0);
+ assert(value);
+
+ /* grow on load-factor > 0.7 */
+ if (10 * this->items > 7 * this->size)
+ Hashtable_setSize(this, 2 * this->size);
+
+ insert(this, key, value);
+
+ assert(Hashtable_isConsistent(this));
+ assert(Hashtable_get(this, key) != NULL);
+ assert(this->size > this->items);
+}
+
void* Hashtable_remove(Hashtable* this, hkey_t key) {
unsigned int index = key % this->size;
+ unsigned int probe = 0;
+#ifndef NDEBUG
+ unsigned int origIndex = index;
+#endif
assert(Hashtable_isConsistent(this));
- HashtableItem** bucket;
- for (bucket = &(this->buckets[index]); *bucket; bucket = &((*bucket)->next) ) {
- if ((*bucket)->key == key) {
- void* value = (*bucket)->value;
- HashtableItem* next = (*bucket)->next;
- free(*bucket);
- (*bucket) = next;
- this->items--;
+ void* res = NULL;
+
+ while (this->buckets[index].value) {
+ if (this->buckets[index].key == key) {
if (this->owner) {
- free(value);
- assert(Hashtable_isConsistent(this));
- return NULL;
+ free(this->buckets[index].value);
} else {
- assert(Hashtable_isConsistent(this));
- return value;
+ res = this->buckets[index].value;
}
+
+ unsigned int next = (index + 1) % this->size;
+
+ while (this->buckets[next].value && this->buckets[next].probe > 0) {
+ this->buckets[index] = this->buckets[next];
+ this->buckets[index].probe -= 1;
+
+ index = next;
+ next = (index + 1) % this->size;
+ }
+
+ /* set empty after backward shifting */
+ this->buckets[index].value = NULL;
+ this->items--;
+
+ break;
}
+
+ if (this->buckets[index].probe < probe)
+ break;
+
+ index = (index + 1) % this->size;
+ probe++;
+
+ assert(index != origIndex);
}
+
assert(Hashtable_isConsistent(this));
- return NULL;
+ assert(Hashtable_get(this, key) == NULL);
+
+ /* shrink on load-factor < 0.125 */
+ if (8 * this->items < this->size)
+ Hashtable_setSize(this, this->size / 2);
+
+ return res;
}
void* Hashtable_get(Hashtable* this, hkey_t key) {
unsigned int index = key % this->size;
- HashtableItem* bucketPtr = this->buckets[index];
- while (true) {
- if (bucketPtr == NULL) {
- assert(Hashtable_isConsistent(this));
- return NULL;
- } else if (bucketPtr->key == key) {
- assert(Hashtable_isConsistent(this));
- return bucketPtr->value;
- } else {
- bucketPtr = bucketPtr->next;
+ unsigned int probe = 0;
+ void* res = NULL;
+#ifndef NDEBUG
+ unsigned int origIndex = index;
+#endif
+
+ assert(Hashtable_isConsistent(this));
+
+ while (this->buckets[index].value) {
+ if (this->buckets[index].key == key) {
+ res = this->buckets[index].value;
+ break;
}
+
+ if (this->buckets[index].probe < probe)
+ break;
+
+ index = (index + 1) != this->size ? (index + 1) : 0;
+ probe++;
+
+ assert(index != origIndex);
}
+
+ return res;
}
void Hashtable_foreach(Hashtable* this, Hashtable_PairFunction f, void* userData) {
assert(Hashtable_isConsistent(this));
for (unsigned int i = 0; i < this->size; i++) {
- HashtableItem* walk = this->buckets[i];
- while (walk != NULL) {
+ HashtableItem* walk = &this->buckets[i];
+ if (walk->value)
f(walk->key, walk->value, userData);
- walk = walk->next;
- }
}
assert(Hashtable_isConsistent(this));
}

© 2014-2024 Faster IT GmbH | imprint | privacy policy