Cthulhu  0.2.10
Cthulhu compiler collection
map.c
Go to the documentation of this file.
1 // SPDX-License-Identifier: LGPL-3.0-only
2 #include "std/map.h"
3 
4 #include "base/panic.h"
5 #include "arena/arena.h"
6 
7 #include "std/str.h"
8 #include "std/vector.h"
9 
10 #include "std/typed/vector.h"
11 
12 #include "common.h"
13 
14 // TODO: add quadratic probing and robin hood hashing
15 
16 // 90% load factor before resizing
17 #define MAP_LOAD_FACTOR (90)
18 
22 typedef struct bucket_t
23 {
24  const void *key;
25  void *value;
26  struct bucket_t *next;
27 } bucket_t;
28 
29 static bucket_t gEmptyBucket = {0};
30 
31 // TODO: this is a bit of a bodge, but using extern const values
32 // in a static initializer isn't possible in C
33 
34 const map_t kEmptyMap = {
35  .arena = NULL,
36  .info = {
37  .size = sizeof(void *),
38  .hash = info_ptr_hash,
39  .equals = info_ptr_equal,
40  },
41  .size = 1,
42  .used = 0,
43  .data = &gEmptyBucket,
44 };
45 
46 // generic map functions
47 
48 static bucket_t *impl_bucket_new(const void *key, void *value, arena_t *arena)
49 {
50  bucket_t *entry = ARENA_MALLOC(sizeof(bucket_t), "bucket", NULL, arena);
51  entry->key = key;
52  entry->value = value;
53  entry->next = NULL;
54  return entry;
55 }
56 
58 static bucket_t *map_get_bucket(map_t *map, ctu_hash_t hash)
59 {
60  size_t index = hash % map->size;
61  return &map->data[index];
62 }
63 
65 static const bucket_t *map_get_bucket_const(const map_t *map, ctu_hash_t hash)
66 {
67  size_t index = hash % map->size;
68  return &map->data[index];
69 }
70 
71 static void clear_keys(bucket_t *buckets, size_t size)
72 {
73  for (size_t i = 0; i < size; i++)
74  {
75  buckets[i].key = NULL;
76  buckets[i].next = NULL;
77  }
78 }
79 
81 void map_init(map_t *map, size_t size, hash_info_t info, arena_t *arena)
82 {
83  CTASSERT(map != NULL);
84  CTASSERT(arena != NULL);
85  CTASSERT(size > 0);
86 
87  CTASSERT(info.size > 0);
88  CTASSERT(info.equals != NULL);
89  CTASSERT(info.hash != NULL);
90 
91  map_t tmp = {
92  .arena = arena,
93  .info = info,
94  .size = size,
95  .used = 0,
96  .data = ARENA_MALLOC(sizeof(bucket_t) * size, "buckets", NULL, arena),
97  };
98 
99  clear_keys(tmp.data, size);
100 
101  *map = tmp;
102 }
103 
104 STA_DECL
105 map_t map_make(size_t size, hash_info_t info, arena_t *arena)
106 {
107  map_t map;
108  map_init(&map, size, info, arena);
109  return map;
110 }
111 
112 STA_DECL
113 map_t *map_new(size_t size, hash_info_t info, arena_t *arena)
114 {
115  map_t *map = ARENA_MALLOC(sizeof(map_t), "map", NULL, arena);
116  map_init(map, size, info, arena);
117  ARENA_IDENTIFY(map->data, "buckets", map, arena);
118 
119  return map;
120 }
121 
122 #define MAP_FOREACH_APPLY(self, item, ...) \
123  do \
124  { \
125  for (size_t i = 0; i < self->size; i++) \
126  { \
127  bucket_t *item = &self->data[i]; \
128  while (item && item->key) \
129  { \
130  __VA_ARGS__; \
131  item = item->next; \
132  } \
133  } \
134  } while (0)
135 
136 STA_DECL
138 {
139  CTASSERT(map != NULL);
140 
141  vector_t *result = vector_new(map->size, map->arena);
142 
143  MAP_FOREACH_APPLY(map, entry, {
144  vector_push(&result, entry->value);
145  });
146 
147  return result;
148 }
149 
150 static map_entry_t map_entry_new(const char *key, void *value)
151 {
152  map_entry_t entry = {
153  .key = key,
154  .value = value,
155  };
156 
157  return entry;
158 }
159 
160 STA_DECL
162 {
163  CTASSERT(map != NULL);
164 
165  typevec_t *result = typevec_new(sizeof(map_entry_t), map->size, map->arena);
166 
167  MAP_FOREACH_APPLY(map, entry, {
168  map_entry_t item = map_entry_new(entry->key, entry->value);
169  typevec_push(result, &item);
170  });
171 
172  return result;
173 }
174 
175 STA_DECL
176 size_t map_count(const map_t *map)
177 {
178  CTASSERT(map != NULL);
179 
180  size_t count = 0;
181  MAP_FOREACH_APPLY(map, entry, { count++; });
182 
183  return count;
184 }
185 
186 // info functions
187 
188 CT_HOTFN
189 static ctu_hash_t impl_key_hash(const map_t *map, const void *key)
190 {
191  const hash_info_t *info = &map->info;
192  return info->hash(key);
193 }
194 
195 CT_HOTFN
196 static bool impl_key_equal(const map_t *map, const void *lhs, const void *rhs)
197 {
198  const hash_info_t *info = &map->info;
199  return info->equals(lhs, rhs);
200 }
201 
202 CT_HOTFN
203 static const bucket_t *impl_bucket_get_const(const map_t *map, const void *key)
204 {
205  ctu_hash_t hash = impl_key_hash(map, key);
206  return map_get_bucket_const(map, hash);
207 }
208 
209 CT_HOTFN
210 static bucket_t *impl_bucket_get(map_t *map, const void *key)
211 {
212  ctu_hash_t hash = impl_key_hash(map, key);
213  return map_get_bucket(map, hash);
214 }
215 
216 static bool impl_entry_exists(const map_t *map, const bucket_t *bucket, const void *key)
217 {
218  CTASSERT(bucket != NULL);
219 
220  if (impl_key_equal(map, bucket->key, key))
221  return true;
222 
223  if (bucket->next != NULL)
224  return impl_entry_exists(map, bucket->next, key);
225 
226  return false;
227 }
228 
229 // delete a bucket and reparent the next bucket
230 static void impl_delete_bucket(bucket_t *previous, bucket_t *entry)
231 {
232  CTASSERT(entry != NULL);
233 
234  entry->key = NULL;
235  entry->value = NULL;
236 
237  if (previous != NULL)
238  {
239  previous->next = entry->next;
240  }
241 }
242 
243 static bool impl_insert_into_bucket(map_t *map, bucket_t *bucket, const void *key, void *value)
244 {
245  if (bucket->key == NULL)
246  {
247  // track this as a new entry
248  map->used += 1;
249 
250  bucket->key = key;
251  bucket->value = value;
252  return true;
253  }
254 
255  if (impl_key_equal(map, bucket->key, key))
256  {
257  bucket->value = value;
258  return true;
259  }
260 
261  return false;
262 }
263 
264 CT_HOTFN
265 static void impl_resize(map_t *map, size_t new_size)
266 {
267  bucket_t *old_data = map->data;
268  size_t old_size = map->size;
269 
270  // TODO: maybe resize the old data instead of allocating new data
271  map->size = new_size;
272  map->used = 0;
273  map->data = ARENA_MALLOC(sizeof(bucket_t) * new_size, "buckets", map, map->arena);
274  clear_keys(map->data, new_size);
275 
276  for (size_t i = 0; i < old_size; i++)
277  {
278  bucket_t *entry = &old_data[i];
279  while (entry != NULL)
280  {
281  if (entry->key != NULL)
282  {
283  map_set(map, entry->key, entry->value);
284  }
285 
286  entry = entry->next;
287  }
288  }
289 
290  arena_free(old_data, sizeof(bucket_t) * old_size, map->arena);
291 }
292 
294 void map_set(map_t *map, const void *key, void *value)
295 {
296  CTASSERT(map != NULL);
297  CTASSERT(key != NULL);
298 
299  // if we are over the load factor, resize the map
300  if ((map->used * 100 / map->size) > MAP_LOAD_FACTOR)
301  impl_resize(map, map->size * 2);
302 
303  // follow the chain
304  bucket_t *bucket = impl_bucket_get(map, key);
305  while (true)
306  {
307  if (impl_insert_into_bucket(map, bucket, key, value))
308  return;
309 
310  if (bucket->next == NULL)
311  {
312  map->used += 1;
313 
314  bucket->next = impl_bucket_new(key, value, map->arena);
315  ARENA_REPARENT(bucket->next, bucket, map->arena);
316  return;
317  }
318 
319  bucket = bucket->next;
320  }
321 }
322 
324 void *map_get(const map_t *map, const void *key)
325 {
326  CTASSERT(map != NULL);
327  CTASSERT(key != NULL);
328 
329  return map_get_default(map, key, NULL);
330 }
331 
333 void *map_get_default(const map_t *map, const void *key, void *other)
334 {
335  CTASSERT(map != NULL);
336  CTASSERT(key != NULL);
337 
338  const bucket_t *bucket = impl_bucket_get_const(map, key);
339  while (bucket != NULL)
340  {
341  if (bucket->key != NULL && impl_key_equal(map, bucket->key, key))
342  return bucket->value;
343 
344  bucket = bucket->next;
345  }
346 
347  return other;
348 }
349 
350 STA_DECL
351 bool map_contains(const map_t *map, const void *key)
352 {
353  CTASSERT(map != NULL);
354  CTASSERT(key != NULL);
355 
356  const bucket_t *bucket = impl_bucket_get_const(map, key);
357  return impl_entry_exists(map, bucket, key);
358 }
359 
360 STA_DECL
361 bool map_delete(map_t *map, const void *key)
362 {
363  CTASSERT(map != NULL);
364  CTASSERT(key != NULL);
365 
366  bucket_t *entry = impl_bucket_get(map, key);
367  bucket_t *previous = entry;
368 
369  while (entry != NULL)
370  {
371  if (entry->key != NULL && impl_key_equal(map, entry->key, key))
372  {
373  map->used -= 1;
374  impl_delete_bucket(previous, entry);
375  return true;
376  }
377 
378  previous = entry;
379  entry = entry->next;
380  }
381 
382  return false;
383 }
384 
385 STA_DECL
386 void map_reset(map_t *map)
387 {
388  CTASSERT(map != NULL);
389 
390  map->used = 0;
391  clear_keys(map->data, map->size);
392 }
393 
394 // iteration
395 
396 CT_PUREFN
397 static bucket_t *map_next_in_chain(bucket_t *entry)
398 {
399  if (entry == NULL || entry->key == NULL)
400  {
401  return NULL;
402  }
403 
404  while (entry->next != NULL)
405  {
406  entry = entry->next;
407 
408  if (entry->key != NULL)
409  {
410  return entry;
411  }
412  }
413 
414  return NULL;
415 }
416 
425 static bucket_t *map_find_next_bucket(const map_t *map, size_t *index, bucket_t *previous)
426 {
427  bucket_t *entry = map_next_in_chain(previous);
428  if (entry != NULL)
429  {
430  return entry;
431  }
432 
433  size_t i = *index;
434 
435  while (i < map->size)
436  {
437  entry = &map->data[i++];
438  if (entry->key != NULL)
439  {
440  *index = i;
441  return entry;
442  }
443 
444  entry = map_next_in_chain(entry);
445  if (entry != NULL)
446  {
447  *index = i;
448  return entry;
449  }
450  }
451 
452  return NULL;
453 }
454 
455 STA_DECL
457 {
458  CTASSERT(map != NULL);
459 
460  size_t index = 0;
461 
462  bucket_t *bucket = map_find_next_bucket(map, &index, NULL);
463  bucket_t *next = map_find_next_bucket(map, &index, bucket);
464 
465  map_iter_t iter = {
466  .map = map,
467  .index = index,
468  .bucket = bucket,
469  .next = next,
470  };
471 
472  return iter;
473 }
474 
477 {
478  CTASSERT(iter != NULL);
479 
480  map_entry_t entry = {
481  iter->bucket->key,
482  iter->bucket->value,
483  };
484 
485  iter->bucket = iter->next;
486  iter->next = map_find_next_bucket(iter->map, &iter->index, iter->next);
487 
488  return entry;
489 }
490 
491 STA_DECL
492 bool map_next_pair(map_iter_t *iter, const void **key, void **value)
493 {
494  CTASSERT(iter != NULL);
495  CTASSERT(key != NULL);
496  CTASSERT(value != NULL);
497 
498  bool has_next = map_has_next(iter);
499  if (!has_next)
500  return false;
501 
502  map_entry_t entry = map_next(iter);
503  *key = entry.key;
504  *value = entry.value;
505  return true;
506 }
507 
509 bool map_has_next(const map_iter_t *iter)
510 {
511  CTASSERT(iter != NULL);
512 
513  return iter->bucket != NULL;
514 }
CT_NODISCARD size_t size
Definition: scan.h:128
CT_LOCAL bool info_ptr_equal(const void *lhs, const void *rhs)
Definition: typeinfo.c:10
CT_LOCAL ctu_hash_t info_ptr_hash(const void *key)
Definition: typeinfo.c:9
#define CT_PUREFN
mark a function as pure, always returns the same value for the same arguments
Definition: analyze.h:228
#define CT_HOTFN
mark a function as hot, it is likely to be called often
Definition: analyze.h:268
#define STA_DECL
sal2 annotation on function implementations to copy annotations from the declaration
#define CT_NOALIAS
mark a function as only modifying pointers passed to it the same as CT_CONSTFN but allowed to modify/...
Definition: analyze.h:226
size_t ctu_hash_t
Definition: types.h:8
STA_DECL size_t map_count(const map_t *map)
get the number of key-value pairs in a map
Definition: map.c:176
STA_DECL CT_HOTFN void * map_get_default(const map_t *map, const void *key, void *other)
get a value from a map or a default value
Definition: map.c:333
const map_t kEmptyMap
an empty map
Definition: map.c:34
STA_DECL typevec_t * map_entries(map_t *map)
collect all key-value pairs in a map into a vector returns a typevec_t<map_entry_t>
Definition: map.c:161
STA_DECL CT_PUREFN bool map_has_next(const map_iter_t *iter)
check if a map iterator has more elements
Definition: map.c:509
STA_DECL void map_reset(map_t *map)
clear all key-value pairs from a map
Definition: map.c:386
STA_DECL bool map_delete(map_t *map, const void *key)
delete a key-value pair from a map
Definition: map.c:361
STA_DECL vector_t * map_values(map_t *map)
collect all the values from a map into a vector
Definition: map.c:137
STA_DECL bool map_next_pair(map_iter_t *iter, const void **key, void **value)
get the next key-value pair from a map iterator returns the values via out parameters for ease of use...
Definition: map.c:492
STA_DECL map_t map_make(size_t size, hash_info_t info, arena_t *arena)
create a new map on the stack
Definition: map.c:105
STA_DECL map_iter_t map_iter(const map_t *map)
create a new map iterator
Definition: map.c:456
STA_DECL CT_HOTFN void map_set(map_t *map, const void *key, void *value)
set a key-value pair in a map
Definition: map.c:294
STA_DECL map_t * map_new(size_t size, hash_info_t info, arena_t *arena)
create a new map on the heap
Definition: map.c:113
STA_DECL bool map_contains(const map_t *map, const void *key)
check if a map contains a key
Definition: map.c:351
STA_DECL CT_HOTFN void * map_get(const map_t *map, const void *key)
get a value from a map
Definition: map.c:324
STA_DECL CT_NOALIAS map_entry_t map_next(map_iter_t *iter)
get the next key-value pair from a map iterator
Definition: map.c:476
#define ARENA_IDENTIFY(ptr, name, parent, arena)
rename and reparent a pointer in a custom allocator
Definition: arena.h:409
#define ARENA_REPARENT(arena, ptr, parent)
reparent a pointer in a custom allocator
Definition: arena.h:391
#define ARENA_MALLOC(size, name, parent, arena)
allocate memory from a custom allocator
Definition: arena.h:392
CT_ARENA_API void arena_free(STA_RELEASE void *ptr, size_t size, arena_t *arena)
release memory from a custom allocator
#define CTASSERT(expr)
assert a condition, prints the condition as a message
Definition: panic.h:130
CT_STD_API void * typevec_push(typevec_t *vec, const void *src)
push a value onto the vector
Definition: vector.c:156
CT_NODISCARD CT_STD_API typevec_t * typevec_new(size_t width, size_t len, arena_t *arena)
create a new typed vector on the heap
Definition: vector.c:77
CT_NODISCARD CT_STD_API vector_t * vector_new(size_t size, arena_t *arena)
create a new vector with an initial capacity
Definition: vector.c:63
CT_STD_API void vector_push(vector_t **vector, void *value)
push a value onto the end of a vector
Definition: vector.c:108
STA_DECL void map_init(map_t *map, size_t size, hash_info_t info, arena_t *arena)
Definition: map.c:81
#define MAP_FOREACH_APPLY(self, item,...)
Definition: map.c:122
#define MAP_LOAD_FACTOR
Definition: map.c:17
an allocator object
Definition: arena.h:86
a single node in a map
Definition: map.c:23
struct bucket_t * next
the next bucket in the chain
Definition: map.c:26
const void * key
the key
Definition: map.c:24
void * value
any pointer value
Definition: map.c:25
information for using a type in a hashset or hashmap
Definition: typeinfo.h:39
hash_fn_t hash
the hash function for the type
Definition: typeinfo.h:44
equals_fn_t equals
the equality function for the type
Definition: typeinfo.h:47
a key-value pair in a map
Definition: map.h:175
const void * key
the key of this entry
Definition: map.h:176
void * value
the value of this entry
Definition: map.h:177
a map iterator handle
Definition: map.h:183
bucket_t * next
the next bucket in the chain
Definition: map.h:188
size_t index
current top level bucket index
Definition: map.h:185
bucket_t * bucket
the current bucket
Definition: map.h:187
const map_t * map
the map being iterated over
Definition: map.h:184
an unordered hash map
Definition: map.h:38
arena_t * arena
the arena this map allocates from
Definition: map.h:40
hash_info_t info
the hash function for this map
Definition: map.h:43
A vector with a fixed type size.
Definition: vector.h:24
a generic vector of pointers
Definition: vector.c:16