FD.io VPP  v19.08.2-294-g37e99c22d
Vector Packet Processing
bihash_template.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 /** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */
17 
18 static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
19 {
20  uword rv;
21 
22  /* Round to an even number of cache lines */
23  nbytes += CLIB_CACHE_LINE_BYTES - 1;
24  nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1);
25 
26  rv = alloc_arena_next (h);
27  alloc_arena_next (h) += nbytes;
28 
29  if (alloc_arena_next (h) > alloc_arena_size (h))
31 
32  return (void *) (uword) (rv + alloc_arena (h));
33 }
34 
35 static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
36 {
37  uword bucket_size;
38 
39  alloc_arena (h) = (uword) clib_mem_vm_alloc (h->memory_size);
40  alloc_arena_next (h) = 0;
41  alloc_arena_size (h) = h->memory_size;
42 
43  bucket_size = h->nbuckets * sizeof (h->buckets[0]);
44  h->buckets = BV (alloc_aligned) (h, bucket_size);
46  h->instantiated = 1;
47 }
48 
49 void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
50 {
51  int i;
52  void *oldheap;
53  BVT (clib_bihash) * h = a->h;
54 
55  a->nbuckets = 1 << (max_log2 (a->nbuckets));
56 
57  h->name = (u8 *) a->name;
58  h->nbuckets = a->nbuckets;
59  h->log2_nbuckets = max_log2 (a->nbuckets);
60  h->memory_size = a->memory_size;
61  h->instantiated = 0;
62  h->fmt_fn = a->fmt_fn;
63 
64  alloc_arena (h) = 0;
65 
66  /*
67  * Make sure the requested size is rational. The max table
68  * size without playing the alignment card is 64 Gbytes.
69  * If someone starts complaining that's not enough, we can shift
70  * the offset by CLIB_LOG2_CACHE_LINE_BYTES...
71  */
72  ASSERT (h->memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
73 
74  /* Add this hash table to the list */
75  if (a->dont_add_to_all_bihash_list == 0)
76  {
77  for (i = 0; i < vec_len (clib_all_bihashes); i++)
78  if (clib_all_bihashes[i] == h)
79  goto do_lock;
80  oldheap = clib_all_bihash_set_heap ();
81  vec_add1 (clib_all_bihashes, (void *) h);
82  clib_mem_set_heap (oldheap);
83  }
84 
85 do_lock:
86  if (h->alloc_lock)
87  clib_mem_free ((void *) h->alloc_lock);
88 
89  /*
90  * Set up the lock now, so we can use it to make the first add
91  * thread-safe
92  */
95  h->alloc_lock[0] = 0;
96 
97  if (a->instantiate_immediately)
98  BV (clib_bihash_instantiate) (h);
99 }
100 
101 void BV (clib_bihash_init)
102  (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size)
103 {
104  BVT (clib_bihash_init2_args) _a, *a = &_a;
105 
106  memset (a, 0, sizeof (*a));
107 
108  a->h = h;
109  a->name = name;
110  a->nbuckets = nbuckets;
111  a->memory_size = memory_size;
112 
113  BV (clib_bihash_init2) (a);
114 }
115 
116 #if BIHASH_32_64_SVM
117 #if !defined (MFD_ALLOW_SEALING)
118 #define MFD_ALLOW_SEALING 0x0002U
119 #endif
120 
121 void BV (clib_bihash_initiator_init_svm)
122  (BVT (clib_bihash) * h, char *name, u32 nbuckets, u64 memory_size)
123 {
124  uword bucket_size;
125  u8 *mmap_addr;
126  vec_header_t *freelist_vh;
127  int fd;
128 
129  ASSERT (memory_size < (1ULL << 32));
130  /* Set up for memfd sharing */
131  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
132  {
133  clib_unix_warning ("memfd_create");
134  return;
135  }
136 
137  if (ftruncate (fd, memory_size) < 0)
138  {
139  clib_unix_warning ("ftruncate");
140  return;
141  }
142 
143  /* Not mission-critical, complain and continue */
144  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
145  clib_unix_warning ("fcntl (F_ADD_SEALS)");
146 
147  mmap_addr = mmap (0, memory_size,
148  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );
149 
150  if (mmap_addr == MAP_FAILED)
151  {
152  clib_unix_warning ("mmap failed");
153  ASSERT (0);
154  }
155 
156  h->sh = (void *) mmap_addr;
157  h->memfd = fd;
158  nbuckets = 1 << (max_log2 (nbuckets));
159 
160  h->name = (u8 *) name;
161  h->sh->nbuckets = h->nbuckets = nbuckets;
162  h->log2_nbuckets = max_log2 (nbuckets);
163 
164  alloc_arena (h) = (u64) (uword) mmap_addr;
165  alloc_arena_next (h) = CLIB_CACHE_LINE_BYTES;
166  alloc_arena_size (h) = memory_size;
167 
168  bucket_size = nbuckets * sizeof (h->buckets[0]);
169  h->buckets = BV (alloc_aligned) (h, bucket_size);
170  h->sh->buckets_as_u64 = (u64) BV (clib_bihash_get_offset) (h, h->buckets);
171 
172  h->alloc_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES);
173  h->alloc_lock[0] = 0;
174 
175  h->sh->alloc_lock_as_u64 =
176  (u64) BV (clib_bihash_get_offset) (h, (void *) h->alloc_lock);
177  freelist_vh =
178  BV (alloc_aligned) (h,
179  sizeof (vec_header_t) +
180  BIHASH_FREELIST_LENGTH * sizeof (u64));
181  freelist_vh->len = BIHASH_FREELIST_LENGTH;
182  freelist_vh->dlmalloc_header_offset = 0xDEADBEEF;
183  h->sh->freelists_as_u64 =
184  (u64) BV (clib_bihash_get_offset) (h, freelist_vh->vector_data);
185  h->freelists = (void *) (freelist_vh->vector_data);
186 
187  h->fmt_fn = NULL;
188  h->instantiated = 1;
189 }
190 
191 void BV (clib_bihash_responder_init_svm)
192  (BVT (clib_bihash) * h, char *name, int fd)
193 {
194  u8 *mmap_addr;
196  BVT (clib_bihash_shared_header) * sh;
197 
198  /* Trial mapping, to learn the segment size */
199  mmap_addr = mmap (0, 4096, PROT_READ, MAP_SHARED, fd, 0 /* offset */ );
200  if (mmap_addr == MAP_FAILED)
201  {
202  clib_unix_warning ("trial mmap failed");
203  ASSERT (0);
204  }
205 
206  sh = (BVT (clib_bihash_shared_header) *) mmap_addr;
207 
208  memory_size = sh->alloc_arena_size;
209 
210  munmap (mmap_addr, 4096);
211 
212  /* Actual mapping, at the required size */
213  mmap_addr = mmap (0, memory_size,
214  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );
215 
216  if (mmap_addr == MAP_FAILED)
217  {
218  clib_unix_warning ("mmap failed");
219  ASSERT (0);
220  }
221 
222  (void) close (fd);
223 
224  h->sh = (void *) mmap_addr;
225  alloc_arena (h) = (u64) (uword) mmap_addr;
226  h->memfd = -1;
227 
228  h->name = (u8 *) name;
229  h->buckets = BV (clib_bihash_get_value) (h, h->sh->buckets_as_u64);
230  h->nbuckets = h->sh->nbuckets;
231  h->log2_nbuckets = max_log2 (h->nbuckets);
232 
233  h->alloc_lock = BV (clib_bihash_get_value) (h, h->sh->alloc_lock_as_u64);
234  h->freelists = BV (clib_bihash_get_value) (h, h->sh->freelists_as_u64);
235  h->fmt_fn = NULL;
236 }
237 #endif /* BIHASH_32_64_SVM */
238 
239 void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
240  format_function_t * fmt_fn)
241 {
242  h->fmt_fn = fmt_fn;
243 }
244 
245 void BV (clib_bihash_free) (BVT (clib_bihash) * h)
246 {
247  int i;
248 
249  if (PREDICT_FALSE (h->instantiated == 0))
250  goto never_initialized;
251 
252  h->instantiated = 0;
253  vec_free (h->working_copies);
254  vec_free (h->working_copy_lengths);
255 #if BIHASH_32_64_SVM == 0
256  vec_free (h->freelists);
257 #else
258  if (h->memfd > 0)
259  (void) close (h->memfd);
260 #endif
261  clib_mem_vm_free ((void *) (uword) (alloc_arena (h)), alloc_arena_size (h));
262 never_initialized:
263  clib_memset (h, 0, sizeof (*h));
264  for (i = 0; i < vec_len (clib_all_bihashes); i++)
265  {
266  if ((void *) h == clib_all_bihashes[i])
267  {
269  return;
270  }
271  }
272  clib_warning ("Couldn't find hash table %llx on clib_all_bihashes...",
273  (u64) (uword) h);
274 }
275 
276 static
278 BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
279 {
280  BVT (clib_bihash_value) * rv = 0;
281 
282  ASSERT (h->alloc_lock[0]);
283 
284 #if BIHASH_32_64_SVM
285  ASSERT (log2_pages < vec_len (h->freelists));
286 #endif
287 
288  if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0)
289  {
290  vec_validate_init_empty (h->freelists, log2_pages, 0);
291  rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages)));
292  goto initialize;
293  }
294  rv = BV (clib_bihash_get_value) (h, (uword) h->freelists[log2_pages]);
295  h->freelists[log2_pages] = rv->next_free_as_u64;
296 
297 initialize:
298  ASSERT (rv);
299  /*
300  * Latest gcc complains that the length arg is zero
301  * if we replace (1<<log2_pages) with vec_len(rv).
302  * No clue.
303  */
304  clib_memset (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
305  return rv;
306 }
307 
308 static void
309 BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
310  u32 log2_pages)
311 {
312  ASSERT (h->alloc_lock[0]);
313 
314  ASSERT (vec_len (h->freelists) > log2_pages);
315 
316  if (CLIB_DEBUG > 0)
317  clib_memset (v, 0xFE, sizeof (*v) * (1 << log2_pages));
318 
319  v->next_free_as_u64 = (u64) h->freelists[log2_pages];
320  h->freelists[log2_pages] = (u64) BV (clib_bihash_get_offset) (h, v);
321 }
322 
323 static inline void
324 BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
325 {
326  BVT (clib_bihash_value) * v;
327  BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
328  BVT (clib_bihash_value) * working_copy;
329  u32 thread_index = os_get_thread_index ();
330  int log2_working_copy_length;
331 
332  ASSERT (h->alloc_lock[0]);
333 
334  if (thread_index >= vec_len (h->working_copies))
335  {
336  vec_validate (h->working_copies, thread_index);
337  vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0);
338  }
339 
340  /*
341  * working_copies are per-cpu so that near-simultaneous
342  * updates from multiple threads will not result in sporadic, spurious
343  * lookup failures.
344  */
345  working_copy = h->working_copies[thread_index];
346  log2_working_copy_length = h->working_copy_lengths[thread_index];
347 
348  h->saved_bucket.as_u64 = b->as_u64;
349 
350  if (b->log2_pages > log2_working_copy_length)
351  {
352  /*
353  * It's not worth the bookkeeping to free working copies
354  * if (working_copy)
355  * clib_mem_free (working_copy);
356  */
357  working_copy = BV (alloc_aligned)
358  (h, sizeof (working_copy[0]) * (1 << b->log2_pages));
359  h->working_copy_lengths[thread_index] = b->log2_pages;
360  h->working_copies[thread_index] = working_copy;
361 
362  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_working_copy_lost,
363  1ULL << b->log2_pages);
364  }
365 
366  v = BV (clib_bihash_get_value) (h, b->offset);
367 
368  clib_memcpy_fast (working_copy, v, sizeof (*v) * (1 << b->log2_pages));
369  working_bucket.as_u64 = b->as_u64;
370  working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
372  b->as_u64 = working_bucket.as_u64;
373  h->working_copies[thread_index] = working_copy;
374 }
375 
376 static
378 BV (split_and_rehash)
379  (BVT (clib_bihash) * h,
380  BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
381  u32 new_log2_pages)
382 {
383  BVT (clib_bihash_value) * new_values, *new_v;
384  int i, j, length_in_kvs;
385 
386  ASSERT (h->alloc_lock[0]);
387 
388  new_values = BV (value_alloc) (h, new_log2_pages);
389  length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
390 
391  for (i = 0; i < length_in_kvs; i++)
392  {
393  u64 new_hash;
394 
395  /* Entry not in use? Forget it */
396  if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
397  continue;
398 
399  /* rehash the item onto its new home-page */
400  new_hash = BV (clib_bihash_hash) (&(old_values->kvp[i]));
401  new_hash >>= h->log2_nbuckets;
402  new_hash &= (1 << new_log2_pages) - 1;
403  new_v = &new_values[new_hash];
404 
405  /* Across the new home-page */
406  for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
407  {
408  /* Empty slot */
409  if (BV (clib_bihash_is_free) (&(new_v->kvp[j])))
410  {
411  clib_memcpy_fast (&(new_v->kvp[j]), &(old_values->kvp[i]),
412  sizeof (new_v->kvp[j]));
413  goto doublebreak;
414  }
415  }
416  /* Crap. Tell caller to try again */
417  BV (value_free) (h, new_values, new_log2_pages);
418  return 0;
419  doublebreak:;
420  }
421 
422  return new_values;
423 }
424 
425 static
428  (BVT (clib_bihash) * h,
429  BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
430  u32 new_log2_pages)
431 {
432  BVT (clib_bihash_value) * new_values;
433  int i, j, new_length, old_length;
434 
435  ASSERT (h->alloc_lock[0]);
436 
437  new_values = BV (value_alloc) (h, new_log2_pages);
438  new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
439  old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
440 
441  j = 0;
442  /* Across the old value array */
443  for (i = 0; i < old_length; i++)
444  {
445  /* Find a free slot in the new linear scan bucket */
446  for (; j < new_length; j++)
447  {
448  /* Old value not in use? Forget it. */
449  if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
450  goto doublebreak;
451 
452  /* New value should never be in use */
453  if (BV (clib_bihash_is_free) (&(new_values->kvp[j])))
454  {
455  /* Copy the old value and move along */
456  clib_memcpy_fast (&(new_values->kvp[j]), &(old_values->kvp[i]),
457  sizeof (new_values->kvp[j]));
458  j++;
459  goto doublebreak;
460  }
461  }
462  /* This should never happen... */
463  clib_warning ("BUG: linear rehash failed!");
464  BV (value_free) (h, new_values, new_log2_pages);
465  return 0;
466 
467  doublebreak:;
468  }
469  return new_values;
470 }
471 
472 static inline int BV (clib_bihash_add_del_inline)
473  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add,
474  int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
475 {
476  u32 bucket_index;
477  BVT (clib_bihash_bucket) * b, tmp_b;
478  BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
479  int i, limit;
480  u64 hash, new_hash;
481  u32 new_log2_pages, old_log2_pages;
482  u32 thread_index = os_get_thread_index ();
483  int mark_bucket_linear;
484  int resplit_once;
485 
486  /*
487  * Create the table (is_add=1,2), or flunk the request now (is_add=0)
488  * Use the alloc_lock to protect the instantiate operation.
489  */
490  if (PREDICT_FALSE (h->instantiated == 0))
491  {
492  if (is_add == 0)
493  return (-1);
494 
495  BV (clib_bihash_alloc_lock) (h);
496  if (h->instantiated == 0)
497  BV (clib_bihash_instantiate) (h);
498  BV (clib_bihash_alloc_unlock) (h);
499  }
500 
501  hash = BV (clib_bihash_hash) (add_v);
502 
503  bucket_index = hash & (h->nbuckets - 1);
504  b = &h->buckets[bucket_index];
505 
506  hash >>= h->log2_nbuckets;
507 
508  BV (clib_bihash_lock_bucket) (b);
509 
510  /* First elt in the bucket? */
511  if (BV (clib_bihash_bucket_is_empty) (b))
512  {
513  if (is_add == 0)
514  {
515  BV (clib_bihash_unlock_bucket) (b);
516  return (-1);
517  }
518 
519  BV (clib_bihash_alloc_lock) (h);
520  v = BV (value_alloc) (h, 0);
521  BV (clib_bihash_alloc_unlock) (h);
522 
523  *v->kvp = *add_v;
524  tmp_b.as_u64 = 0; /* clears bucket lock */
525  tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
526  tmp_b.refcnt = 1;
528 
529  b->as_u64 = tmp_b.as_u64; /* unlocks the bucket */
530  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_alloc_add, 1);
531 
532  return (0);
533  }
534 
535  /* WARNING: we're still looking at the live copy... */
536  limit = BIHASH_KVP_PER_PAGE;
537  v = BV (clib_bihash_get_value) (h, b->offset);
538 
539  v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
540  if (b->linear_search)
541  limit <<= b->log2_pages;
542 
543  if (is_add)
544  {
545  /*
546  * Because reader threads are looking at live data,
547  * we have to be extra careful. Readers do NOT hold the
548  * bucket lock. We need to be SLOWER than a search, past the
549  * point where readers CHECK the bucket lock.
550  */
551 
552  /*
553  * For obvious (in hindsight) reasons, see if we're supposed to
554  * replace an existing key, then look for an empty slot.
555  */
556  for (i = 0; i < limit; i++)
557  {
558  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
559  {
560  /* Add but do not overwrite? */
561  if (is_add == 2)
562  {
563  BV (clib_bihash_unlock_bucket) (b);
564  return (-2);
565  }
566 
567  CLIB_MEMORY_BARRIER (); /* Add a delay */
568  clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
569  BV (clib_bihash_unlock_bucket) (b);
570  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
571  return (0);
572  }
573  }
574  /*
575  * Look for an empty slot. If found, use it
576  */
577  for (i = 0; i < limit; i++)
578  {
579  if (BV (clib_bihash_is_free) (&(v->kvp[i])))
580  {
581  /*
582  * Copy the value first, so that if a reader manages
583  * to match the new key, the value will be right...
584  */
585  clib_memcpy_fast (&(v->kvp[i].value),
586  &add_v->value, sizeof (add_v->value));
587  CLIB_MEMORY_BARRIER (); /* Make sure the value has settled */
588  clib_memcpy_fast (&(v->kvp[i]), &add_v->key,
589  sizeof (add_v->key));
590  b->refcnt++;
591  ASSERT (b->refcnt > 0);
592  BV (clib_bihash_unlock_bucket) (b);
593  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_add, 1);
594  return (0);
595  }
596  }
597  /* look for stale data to overwrite */
598  if (is_stale_cb)
599  {
600  for (i = 0; i < limit; i++)
601  {
602  if (is_stale_cb (&(v->kvp[i]), arg))
603  {
605  clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
606  BV (clib_bihash_unlock_bucket) (b);
607  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
608  return (0);
609  }
610  }
611  }
612  /* Out of space in this bucket, split the bucket... */
613  }
614  else /* delete case */
615  {
616  for (i = 0; i < limit; i++)
617  {
618  /* Found the key? Kill it... */
619  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
620  {
621  clib_memset (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
622  /* Is the bucket empty? */
623  if (PREDICT_TRUE (b->refcnt > 1))
624  {
625  b->refcnt--;
626  BV (clib_bihash_unlock_bucket) (b);
627  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
628  return (0);
629  }
630  else /* yes, free it */
631  {
632  /* Save old bucket value, need log2_pages to free it */
633  tmp_b.as_u64 = b->as_u64;
635 
636  /* Kill and unlock the bucket */
637  b->as_u64 = 0;
638 
639  /* And free the backing storage */
640  BV (clib_bihash_alloc_lock) (h);
641  /* Note: v currently points into the middle of the bucket */
642  v = BV (clib_bihash_get_value) (h, tmp_b.offset);
643  BV (value_free) (h, v, tmp_b.log2_pages);
644  BV (clib_bihash_alloc_unlock) (h);
645  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del_free,
646  1);
647  return (0);
648  }
649  }
650  }
651  /* Not found... */
652  BV (clib_bihash_unlock_bucket) (b);
653  return (-3);
654  }
655 
656  /* Move readers to a (locked) temp copy of the bucket */
657  BV (clib_bihash_alloc_lock) (h);
658  BV (make_working_copy) (h, b);
659 
660  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
661 
662  old_log2_pages = h->saved_bucket.log2_pages;
663  new_log2_pages = old_log2_pages + 1;
664  mark_bucket_linear = 0;
665  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_split_add, 1);
666  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, old_log2_pages);
667 
668  working_copy = h->working_copies[thread_index];
669  resplit_once = 0;
670  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, 1);
671 
672  new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
673  new_log2_pages);
674  if (new_v == 0)
675  {
676  try_resplit:
677  resplit_once = 1;
678  new_log2_pages++;
679  /* Try re-splitting. If that fails, fall back to linear search */
680  new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
681  new_log2_pages);
682  if (new_v == 0)
683  {
684  mark_linear:
685  new_log2_pages--;
686  /* pinned collisions, use linear search */
687  new_v =
688  BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
689  new_log2_pages);
690  mark_bucket_linear = 1;
691  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_linear, 1);
692  }
693  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_resplit, 1);
694  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits,
695  old_log2_pages + 1);
696  }
697 
698  /* Try to add the new entry */
699  save_new_v = new_v;
700  new_hash = BV (clib_bihash_hash) (add_v);
701  limit = BIHASH_KVP_PER_PAGE;
702  if (mark_bucket_linear)
703  limit <<= new_log2_pages;
704  new_hash >>= h->log2_nbuckets;
705  new_hash &= (1 << new_log2_pages) - 1;
706  new_v += mark_bucket_linear ? 0 : new_hash;
707 
708  for (i = 0; i < limit; i++)
709  {
710  if (BV (clib_bihash_is_free) (&(new_v->kvp[i])))
711  {
712  clib_memcpy_fast (&(new_v->kvp[i]), add_v, sizeof (*add_v));
713  goto expand_ok;
714  }
715  }
716 
717  /* Crap. Try again */
718  BV (value_free) (h, save_new_v, new_log2_pages);
719  /*
720  * If we've already doubled the size of the bucket once,
721  * fall back to linear search now.
722  */
723  if (resplit_once)
724  goto mark_linear;
725  else
726  goto try_resplit;
727 
728 expand_ok:
729  tmp_b.log2_pages = new_log2_pages;
730  tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
731  tmp_b.linear_search = mark_bucket_linear;
732  tmp_b.refcnt = h->saved_bucket.refcnt + 1;
733  ASSERT (tmp_b.refcnt > 0);
734  tmp_b.lock = 0;
736  b->as_u64 = tmp_b.as_u64;
737  /* free the old bucket */
738  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
739  BV (value_free) (h, v, h->saved_bucket.log2_pages);
740  BV (clib_bihash_alloc_unlock) (h);
741  return (0);
742 }
743 
744 int BV (clib_bihash_add_del)
745  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add)
746 {
747  return BV (clib_bihash_add_del_inline) (h, add_v, is_add, 0, 0);
748 }
749 
750 int BV (clib_bihash_add_or_overwrite_stale)
751  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
752  int (*stale_callback) (BVT (clib_bihash_kv) *, void *), void *arg)
753 {
754  return BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
755 }
756 
757 int BV (clib_bihash_search)
758  (BVT (clib_bihash) * h,
759  BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
760 {
761  u64 hash;
762  u32 bucket_index;
763  BVT (clib_bihash_value) * v;
764  BVT (clib_bihash_bucket) * b;
765  int i, limit;
766 
767  ASSERT (valuep);
768 
769  if (PREDICT_FALSE (alloc_arena (h) == 0))
770  return -1;
771 
772  hash = BV (clib_bihash_hash) (search_key);
773 
774  bucket_index = hash & (h->nbuckets - 1);
775  b = &h->buckets[bucket_index];
776 
777  if (BV (clib_bihash_bucket_is_empty) (b))
778  return -1;
779 
780  if (PREDICT_FALSE (b->lock))
781  {
782  volatile BVT (clib_bihash_bucket) * bv = b;
783  while (bv->lock)
784  CLIB_PAUSE ();
785  }
786 
787  hash >>= h->log2_nbuckets;
788 
789  v = BV (clib_bihash_get_value) (h, b->offset);
790  limit = BIHASH_KVP_PER_PAGE;
791  v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
792  if (PREDICT_FALSE (b->linear_search))
793  limit <<= b->log2_pages;
794 
795  for (i = 0; i < limit; i++)
796  {
797  if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
798  {
799  *valuep = v->kvp[i];
800  return 0;
801  }
802  }
803  return -1;
804 }
805 
806 u8 *BV (format_bihash) (u8 * s, va_list * args)
807 {
808  BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *);
809  int verbose = va_arg (*args, int);
810  BVT (clib_bihash_bucket) * b;
811  BVT (clib_bihash_value) * v;
812  int i, j, k;
813  u64 active_elements = 0;
814  u64 active_buckets = 0;
815  u64 linear_buckets = 0;
816  u64 used_bytes;
817 
818  s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");
819 
820  if (PREDICT_FALSE (alloc_arena (h) == 0))
821  return format (s, "[empty, uninitialized]");
822 
823  for (i = 0; i < h->nbuckets; i++)
824  {
825  b = &h->buckets[i];
826  if (BV (clib_bihash_bucket_is_empty) (b))
827  {
828  if (verbose > 1)
829  s = format (s, "[%d]: empty\n", i);
830  continue;
831  }
832 
833  active_buckets++;
834 
835  if (b->linear_search)
836  linear_buckets++;
837 
838  if (verbose)
839  {
840  s = format (s, "[%d]: heap offset %lld, len %d, linear %d\n", i,
841  b->offset, (1 << b->log2_pages), b->linear_search);
842  }
843 
844  v = BV (clib_bihash_get_value) (h, b->offset);
845  for (j = 0; j < (1 << b->log2_pages); j++)
846  {
847  for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
848  {
849  if (BV (clib_bihash_is_free) (&v->kvp[k]))
850  {
851  if (verbose > 1)
852  s = format (s, " %d: empty\n",
853  j * BIHASH_KVP_PER_PAGE + k);
854  continue;
855  }
856  if (verbose)
857  {
858  if (h->fmt_fn)
859  {
860  s = format (s, " %d: %U\n",
861  j * BIHASH_KVP_PER_PAGE + k,
862  h->fmt_fn, &(v->kvp[k]), verbose);
863  }
864  else
865  {
866  s = format (s, " %d: %U\n",
867  j * BIHASH_KVP_PER_PAGE + k,
868  BV (format_bihash_kvp), &(v->kvp[k]));
869  }
870  }
871  active_elements++;
872  }
873  v++;
874  }
875  }
876 
877  s = format (s, " %lld active elements %lld active buckets\n",
878  active_elements, active_buckets);
879  s = format (s, " %d free lists\n", vec_len (h->freelists));
880 
881  for (i = 0; i < vec_len (h->freelists); i++)
882  {
883  u32 nfree = 0;
884  BVT (clib_bihash_value) * free_elt;
885  u64 free_elt_as_u64 = h->freelists[i];
886 
887  while (free_elt_as_u64)
888  {
889  free_elt = BV (clib_bihash_get_value) (h, free_elt_as_u64);
890  nfree++;
891  free_elt_as_u64 = free_elt->next_free_as_u64;
892  }
893 
894  if (nfree || verbose)
895  s = format (s, " [len %d] %u free elts\n", 1 << i, nfree);
896  }
897 
898  s = format (s, " %lld linear search buckets\n", linear_buckets);
899  used_bytes = alloc_arena_next (h);
900  s = format (s,
901  " arena: base %llx, next %llx\n"
902  " used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n",
903  alloc_arena (h), alloc_arena_next (h),
904  used_bytes, used_bytes >> 20,
905  alloc_arena_size (h), alloc_arena_size (h) >> 20);
906  return s;
907 }
908 
910  (BVT (clib_bihash) * h, void *callback, void *arg)
911 {
912  int i, j, k;
913  BVT (clib_bihash_bucket) * b;
914  BVT (clib_bihash_value) * v;
915  void (*fp) (BVT (clib_bihash_kv) *, void *) = callback;
916 
917  if (PREDICT_FALSE (alloc_arena (h) == 0))
918  return;
919 
920  for (i = 0; i < h->nbuckets; i++)
921  {
922  b = &h->buckets[i];
923  if (BV (clib_bihash_bucket_is_empty) (b))
924  continue;
925 
926  v = BV (clib_bihash_get_value) (h, b->offset);
927  for (j = 0; j < (1 << b->log2_pages); j++)
928  {
929  for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
930  {
931  if (BV (clib_bihash_is_free) (&v->kvp[k]))
932  continue;
933 
934  (*fp) (&v->kvp[k], arg);
935  /*
936  * In case the callback deletes the last entry in the bucket...
937  */
938  if (BV (clib_bihash_bucket_is_empty) (b))
939  goto doublebreak;
940  }
941  v++;
942  }
943  doublebreak:
944  ;
945  }
946 }
947 
948 /** @endcond */
949 
950 /*
951  * fd.io coding-style-patch-verification: ON
952  *
953  * Local Variables:
954  * eval: (c-set-style "gnu")
955  * End:
956  */
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:439
#define CLIB_PAUSE()
Definition: lock.h:23
#define BIHASH_KVP_PER_PAGE
Definition: bihash_16_8.h:21
a
Definition: bitmap.h:538
void clib_bihash_free(clib_bihash *h)
Destroy a bounded index extensible hash table.
#define PREDICT_TRUE(x)
Definition: clib.h:113
unsigned long u64
Definition: types.h:89
#define clib_memcpy_fast(a, b, c)
Definition: string.h:81
#define F_ADD_SEALS
Definition: mem.c:40
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int memfd_create(const char *name, unsigned int flags)
Definition: syscall.h:52
void os_out_of_memory(void)
Definition: unix-misc.c:219
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:522
for(i=1;i<=collision_buckets;i++)
int i
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
unsigned char u8
Definition: types.h:56
u8 *() format_function_t(u8 *s, va_list *args)
Definition: format.h:48
#define MFD_ALLOW_SEALING
Definition: main.c:104
int clib_bihash_add_del(clib_bihash *h, clib_bihash_kv *add_v, int is_add)
Add or delete a (key,value) pair from a bi-hash table.
u32 dlmalloc_header_offset
offset to memory allocator offset
Definition: vec_bootstrap.h:61
static BVT(clib_bihash)
Definition: adj_nbr.c:28
unsigned int u32
Definition: types.h:88
#define F_SEAL_SHRINK
Definition: mem.c:44
static uword clib_bihash_get_offset(clib_bihash *h, void *v)
Get clib mheap offset given a pointer.
u64 memory_size
Definition: vhost_user.h:141
static vnet_classify_entry_t * split_and_rehash_linear(vnet_classify_table_t *t, vnet_classify_entry_t *old_values, u32 old_log2_pages, u32 new_log2_pages)
#define PREDICT_FALSE(x)
Definition: clib.h:112
void clib_bihash_init(clib_bihash *h, char *name, u32 nbuckets, uword memory_size)
initialize a bounded index extensible hash table
u8 name[64]
Definition: memclnt.api:152
void clib_bihash_foreach_key_value_pair(clib_bihash *h, void *callback, void *arg)
Visit active (key,value) pairs in a bi-hash table.
static vnet_classify_entry_t * split_and_rehash(vnet_classify_table_t *t, vnet_classify_entry_t *old_values, u32 old_log2_pages, u32 new_log2_pages)
#define vec_free(V)
Free vector&#39;s memory (no header).
Definition: vec.h:341
static void * clib_mem_set_heap(void *heap)
Definition: mem.h:290
#define clib_warning(format, args...)
Definition: error.h:59
u32 len
Number of elements in vector (NOT its allocated length).
Definition: vec_bootstrap.h:60
static void make_working_copy(vnet_classify_table_t *t, vnet_classify_bucket_t *b)
#define ASSERT(truth)
#define vec_delete(V, N, M)
Delete N elements starting at element M.
Definition: vec.h:784
static void clib_mem_free(void *p)
Definition: mem.h:226
u8 log2_pages
Definition: bihash_doc.h:62
vector header structure
Definition: vec_bootstrap.h:55
template key/value backing page structure
Definition: bihash_doc.h:44
static void clib_mem_vm_free(void *addr, uword size)
Definition: mem.h:356
u8 vector_data[0]
Vector data .
Definition: vec_bootstrap.h:63
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static uword max_log2(uword x)
Definition: clib.h:192
u64 uword
Definition: types.h:112
#define clib_unix_warning(format, args...)
Definition: error.h:68
static_always_inline uword os_get_thread_index(void)
Definition: os.h:62
static void * clib_mem_alloc_aligned(uword size, uword align)
Definition: mem.h:161
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:116
static void * clib_bihash_get_value(clib_bihash *h, uword offset)
Get pointer to value page given its clib mheap offset.
void ** clib_all_bihashes
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
Definition: vec.h:486
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
void * clib_all_bihash_set_heap(void)
static void * clib_mem_vm_alloc(uword size)
Definition: mem.h:339
static void * alloc_aligned(uword size, uword log2_align, void **ptr_to_free)
Definition: test_vec.h:191