FD.io VPP  v21.01.1
Vector Packet Processing
svm.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
4  * library
5  *
6  * Copyright (c) 2009 Cisco and/or its affiliates.
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at:
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *------------------------------------------------------------------
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #include <sys/stat.h>
26 #include <netinet/in.h>
27 #include <signal.h>
28 #include <pthread.h>
29 #include <unistd.h>
30 #include <time.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <vppinfra/clib.h>
34 #include <vppinfra/vec.h>
35 #include <vppinfra/hash.h>
36 #include <vppinfra/bitmap.h>
37 #include <vppinfra/fifo.h>
38 #include <vppinfra/time.h>
39 #include <vppinfra/heap.h>
40 #include <vppinfra/pool.h>
41 #include <vppinfra/format.h>
42 
43 #include "svm.h"
44 
46 static int root_rp_refcount;
47 
48 #define MAXLOCK 2
49 static pthread_mutex_t *mutexes_held[MAXLOCK];
50 static int nheld;
51 
54 {
55  return root_rp;
56 }
57 
58 #define MUTEX_DEBUG
59 
60 u64
62 {
63 #ifdef CLIB_SANITIZE_ADDR
64  return 0x200000000000;
65 #endif
66 
67 #if __aarch64__
68  /* On AArch64 VA space can have different size, from 36 to 48 bits.
69  Here we are trying to detect VA bits by parsing /proc/self/maps
70  address ranges */
71  int fd;
72  unformat_input_t input;
73  u64 start, end = 0;
74  u8 bits = 0;
75 
76  if ((fd = open ("/proc/self/maps", 0)) < 0)
77  clib_unix_error ("open '/proc/self/maps'");
78 
79  unformat_init_clib_file (&input, fd);
81  {
82  if (unformat (&input, "%llx-%llx", &start, &end))
83  end--;
84  unformat_skip_line (&input);
85  }
86  unformat_free (&input);
87  close (fd);
88 
89  bits = count_leading_zeros (end);
90  bits = 64 - bits;
91  if (bits >= 36 && bits <= 48)
92  return ((1ul << bits) / 4) - (2 * SVM_GLOBAL_REGION_SIZE);
93  else
94  clib_unix_error ("unexpected va bits '%u'", bits);
95 #endif
96 
97  /* default value */
98  return 0x130000000ULL;
99 }
100 
101 static void
102 region_lock (svm_region_t * rp, int tag)
103 {
104  pthread_mutex_lock (&rp->mutex);
105 #ifdef MUTEX_DEBUG
106  rp->mutex_owner_pid = getpid ();
107  rp->mutex_owner_tag = tag;
108 #endif
109  ASSERT (nheld < MAXLOCK); //NOSONAR
110  /*
111  * Keep score of held mutexes so we can try to exit
112  * cleanly if the world comes to an end at the worst possible
113  * moment
114  */
115  mutexes_held[nheld++] = &rp->mutex;
116 }
117 
118 static void
120 {
121  int i, j;
122 #ifdef MUTEX_DEBUG
123  rp->mutex_owner_pid = 0;
124  rp->mutex_owner_tag = 0;
125 #endif
126 
127  for (i = nheld - 1; i >= 0; i--)
128  {
129  if (mutexes_held[i] == &rp->mutex)
130  {
131  for (j = i; j < MAXLOCK - 1; j++)
132  mutexes_held[j] = mutexes_held[j + 1];
133  nheld--;
134  goto found;
135  }
136  }
137  ASSERT (0);
138 
139 found:
141  pthread_mutex_unlock (&rp->mutex);
142 }
143 
144 
145 static u8 *
146 format_svm_flags (u8 * s, va_list * args)
147 {
148  uword f = va_arg (*args, uword);
149 
150  if (f & SVM_FLAGS_MHEAP)
151  s = format (s, "MHEAP ");
152  if (f & SVM_FLAGS_FILE)
153  s = format (s, "FILE ");
154  if (f & SVM_FLAGS_NODATA)
155  s = format (s, "NODATA ");
156  if (f & SVM_FLAGS_NEED_DATA_INIT)
157  s = format (s, "INIT ");
158 
159  return (s);
160 }
161 
162 static u8 *
163 format_svm_size (u8 * s, va_list * args)
164 {
165  uword size = va_arg (*args, uword);
166 
167  if (size >= (1 << 20))
168  {
169  s = format (s, "(%d mb)", size >> 20);
170  }
171  else if (size >= (1 << 10))
172  {
173  s = format (s, "(%d kb)", size >> 10);
174  }
175  else
176  {
177  s = format (s, "(%d bytes)", size);
178  }
179  return (s);
180 }
181 
/*
 * format_svm_region
 *
 * format() helper. Consumes two arguments from args: an svm_region_t *
 * and an int "verbose" flag. Appends a description of the region to s
 * and returns s.
 *
 * Always printed: region name, base va, size, user_ctx, bitmap_size and
 * the list of client pids. With verbose set, the flags, the heap/data
 * pointers and the in-use page ranges derived from rp->bitmap are
 * printed as well.
 *
 * NOTE(review): this listing has dropped the line that completes the
 * first format() call below (between "rp->virtual_base," and the
 * user_ctx line) - confirm against the real source before editing.
 */
182 u8 *
183 format_svm_region (u8 * s, va_list * args)
184 {
185  svm_region_t *rp = va_arg (*args, svm_region_t *);
186  int verbose = va_arg (*args, int);
187  int i;
188  uword lo, hi;
189 
190  s = format (s, "%s: base va 0x%x size 0x%x %U\n",
191  rp->region_name, rp->virtual_base,
193  s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
194  rp->user_ctx, rp->bitmap_size);
195 
196  if (verbose)
197  {
198  s = format (s, " flags: 0x%x %U\n", rp->flags,
199  format_svm_flags, rp->flags);
200  s = format (s,
201  " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
202  rp->region_heap, rp->data_base, rp->data_heap);
203  }
204 
205  s = format (s, " %d clients, pids: ", vec_len (rp->client_pids));
206 
207  for (i = 0; i < vec_len (rp->client_pids); i++)
208  s = format (s, "%d ", rp->client_pids[i]);
209 
210  s = format (s, "\n");
211 
212  if (verbose)
213  {
/* lo == ~0 means "not currently inside a run of in-use pages" */
214  lo = hi = ~0;
215 
216  s = format (s, " VM in use: ");
217 
/*
 * Walk the page bitmap. A set bit opens or extends a run; the first
 * clear bit after a run prints it and resets lo/hi.
 * NOTE(review): a run that is still open when the loop ends (set bits
 * up to bitmap_size - 1) is never printed, because output only happens
 * on a clear bit.
 */
218  for (i = 0; i < rp->bitmap_size; i++)
219  {
220  if (clib_bitmap_get_no_check (rp->bitmap, i) != 0)
221  {
222  if (lo == ~0)
223  {
224  hi = lo = rp->virtual_base + i * MMAP_PAGESIZE;
225  }
226  else
227  {
228  hi = rp->virtual_base + i * MMAP_PAGESIZE;
229  }
230  }
231  else
232  {
233  if (lo != ~0)
234  {
/* End of run: last byte of the previous page */
235  hi = rp->virtual_base + i * MMAP_PAGESIZE - 1;
236  s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
237  (hi - lo) >> 10);
238  lo = hi = ~0;
239  }
240  }
241  }
242  }
243 
244  return (s);
245 }
246 
247 /*
248  * rnd_pagesize
249  * Round to a pagesize multiple, presumably 4k works
250  */
251 static u64
253 {
254  u64 rv;
255 
256  rv = (size + (MMAP_PAGESIZE - 1)) & ~(MMAP_PAGESIZE - 1);
257  return (rv);
258 }
259 
260 /*
261  * svm_data_region_setup
262  */
263 static int
265 {
266  int fd;
267  u8 junk = 0;
268  uword map_size;
269 
270  map_size = rp->virtual_size - (MMAP_PAGESIZE +
271  (a->pvt_heap_size ? a->pvt_heap_size :
273 
274  if (a->flags & SVM_FLAGS_FILE)
275  {
276  struct stat statb;
277 
278  fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
279 
280  if (fd < 0)
281  {
282  clib_unix_warning ("open");
283  return -1;
284  }
285 
286  if (fstat (fd, &statb) < 0)
287  {
288  clib_unix_warning ("fstat");
289  close (fd);
290  return -2;
291  }
292 
293  if (statb.st_mode & S_IFREG)
294  {
295  if (statb.st_size == 0)
296  {
297  if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
298  {
299  clib_unix_warning ("seek region size");
300  close (fd);
301  return -3;
302  }
303  if (write (fd, &junk, 1) != 1)
304  {
305  clib_unix_warning ("set region size");
306  close (fd);
307  return -3;
308  }
309  }
310  else
311  {
312  map_size = rnd_pagesize (statb.st_size);
313  }
314  }
315  else
316  {
317  map_size = a->backing_mmap_size;
318  }
319 
320  ASSERT (map_size <= rp->virtual_size -
322 
323  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
324  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
325  {
326  clib_unix_warning ("mmap");
327  close (fd);
328  return -3;
329  }
330  close (fd);
331  CLIB_MEM_UNPOISON (rp->data_base, map_size);
332  rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
333  rp->flags |= SVM_FLAGS_FILE;
334  }
335 
336  if (a->flags & SVM_FLAGS_MHEAP)
337  {
338  rp->data_heap = clib_mem_create_heap (rp->data_base, map_size,
339  1 /* locked */ , "svm data");
340 
341  rp->flags |= SVM_FLAGS_MHEAP;
342  }
343  return 0;
344 }
345 
346 static int
348 {
349  int fd;
350  u8 junk = 0;
351  uword map_size;
352  struct stat statb;
353 
354  map_size = rp->virtual_size -
357 
358  if (a->flags & SVM_FLAGS_FILE)
359  {
360 
361  fd = open (a->backing_file, O_RDWR, 0777);
362 
363  if (fd < 0)
364  {
365  clib_unix_warning ("open");
366  return -1;
367  }
368 
369  if (fstat (fd, &statb) < 0)
370  {
371  clib_unix_warning ("fstat");
372  close (fd);
373  return -2;
374  }
375 
376  if (statb.st_mode & S_IFREG)
377  {
378  if (statb.st_size == 0)
379  {
380  if (lseek (fd, map_size, SEEK_SET) == (off_t) - 1)
381  {
382  clib_unix_warning ("seek region size");
383  close (fd);
384  return -3;
385  }
386  if (write (fd, &junk, 1) != 1)
387  {
388  clib_unix_warning ("set region size");
389  close (fd);
390  return -3;
391  }
392  }
393  else
394  {
395  map_size = rnd_pagesize (statb.st_size);
396  }
397  }
398  else
399  {
400  map_size = a->backing_mmap_size;
401  }
402 
403  ASSERT (map_size <= rp->virtual_size
404  - (MMAP_PAGESIZE
405  +
407 
408  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
409  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED)
410  {
411  clib_unix_warning ("mmap");
412  close (fd);
413  return -3;
414  }
415  close (fd);
416  CLIB_MEM_UNPOISON (rp->data_base, map_size);
417  }
418  return 0;
419 }
420 
421 u8 *
423 {
424  u8 *shm_name;
425  int root_path_offset = 0;
426  int name_offset = 0;
427 
428  if (a->root_path)
429  {
430  /* Tolerate present or absent slashes */
431  if (a->root_path[0] == '/')
432  root_path_offset++;
433 
434  if (a->name[0] == '/')
435  name_offset = 1;
436 
437  shm_name = format (0, "/%s-%s%c", &a->root_path[root_path_offset],
438  &a->name[name_offset], 0);
439  }
440  else
441  shm_name = format (0, "%s%c", a->name, 0);
442  return (shm_name);
443 }
444 
445 void
447 {
448  pthread_mutexattr_t attr;
449  pthread_condattr_t cattr;
450  int nbits, words, bit;
451  int overhead_space;
452  void *oldheap;
453  uword data_base;
454  ASSERT (rp);
455  int rv;
456 
457  clib_memset (rp, 0, sizeof (*rp));
458 
459  if (pthread_mutexattr_init (&attr))
460  clib_unix_warning ("mutexattr_init");
461 
462  if (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED))
463  clib_unix_warning ("mutexattr_setpshared");
464 
465  if (pthread_mutex_init (&rp->mutex, &attr))
466  clib_unix_warning ("mutex_init");
467 
468  if (pthread_mutexattr_destroy (&attr))
469  clib_unix_warning ("mutexattr_destroy");
470 
471  if (pthread_condattr_init (&cattr))
472  clib_unix_warning ("condattr_init");
473 
474  if (pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED))
475  clib_unix_warning ("condattr_setpshared");
476 
477  if (pthread_cond_init (&rp->condvar, &cattr))
478  clib_unix_warning ("cond_init");
479 
480  if (pthread_condattr_destroy (&cattr))
481  clib_unix_warning ("condattr_destroy");
482 
483  region_lock (rp, 1);
484 
485  rp->virtual_base = a->baseva;
486  rp->virtual_size = a->size;
487 
489  (uword_to_pointer (a->baseva + MMAP_PAGESIZE, void *),
490  (a->pvt_heap_size !=
491  0) ? a->pvt_heap_size : SVM_PVT_MHEAP_SIZE, 1 /* locked */ ,
492  "svm region");
493 
494  oldheap = svm_push_pvt_heap (rp);
495 
496  rp->region_name = (char *) format (0, "%s%c", a->name, 0);
497  vec_add1 (rp->client_pids, getpid ());
498 
499  nbits = rp->virtual_size / MMAP_PAGESIZE;
500 
501  ASSERT (nbits > 0);
502  rp->bitmap_size = nbits;
503  words = (nbits + BITS (uword) - 1) / BITS (uword);
504  vec_validate (rp->bitmap, words - 1);
505 
506  overhead_space = MMAP_PAGESIZE /* header */ +
508 
509  bit = 0;
510  data_base = (uword) rp->virtual_base;
511 
512  if (a->flags & SVM_FLAGS_NODATA)
514 
515  do
516  {
517  clib_bitmap_set_no_check (rp->bitmap, bit, 1);
518  bit++;
519  overhead_space -= MMAP_PAGESIZE;
520  data_base += MMAP_PAGESIZE;
521  }
522  while (overhead_space > 0);
523 
524  rp->data_base = (void *) data_base;
525 
526  /*
527  * Note: although the POSIX spec guarantees that only one
528  * process enters this block, we have to play games
529  * to hold off clients until e.g. the mutex is ready
530  */
531  rp->version = SVM_VERSION;
532 
533  /* setup the data portion of the region */
534 
535  rv = svm_data_region_create (a, rp);
536  if (rv)
537  {
538  clib_warning ("data_region_create: %d", rv);
539  }
540 
541  region_unlock (rp);
542 
543  svm_pop_heap (oldheap);
544 }
545 
546 /*
547  * svm_map_region
548  */
549 void *
551 {
552  int svm_fd;
553  svm_region_t *rp;
554  int deadman = 0;
555  u8 junk = 0;
556  void *oldheap;
557  int rv;
558  int pid_holding_region_lock;
559  u8 *shm_name;
560  int dead_region_recovery = 0;
561  int time_left;
562  struct stat stat;
563  struct timespec ts, tsrem;
564 
565  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
566  ASSERT (a->name);
567 
568  shm_name = shm_name_from_svm_map_region_args (a);
569 
570  if (CLIB_DEBUG > 1)
571  clib_warning ("[%d] map region %s: shm_open (%s)",
572  getpid (), a->name, shm_name);
573 
574  svm_fd = shm_open ((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
575 
576  if (svm_fd >= 0)
577  {
578  if (fchmod (svm_fd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) < 0)
579  clib_unix_warning ("segment chmod");
580  /* This turns out to fail harmlessly if the client starts first */
581  if (fchown (svm_fd, a->uid, a->gid) < 0)
582  clib_unix_warning ("segment chown [ok if client starts first]");
583 
584  vec_free (shm_name);
585 
586  if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
587  {
588  clib_warning ("seek region size");
589  close (svm_fd);
590  return (0);
591  }
592  if (write (svm_fd, &junk, 1) != 1)
593  {
594  clib_warning ("set region size");
595  close (svm_fd);
596  return (0);
597  }
598 
599  rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
600  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
601 
602  if (rp == (svm_region_t *) MAP_FAILED)
603  {
604  clib_unix_warning ("mmap create");
605  close (svm_fd);
606  return (0);
607  }
608  close (svm_fd);
609  CLIB_MEM_UNPOISON (rp, a->size);
610 
612 
613  return ((void *) rp);
614  }
615  else
616  {
617  svm_fd = shm_open ((char *) shm_name, O_RDWR, 0777);
618 
619  vec_free (shm_name);
620 
621  if (svm_fd < 0)
622  {
623  perror ("svm_region_map(mmap open)");
624  return (0);
625  }
626 
627  /* Reset ownership in case the client started first */
628  if (fchown (svm_fd, a->uid, a->gid) < 0)
629  clib_unix_warning ("segment chown [ok if client starts first]");
630 
631  time_left = 20;
632  while (1)
633  {
634  if (0 != fstat (svm_fd, &stat))
635  {
636  clib_warning ("fstat failed: %d", errno);
637  close (svm_fd);
638  return (0);
639  }
640  if (stat.st_size > 0)
641  {
642  break;
643  }
644  if (0 == time_left)
645  {
646  clib_warning ("waiting for resize of shm file timed out");
647  close (svm_fd);
648  return (0);
649  }
650  ts.tv_sec = 0;
651  ts.tv_nsec = 100000000;
652  while (nanosleep (&ts, &tsrem) < 0)
653  ts = tsrem;
654  time_left--;
655  }
656 
657  rp = mmap (0, MMAP_PAGESIZE,
658  PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
659 
660  if (rp == (svm_region_t *) MAP_FAILED)
661  {
662  close (svm_fd);
663  clib_warning ("mmap");
664  return (0);
665  }
666 
668 
669  /*
670  * We lost the footrace to create this region; make sure
671  * the winner has crossed the finish line.
672  */
673  while (rp->version == 0 && deadman++ < 5)
674  {
675  sleep (1);
676  }
677 
678  /*
679  * <bleep>-ed?
680  */
681  if (rp->version == 0)
682  {
683  clib_warning ("rp->version %d not %d", rp->version, SVM_VERSION);
684  close (svm_fd);
685  munmap (rp, a->size);
686  return (0);
687  }
688  /* Remap now that the region has been placed */
689  a->baseva = rp->virtual_base;
690  a->size = rp->virtual_size;
691  munmap (rp, MMAP_PAGESIZE);
692 
693  rp = (void *) mmap (uword_to_pointer (a->baseva, void *), a->size,
694  PROT_READ | PROT_WRITE,
695  MAP_SHARED | MAP_FIXED, svm_fd, 0);
696  if ((uword) rp == (uword) MAP_FAILED)
697  {
698  clib_unix_warning ("mmap");
699  close (svm_fd);
700  return (0);
701  }
702 
703  close (svm_fd);
704 
705  CLIB_MEM_UNPOISON (rp, a->size);
706 
707  if ((uword) rp != rp->virtual_base)
708  {
709  clib_warning ("mmap botch");
710  }
711 
712  /*
713  * Try to fix the region mutex if it is held by
714  * a dead process
715  */
716  pid_holding_region_lock = rp->mutex_owner_pid;
717  if (pid_holding_region_lock && kill (pid_holding_region_lock, 0) < 0)
718  {
720  ("region %s mutex held by dead pid %d, tag %d, force unlock",
721  rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
722  /* owner pid is nonexistent */
723  rp->mutex.__data.__owner = 0;
724  rp->mutex.__data.__lock = 0;
725  dead_region_recovery = 1;
726  }
727 
728  if (dead_region_recovery)
729  clib_warning ("recovery: attempt to re-lock region");
730 
731  region_lock (rp, 2);
732  oldheap = svm_push_pvt_heap (rp);
733  vec_add1 (rp->client_pids, getpid ());
734 
735  if (dead_region_recovery)
736  clib_warning ("recovery: attempt svm_data_region_map");
737 
738  rv = svm_data_region_map (a, rp);
739  if (rv)
740  {
741  clib_warning ("data_region_map: %d", rv);
742  }
743 
744  if (dead_region_recovery)
745  clib_warning ("unlock and continue");
746 
747  region_unlock (rp);
748 
749  svm_pop_heap (oldheap);
750 
751  return ((void *) rp);
752 
753  }
754  return 0; /* NOTREACHED *///NOSONAR
755 }
756 
757 static void
759 {
760  int i;
761  for (i = 0; i < nheld; i++)
762  {
763  pthread_mutex_unlock (mutexes_held[i]); //NOSONAR
764  }
765 }
766 
767 static int
769 {
770  svm_region_t *rp;
771  u64 ticks = clib_cpu_time_now ();
772  uword randomize_baseva;
773 
774  /* guard against klutz calls */
775  if (root_rp)
776  return -1;
777 
779 
780  atexit (svm_mutex_cleanup);
781 
782  /* Randomize the shared-VM base at init time */
783  if (MMAP_PAGESIZE <= (4 << 10))
784  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
785  else
786  randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
787 
788  a->baseva += randomize_baseva;
789 
790  rp = svm_map_region (a);
791  if (!rp)
792  return -1;
793 
794  region_lock (rp, 3);
795 
796  /* Set up the main region data structures */
798  {
799  svm_main_region_t *mp = 0;
800  void *oldheap;
801 
803 
804  oldheap = svm_push_pvt_heap (rp);
805  vec_validate (mp, 0);
806  mp->name_hash = hash_create_string (0, sizeof (uword));
807  mp->root_path = a->root_path ? format (0, "%s%c", a->root_path, 0) : 0;
808  mp->uid = a->uid;
809  mp->gid = a->gid;
810  rp->data_base = mp;
811  svm_pop_heap (oldheap);
812  }
813  region_unlock (rp);
814  root_rp = rp;
815 
816  return 0;
817 }
818 
819 void
821 {
822  svm_map_region_args_t _a, *a = &_a;
823 
824  clib_memset (a, 0, sizeof (*a));
825  a->root_path = 0;
829  a->flags = SVM_FLAGS_NODATA;
830  a->uid = 0;
831  a->gid = 0;
832 
834 }
835 
/*
 * svm_region_init_chroot
 *
 * Build a zeroed svm_map_region_args_t with the given root_path,
 * SVM_FLAGS_NODATA and uid/gid 0, then hand it to
 * svm_region_init_internal and return that function's result.
 *
 * NOTE(review): this listing has dropped the lines between the
 * root_path and flags assignments; presumably they set the remaining
 * fields (name / base va / size) - confirm against the real source.
 */
836 int
837 svm_region_init_chroot (const char *root_path)
838 {
839  svm_map_region_args_t _a, *a = &_a;
840 
841  clib_memset (a, 0, sizeof (*a));
842  a->root_path = root_path;
846  a->flags = SVM_FLAGS_NODATA;
847  a->uid = 0;
848  a->gid = 0;
849 
850  return svm_region_init_internal (a);
851 }
852 
/*
 * svm_region_init_chroot_uid_gid
 *
 * Same argument setup as svm_region_init_chroot, except that the
 * caller-supplied uid and gid are stored in the args instead of 0.
 * Returns nothing.
 *
 * NOTE(review): this listing has dropped the lines between the
 * root_path and flags assignments, and the statement after the gid
 * assignment. Presumably the latter is the call that consumes "a" -
 * confirm against the real source.
 */
853 void
854 svm_region_init_chroot_uid_gid (const char *root_path, int uid, int gid)
855 {
856  svm_map_region_args_t _a, *a = &_a;
857 
858  clib_memset (a, 0, sizeof (*a));
859  a->root_path = root_path;
863  a->flags = SVM_FLAGS_NODATA;
864  a->uid = uid;
865  a->gid = gid;
866 
868 }
869 
870 void
872 {
874 }
875 
876 void *
878 {
879  svm_main_region_t *mp;
880  svm_region_t *rp;
881  uword need_nbits;
882  int index, i;
883  void *oldheap;
884  uword *p;
885  u8 *name;
886  svm_subregion_t *subp;
887 
888  ASSERT (root_rp);
889 
890  a->size += MMAP_PAGESIZE +
892  a->size = rnd_pagesize (a->size);
893 
894  region_lock (root_rp, 4);
895  oldheap = svm_push_pvt_heap (root_rp);
896  mp = root_rp->data_base;
897 
898  ASSERT (mp);
899 
900  /* Map the named region from the correct chroot environment */
901  if (a->root_path == NULL)
902  a->root_path = (char *) mp->root_path;
903 
904  /*
905  * See if this region is already known. If it is, we're
906  * almost done...
907  */
908  p = hash_get_mem (mp->name_hash, a->name);
909 
910  if (p)
911  {
912  rp = svm_map_region (a);
913  region_unlock (root_rp);
914  svm_pop_heap (oldheap);
915  return rp;
916  }
917 
918  /* Create the region. */
919  ASSERT ((a->size & ~(MMAP_PAGESIZE - 1)) == a->size);
920 
921  need_nbits = a->size / MMAP_PAGESIZE;
922 
923  index = 1; /* $$$ fixme, figure out how many bit to really skip */
924 
925  /*
926  * Scan the virtual space allocation bitmap, looking for a large
927  * enough chunk
928  */
929  do
930  {
931  if (clib_bitmap_get_no_check (root_rp->bitmap, index) == 0)
932  {
933  for (i = 0; i < (need_nbits - 1); i++)
934  {
935  if (clib_bitmap_get_no_check (root_rp->bitmap, index + i) == 1)
936  {
937  index = index + i;
938  goto next;
939  }
940  }
941  break;
942  }
943  index++;
944  next:;
945  }
946  while (index < root_rp->bitmap_size);
947 
948  /* Completely out of VM? */
949  if (index >= root_rp->bitmap_size)
950  {
951  clib_warning ("region %s: not enough VM to allocate 0x%llx (%lld)",
952  root_rp->region_name, a->size, a->size);
953  svm_pop_heap (oldheap);
954  region_unlock (root_rp);
955  return 0;
956  }
957 
958  /*
959  * Mark virtual space allocated
960  */
961 #if CLIB_DEBUG > 1
962  clib_warning ("set %d bits at index %d", need_nbits, index);
963 #endif
964 
965  for (i = 0; i < need_nbits; i++)
966  {
967  clib_bitmap_set_no_check (root_rp->bitmap, index + i, 1);
968  }
969 
970  /* Place this region where it goes... */
971  a->baseva = root_rp->virtual_base + index * MMAP_PAGESIZE;
972 
973  rp = svm_map_region (a);
974 
975  pool_get (mp->subregions, subp);
976  name = format (0, "%s%c", a->name, 0);
977  subp->subregion_name = name;
978 
979  hash_set_mem (mp->name_hash, name, subp - mp->subregions);
980 
981  svm_pop_heap (oldheap);
982 
983  region_unlock (root_rp);
984 
985  return (rp);
986 }
987 
988 void
990 {
991  svm_map_region_args_t _a, *a = &_a;
992  svm_main_region_t *mp;
993  u8 *shm_name;
994 
995  ASSERT (root_rp);
996  ASSERT (rp);
998 
999  mp = root_rp->data_base;
1000  ASSERT (mp);
1001 
1002  a->root_path = (char *) mp->root_path;
1003  a->name = rp->region_name;
1004  shm_name = shm_name_from_svm_map_region_args (a);
1005  if (CLIB_DEBUG > 1)
1006  clib_warning ("[%d] shm_unlink (%s)", getpid (), shm_name);
1007  shm_unlink ((const char *) shm_name);
1008  vec_free (shm_name);
1009 }
1010 
1011 /*
1012  * svm_region_unmap
1013  *
1014  * Let go of the indicated region. If the calling process
1015  * is the last customer, throw it away completely.
1016  * The root region mutex guarantees atomicity with respect to
1017  * a new region client showing up at the wrong moment.
 *
 * rp_arg:    the region to release (used as an svm_region_t *).
 * is_client: when non-zero, the last-client teardown does not call
 *            svm_region_unlink.
 *
 * Returns immediately, doing nothing, if this process already holds
 * any tracked region mutex (nheld != 0).
1018  */
1019 void
1020 svm_region_unmap_internal (void *rp_arg, u8 is_client)
1021 {
1022  int i, mypid = getpid ();
1023  int nclients_left;
1024  void *oldheap;
1025  uword virtual_base, virtual_size;
1026  svm_region_t *rp = rp_arg;
1027  char *name;
1028 
1029  /*
1030  * If we take a signal while holding one or more shared-memory
1031  * mutexes, we may end up back here from an otherwise
1032  * benign exit handler. Bail out to avoid a recursive
1033  * mutex screw-up.
1034  */
1035  if (nheld)
1036  return;
1037 
1038  ASSERT (rp);
1039  ASSERT (root_rp);
1040 
1041  if (CLIB_DEBUG > 1)
1042  clib_warning ("[%d] unmap region %s", getpid (), rp->region_name);
1043 
/* Lock order: root region first, then the region being released */
1044  region_lock (root_rp, 5);
1045  region_lock (rp, 6);
1046 
1047  oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
1048 
1049  /* Remove the caller from the list of mappers */
/* NOTE(review): one statement is missing from this listing here */
1051  for (i = 0; i < vec_len (rp->client_pids); i++)
1052  {
1053  if (rp->client_pids[i] == mypid)
1054  {
1055  vec_delete (rp->client_pids, 1, i);
1056  goto found;
1057  }
1058  }
/* Caller's pid was not registered; warn and carry on with the unmap */
1059  clib_warning ("pid %d AWOL", mypid);
1060 
1061 found:
1062 
1063  svm_pop_heap (oldheap);
1064 
/* Snapshot what is needed after the mapping is gone */
1065  nclients_left = vec_len (rp->client_pids);
1066  virtual_base = rp->virtual_base;
1067  virtual_size = rp->virtual_size;
1068 
1069  if (nclients_left == 0)
1070  {
1071  int index, nbits, i;
1072  svm_main_region_t *mp;
1073  uword *p;
1074  svm_subregion_t *subp;
1075 
1076  /* Kill the region, last guy on his way out */
1077 
/* The copy of the name is allocated while the root region heap is pushed */
1078  oldheap = svm_push_pvt_heap (root_rp);
1079  name = vec_dup (rp->region_name);
1080 
1081  virtual_base = rp->virtual_base;
1082  virtual_size = rp->virtual_size;
1083 
1084  /* Figure out which bits to clear in the root region bitmap */
1085  index = (virtual_base - root_rp->virtual_base) / MMAP_PAGESIZE;
1086 
1087  nbits = (virtual_size + MMAP_PAGESIZE - 1) / MMAP_PAGESIZE;
1088 
1089 #if CLIB_DEBUG > 1
1090  clib_warning ("clear %d bits at index %d", nbits, index);
1091 #endif
1092  /* Give back the allocated VM */
1093  for (i = 0; i < nbits; i++)
1094  {
1095  clib_bitmap_set_no_check (root_rp->bitmap, index + i, 0);
1096  }
1097 
1098  mp = root_rp->data_base;
1099 
1100  p = hash_get_mem (mp->name_hash, name);
1101 
1102  /* Better never happen ... */
/*
 * NOTE(review): this error path returns without munmap() of the region
 * and without vec_free (name), unlike the normal path below.
 */
1103  if (p == NULL)
1104  {
1105  region_unlock (rp);
1106  region_unlock (root_rp);
1107  svm_pop_heap (oldheap);
1108  clib_warning ("Region name '%s' not found?", name);
1109  return;
1110  }
1111 
1112  /* Remove from the root region subregion pool */
1113  subp = mp->subregions + p[0];
1114  pool_put (mp->subregions, subp);
1115 
1116  hash_unset_mem (mp->name_hash, name);
1117 
1118  vec_free (name);
1119 
1120  region_unlock (rp);
1121 
1122  /* If a client asks for the cleanup, don't unlink the backing
1123  * file since we can't tell if it has been recreated. */
1124  if (!is_client)
1125  svm_region_unlink (rp);
1126 
1127  munmap ((void *) virtual_base, virtual_size);
1128  region_unlock (root_rp);
1129  svm_pop_heap (oldheap);
1130  return;
1131  }
1132 
/* Other clients remain: drop both locks and remove only our mapping */
1133  region_unlock (rp);
1134  region_unlock (root_rp);
1135 
1136  munmap ((void *) virtual_base, virtual_size);
1137 }
1138 
1139 void
1140 svm_region_unmap (void *rp_arg)
1141 {
1142  svm_region_unmap_internal (rp_arg, 0 /* is_client */ );
1143 }
1144 
1145 void
1147 {
1148  svm_region_unmap_internal (rp_arg, 1 /* is_client */ );
1149 }
1150 
1151 /*
1152  * svm_region_exit
1153  */
1154 static void
1156 {
1157  void *oldheap;
1158  int i, mypid = getpid ();
1159  uword virtual_base, virtual_size;
1160 
1161  /* It felt so nice we did it twice... */
1162  if (root_rp == 0)
1163  return;
1164 
1165  if (--root_rp_refcount > 0)
1166  return;
1167 
1168  /*
1169  * If we take a signal while holding one or more shared-memory
1170  * mutexes, we may end up back here from an otherwise
1171  * benign exit handler. Bail out to avoid a recursive
1172  * mutex screw-up.
1173  */
1174  if (nheld)
1175  return;
1176 
1177  region_lock (root_rp, 7);
1178  oldheap = svm_push_pvt_heap (root_rp);
1179 
1180  virtual_base = root_rp->virtual_base;
1181  virtual_size = root_rp->virtual_size;
1182 
1183  CLIB_MEM_UNPOISON (root_rp->client_pids, vec_bytes (root_rp->client_pids));
1184  for (i = 0; i < vec_len (root_rp->client_pids); i++)
1185  {
1186  if (root_rp->client_pids[i] == mypid)
1187  {
1188  vec_delete (root_rp->client_pids, 1, i);
1189  goto found;
1190  }
1191  }
1192  clib_warning ("pid %d AWOL", mypid);
1193 
1194 found:
1195 
1196  if (!is_client && vec_len (root_rp->client_pids) == 0)
1197  svm_region_unlink (root_rp);
1198 
1199  region_unlock (root_rp);
1200  svm_pop_heap (oldheap);
1201 
1202  root_rp = 0;
1203  munmap ((void *) virtual_base, virtual_size);
1204 }
1205 
1206 void
1208 {
1209  svm_region_exit_internal (0 /* is_client */ );
1210 }
1211 
1212 void
1214 {
1215  svm_region_exit_internal (1 /* is_client */ );
1216 }
1217 
1218 void
1220 {
1221  int j;
1222  int mypid = getpid ();
1223  void *oldheap;
1224 
1225  for (j = 0; j < vec_len (rp->client_pids); j++)
1226  {
1227  if (mypid == rp->client_pids[j])
1228  continue;
1229  if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0))
1230  {
1231  clib_warning ("%s: cleanup ghost pid %d",
1232  rp->region_name, rp->client_pids[j]);
1233  /* nb: client vec in rp->region_heap */
1234  oldheap = svm_push_pvt_heap (rp);
1235  vec_delete (rp->client_pids, 1, j);
1236  j--;
1237  svm_pop_heap (oldheap);
1238  }
1239  }
1240 }
1241 
1242 
1243 /*
1244  * Scan svm regions for dead clients
 *
 * root_path is passed to svm_region_init_chroot and used as the
 * root_path of every subregion lookup.
 *
 * Steps, as visible here: initialise the root region, drop root-region
 * client pids for which kill (pid, 0) fails, copy the subregion names
 * while holding the root mutex, then find_or_create / lock / unlock /
 * unmap each named region, and finally call svm_region_exit.
 *
 * NOTE(review): two lines are missing from this listing - one in the
 * declarations and one between the per-region lock and unlock
 * (presumably the per-region dead-pid scan - confirm against the real
 * source).
1245  */
1246 void
1247 svm_client_scan (const char *root_path)
1248 {
1249  int i, j;
1250  svm_main_region_t *mp;
1251  svm_map_region_args_t *a = 0;
1253  svm_region_t *rp;
1254  svm_subregion_t *subp;
1255  u8 *name = 0;
1256  u8 **svm_names = 0;
1257  void *oldheap;
1258  int mypid = getpid ();
1259 
1260  vec_validate (a, 0);
1261 
/*
 * NOTE(review): the int result of svm_region_init_chroot is ignored and
 * root_rp is dereferenced below without a NULL check.
 */
1262  svm_region_init_chroot (root_path);
1263 
1264  root_rp = svm_get_root_rp ();
1265 
/* Plain pthread lock: this does not go through region_lock, so nheld is unchanged */
1266  pthread_mutex_lock (&root_rp->mutex);
1267 
1268  mp = root_rp->data_base;
1269 
1270  for (j = 0; j < vec_len (root_rp->client_pids); j++)
1271  {
1272  if (mypid == root_rp->client_pids[j])
1273  continue;
/* kill with signal 0 only probes the pid; failure is treated as "dead" */
1274  if (root_rp->client_pids[j] && (kill (root_rp->client_pids[j], 0) < 0))
1275  {
1276  clib_warning ("%s: cleanup ghost pid %d",
1277  root_rp->region_name, root_rp->client_pids[j]);
1278  /* nb: client vec in root_rp->region_heap */
1279  oldheap = svm_push_pvt_heap (root_rp);
1280  vec_delete (root_rp->client_pids, 1, j);
/* Re-examine index j: vec_delete shifted the next entry into it */
1281  j--;
1282  svm_pop_heap (oldheap);
1283  }
1284  }
1285 
1286  /*
1287  * Snapshot names, can't hold root rp mutex across
1288  * find_or_create.
1289  */
1290  /* *INDENT-OFF* */
1291  pool_foreach (subp, mp->subregions) {
1292  name = vec_dup (subp->subregion_name);
1293  vec_add1(svm_names, name);
1294  }
1295  /* *INDENT-ON* */
1296 
1297  pthread_mutex_unlock (&root_rp->mutex);
1298 
1299  for (i = 0; i < vec_len (svm_names); i++)
1300  {
/* "a" is freed at the bottom of each iteration, so re-create it here */
1301  vec_validate (a, 0);
1302  a->root_path = root_path;
1303  a->name = (char *) svm_names[i];
1304  rp = svm_region_find_or_create (a);
1305  if (rp)
1306  {
1307  pthread_mutex_lock (&rp->mutex);
1308 
1310 
1311  pthread_mutex_unlock (&rp->mutex);
1312  svm_region_unmap (rp);
/* NOTE(review): svm_names[i] is only freed when rp is non-NULL */
1313  vec_free (svm_names[i]);
1314  }
1315  vec_free (a);
1316  }
1317  vec_free (svm_names);
1318 
1319  svm_region_exit ();
1320 
1321  vec_free (a);
1322 }
1323 
1324 /*
1325  * fd.io coding-style-patch-verification: ON
1326  *
1327  * Local Variables:
1328  * eval: (c-set-style "gnu")
1329  * End:
1330  */
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:509
void svm_region_init_chroot_uid_gid(const char *root_path, int uid, int gid)
Definition: svm.c:854
#define CLIB_MEM_UNPOISON(a, s)
Definition: sanitizer.h:47
svm_region_t * svm_get_root_rp(void)
Definition: svm.c:53
#define SVM_GLOBAL_REGION_NAME
Definition: svm_common.h:101
const char * root_path
Definition: svm_common.h:67
static int nheld
Definition: svm.c:50
static void svm_pop_heap(void *oldheap)
Definition: svm.h:94
#define vec_c_string_is_terminated(V)
Test whether a vector is a NULL terminated c-string.
Definition: vec.h:1082
a
Definition: bitmap.h:544
#define SVM_FLAGS_NODATA
Definition: svm_common.h:29
#define SVM_FLAGS_NEED_DATA_INIT
Definition: svm_common.h:30
void * svm_map_region(svm_map_region_args_t *a)
Definition: svm.c:550
#define count_leading_zeros(x)
Definition: clib.h:156
#define pool_foreach(VAR, POOL)
Iterate through pool.
Definition: pool.h:527
clib_mem_heap_t * clib_mem_create_heap(void *base, uword size, int is_locked, char *fmt,...)
Definition: mem_dlmalloc.c:533
Optimized string handling code, including c11-compliant "safe C library" variants.
unsigned long u64
Definition: types.h:89
Fixed length block allocator.
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:592
static u64 clib_cpu_time_now(void)
Definition: time.h:81
void svm_client_scan(const char *root_path)
Definition: svm.c:1247
uword virtual_base
Definition: svm_common.h:42
#define SVM_PVT_MHEAP_SIZE
Definition: svm_common.h:32
void svm_region_unmap_client(void *rp_arg)
Definition: svm.c:1146
#define hash_set_mem(h, key, value)
Definition: hash.h:275
void svm_region_exit_client(void)
Definition: svm.c:1213
#define vec_bytes(v)
Number of data bytes in vector.
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
Definition: pool.h:251
static u8 * format_svm_size(u8 *s, va_list *args)
Definition: svm.c:163
static uword clib_bitmap_get_no_check(uword *ai, uword i)
Gets the ith bit value from a bitmap. Does not sanity-check the bit position.
Definition: bitmap.h:212
unsigned char u8
Definition: types.h:56
void unformat_init_clib_file(unformat_input_t *input, int file_descriptor)
Definition: unformat.c:1064
#define clib_unix_error(format, args...)
Definition: error.h:65
static uword clib_bitmap_set_no_check(uword *a, uword i, uword new_value)
Sets the ith bit of a bitmap to new_value.
Definition: bitmap.h:141
#define SVM_VERSION
Definition: svm_common.h:25
void svm_region_exit(void)
Definition: svm.c:1207
static void unformat_skip_line(unformat_input_t *i)
Definition: format.h:221
void svm_region_init(void)
Definition: svm.c:820
uword * client_pids
Definition: svm_common.h:54
void * svm_region_find_or_create(svm_map_region_args_t *a)
Definition: svm.c:877
volatile void * user_ctx
Definition: svm_common.h:47
description fragment has unexpected format
Definition: map.api:433
static u64 rnd_pagesize(u64 size)
Definition: svm.c:252
pthread_cond_t condvar
Definition: svm_common.h:38
u8 * format_svm_region(u8 *s, va_list *args)
Definition: svm.c:183
#define hash_create_string(elts, value_bytes)
Definition: hash.h:690
#define SVM_FLAGS_MHEAP
Definition: svm_common.h:27
static int svm_region_init_internal(svm_map_region_args_t *a)
Definition: svm.c:768
void * data_base
Definition: svm_common.h:45
#define hash_unset_mem(h, key)
Definition: hash.h:291
lo
#define MAXLOCK
Definition: svm.c:48
struct _unformat_input_t unformat_input_t
u32 size
Definition: vhost_user.h:106
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:301
#define vec_dup(V)
Return copy of vector (no header, no alignment)
Definition: vec.h:429
svm_subregion_t * subregions
Definition: svm_common.h:119
char * backing_file
Definition: svm_common.h:52
uword virtual_size
Definition: svm_common.h:43
void svm_region_init_args(svm_map_region_args_t *a)
Definition: svm.c:871
#define SVM_GLOBAL_REGION_SIZE
Definition: svm_common.h:100
char * region_name
Definition: svm_common.h:51
void svm_region_init_mapped_region(svm_map_region_args_t *a, svm_region_t *rp)
Definition: svm.c:446
static void * svm_push_pvt_heap(svm_region_t *rp)
Definition: svm.h:78
static int root_rp_refcount
Definition: svm.c:46
#define UNFORMAT_END_OF_INPUT
Definition: format.h:144
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:317
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:380
#define clib_warning(format, args...)
Definition: error.h:59
static pthread_mutex_t * mutexes_held[MAXLOCK]
Definition: svm.c:49
u8 * shm_name_from_svm_map_region_args(svm_map_region_args_t *a)
Definition: svm.c:422
uword bitmap_size
Definition: svm_common.h:49
string name[64]
Definition: ip.api:44
static int svm_data_region_map(svm_map_region_args_t *a, svm_region_t *rp)
Definition: svm.c:347
#define uword_to_pointer(u, type)
Definition: types.h:136
#define ASSERT(truth)
#define vec_delete(V, N, M)
Delete N elements starting at element M.
Definition: vec.h:854
static void region_lock(svm_region_t *rp, int tag)
Definition: svm.c:102
volatile uword version
Definition: svm_common.h:36
u64 svm_get_global_region_base_va()
Definition: svm.c:61
Bitmaps built as vectors of machine words.
static void region_unlock(svm_region_t *rp)
Definition: svm.c:119
int mutex_owner_tag
Definition: svm_common.h:40
void svm_region_unmap_internal(void *rp_arg, u8 is_client)
Definition: svm.c:1020
vl_api_ip4_address_t hi
Definition: arp.api:37
#define MMAP_PAGESIZE
Definition: ssvm.h:42
void svm_region_unlink(svm_region_t *rp)
Definition: svm.c:989
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
const char * name
Definition: svm_common.h:68
u64 uword
Definition: types.h:112
static void unformat_free(unformat_input_t *i)
Definition: format.h:162
#define clib_unix_warning(format, args...)
Definition: error.h:68
void svm_region_unmap(void *rp_arg)
Definition: svm.c:1140
u32 index
Definition: flow_types.api:221
void svm_client_scan_this_region_nolock(svm_region_t *rp)
Definition: svm.c:1219
#define hash_get_mem(h, key)
Definition: hash.h:269
void * region_heap
Definition: svm_common.h:44
uword * bitmap
Definition: svm_common.h:50
f64 end
end of the time range
Definition: mactime.api:44
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:133
void * data_heap
Definition: svm_common.h:46
static int svm_data_region_create(svm_map_region_args_t *a, svm_region_t *rp)
Definition: svm.c:264
static u8 * format_svm_flags(u8 *s, va_list *args)
Definition: svm.c:146
int mutex_owner_pid
Definition: svm_common.h:39
#define BITS(x)
Definition: clib.h:66
uword flags
Definition: svm_common.h:41
static void svm_mutex_cleanup(void)
Definition: svm.c:758
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:978
pthread_mutex_t mutex
Definition: svm_common.h:37
#define SVM_FLAGS_FILE
Definition: svm_common.h:28
CLIB vectors are ubiquitous dynamically resized arrays with user-defined "headers".
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:170
static svm_region_t * root_rp
Definition: svm.c:45
int svm_region_init_chroot(const char *root_path)
Definition: svm.c:837
static void svm_region_exit_internal(u8 is_client)
Definition: svm.c:1155