FD.io VPP  v18.11-rc0-18-g2a3fb1a
Vector Packet Processing
string.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
17 
18  Permission is hereby granted, free of charge, to any person obtaining
19  a copy of this software and associated documentation files (the
20  "Software"), to deal in the Software without restriction, including
21  without limitation the rights to use, copy, modify, merge, publish,
22  distribute, sublicense, and/or sell copies of the Software, and to
23  permit persons to whom the Software is furnished to do so, subject to
24  the following conditions:
25 
26  The above copyright notice and this permission notice shall be
27  included in all copies or substantial portions of the Software.
28 
29  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36 */
37 
38 #ifndef included_clib_string_h
39 #define included_clib_string_h
40 
41 #include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
42 #include <vppinfra/vector.h>
43 
44 #ifdef CLIB_LINUX_KERNEL
45 #include <linux/string.h>
46 #endif
47 
48 #ifdef CLIB_UNIX
49 #include <string.h>
50 #endif
51 
52 #ifdef CLIB_STANDALONE
53 #include <vppinfra/standalone_string.h>
54 #endif
55 
56 #if _x86_64_
57 #include <x86intrin.h>
58 #endif
59 
60 /* Exchanges source and destination. */
61 void clib_memswap (void *_a, void *_b, uword bytes);
62 
63 /*
64  * the vector unit memcpy variants confuse coverity
65  * so don't let it anywhere near them.
66  */
67 #ifndef __COVERITY__
68 #if __AVX512F__
69 #include <vppinfra/memcpy_avx512.h>
70 #elif __AVX2__
71 #include <vppinfra/memcpy_avx2.h>
72 #elif __SSSE3__
73 #include <vppinfra/memcpy_sse3.h>
74 #else
75 #define clib_memcpy(a,b,c) memcpy(a,b,c)
76 #endif
77 #else /* __COVERITY__ */
78 #define clib_memcpy(a,b,c) memcpy(a,b,c)
79 #endif
80 
81 /*
82  * Copy 64 bytes of data to 4 destinations
83  * this function is typically used in quad-loop case when whole cacheline
84  * needs to be copied to 4 different places. First it reads whole cacheline
85  * to 1/2/4 SIMD registers and then it writes data to 4 destinations.
86  */
87 
89 clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
90 {
91 #if defined (__AVX512F__)
92  __m512i r0 = _mm512_loadu_si512 (s);
93 
94  _mm512_storeu_si512 (d0, r0);
95  _mm512_storeu_si512 (d1, r0);
96  _mm512_storeu_si512 (d2, r0);
97  _mm512_storeu_si512 (d3, r0);
98 
99 #elif defined (__AVX2__)
100  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
101  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));
102 
103  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
104  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);
105 
106  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
107  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);
108 
109  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
110  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);
111 
112  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
113  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);
114 
115 #elif defined (__SSSE3__)
116  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
117  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
118  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
119  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));
120 
121  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
122  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
123  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
124  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);
125 
126  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
127  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
128  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
129  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);
130 
131  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
132  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
133  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
134  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);
135 
136  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
137  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
138  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
139  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);
140 
141 #else
142  clib_memcpy (d0, s, 64);
143  clib_memcpy (d1, s, 64);
144  clib_memcpy (d2, s, 64);
145  clib_memcpy (d3, s, 64);
146 #endif
147 }
148 
151 {
152  u64 *ptr = p;
153 #if defined(CLIB_HAVE_VEC512)
154  u64x8 v512 = u64x8_splat (val);
155  while (count >= 8)
156  {
157  u64x8_store_unaligned (v512, ptr);
158  ptr += 8;
159  count -= 8;
160  }
161  if (count == 0)
162  return;
163 #endif
164 #if defined(CLIB_HAVE_VEC256)
165  u64x4 v256 = u64x4_splat (val);
166  while (count >= 4)
167  {
168  u64x4_store_unaligned (v256, ptr);
169  ptr += 4;
170  count -= 4;
171  }
172  if (count == 0)
173  return;
174 #else
175  while (count >= 4)
176  {
177  ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
178  ptr += 4;
179  count -= 4;
180  }
181 #endif
182  while (count--)
183  ptr++[0] = val;
184 }
185 
188 {
189  u32 *ptr = p;
190 #if defined(CLIB_HAVE_VEC512)
191  u32x16 v512 = u32x16_splat (val);
192  while (count >= 16)
193  {
194  u32x16_store_unaligned (v512, ptr);
195  ptr += 16;
196  count -= 16;
197  }
198  if (count == 0)
199  return;
200 #endif
201 #if defined(CLIB_HAVE_VEC256)
202  u32x8 v256 = u32x8_splat (val);
203  while (count >= 8)
204  {
205  u32x8_store_unaligned (v256, ptr);
206  ptr += 8;
207  count -= 8;
208  }
209  if (count == 0)
210  return;
211 #endif
212 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
213  u32x4 v128 = u32x4_splat (val);
214  while (count >= 4)
215  {
216  u32x4_store_unaligned (v128, ptr);
217  ptr += 4;
218  count -= 4;
219  }
220 #else
221  while (count >= 4)
222  {
223  ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
224  ptr += 4;
225  count -= 4;
226  }
227 #endif
228  while (count--)
229  ptr++[0] = val;
230 }
231 
234 {
235  u16 *ptr = p;
236 #if defined(CLIB_HAVE_VEC512)
237  u16x32 v512 = u16x32_splat (val);
238  while (count >= 32)
239  {
240  u16x32_store_unaligned (v512, ptr);
241  ptr += 32;
242  count -= 32;
243  }
244  if (count == 0)
245  return;
246 #endif
247 #if defined(CLIB_HAVE_VEC256)
248  u16x16 v256 = u16x16_splat (val);
249  while (count >= 16)
250  {
251  u16x16_store_unaligned (v256, ptr);
252  ptr += 16;
253  count -= 16;
254  }
255  if (count == 0)
256  return;
257 #endif
258 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
259  u16x8 v128 = u16x8_splat (val);
260  while (count >= 8)
261  {
262  u16x8_store_unaligned (v128, ptr);
263  ptr += 8;
264  count -= 8;
265  }
266 #else
267  while (count >= 4)
268  {
269  ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
270  ptr += 4;
271  count -= 4;
272  }
273 #endif
274  while (count--)
275  ptr++[0] = val;
276 }
277 
279 clib_memset_u8 (void *p, u8 val, uword count)
280 {
281  u8 *ptr = p;
282 #if defined(CLIB_HAVE_VEC512)
283  u8x64 v512 = u8x64_splat (val);
284  while (count >= 64)
285  {
286  u8x64_store_unaligned (v512, ptr);
287  ptr += 64;
288  count -= 64;
289  }
290  if (count == 0)
291  return;
292 #endif
293 #if defined(CLIB_HAVE_VEC256)
294  u8x32 v256 = u8x32_splat (val);
295  while (count >= 32)
296  {
297  u8x32_store_unaligned (v256, ptr);
298  ptr += 32;
299  count -= 32;
300  }
301  if (count == 0)
302  return;
303 #endif
304 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
305  u8x16 v128 = u8x16_splat (val);
306  while (count >= 16)
307  {
308  u8x16_store_unaligned (v128, ptr);
309  ptr += 16;
310  count -= 16;
311  }
312 #else
313  while (count >= 4)
314  {
315  ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
316  ptr += 4;
317  count -= 4;
318  }
319 #endif
320  while (count--)
321  ptr++[0] = val;
322 }
323 
325 clib_count_equal_u64 (u64 * data, uword max_count)
326 {
327  uword count = 0;
328  u64 first = data[0];
329 
330 #if defined(CLIB_HAVE_VEC512)
331  while (u64x8_is_all_equal (u64x8_load_unaligned (data), first))
332  {
333  data += 8;
334  count += 8;
335  if (count >= max_count)
336  return max_count;
337  }
338 #endif
339 #if defined(CLIB_HAVE_VEC256)
340  while (u64x4_is_all_equal (u64x4_load_unaligned (data), first))
341  {
342  data += 4;
343  count += 4;
344  if (count >= max_count)
345  return max_count;
346  }
347 #endif
348 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
349  while (u64x2_is_all_equal (u64x2_load_unaligned (data), first))
350  {
351  data += 2;
352  count += 2;
353  if (count >= max_count)
354  return max_count;
355  }
356 #endif
357  while (count < max_count && (data[0] == first))
358  {
359  data += 1;
360  count += 1;
361  }
362  return count;
363 }
364 
366 clib_count_equal_u32 (u32 * data, uword max_count)
367 {
368  uword count = 0;
369  u32 first = data[0];
370 
371 #if defined(CLIB_HAVE_VEC512)
372  while (u32x16_is_all_equal (u32x16_load_unaligned (data), first))
373  {
374  data += 16;
375  count += 16;
376  if (count >= max_count)
377  return max_count;
378  }
379 #endif
380 #if defined(CLIB_HAVE_VEC256)
381  while (u32x8_is_all_equal (u32x8_load_unaligned (data), first))
382  {
383  data += 8;
384  count += 8;
385  if (count >= max_count)
386  return max_count;
387  }
388 #endif
389 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
390  while (u32x4_is_all_equal (u32x4_load_unaligned (data), first))
391  {
392  data += 4;
393  count += 4;
394  if (count >= max_count)
395  return max_count;
396  }
397 #endif
398  while (count < max_count && (data[0] == first))
399  {
400  data += 1;
401  count += 1;
402  }
403  return count;
404 }
405 
407 clib_count_equal_u16 (u16 * data, uword max_count)
408 {
409  uword count = 0;
410  u16 first = data[0];
411 
412 #if defined(CLIB_HAVE_VEC512)
413  while (count + 32 <= max_count &&
414  u16x32_is_all_equal (u16x32_load_unaligned (data), first))
415  {
416  data += 32;
417  count += 32;
418  }
419 #endif
420 #if defined(CLIB_HAVE_VEC256)
421  while (count + 16 <= max_count &&
422  u16x16_is_all_equal (u16x16_load_unaligned (data), first))
423  {
424  data += 16;
425  count += 16;
426  }
427 #endif
428 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
429  while (count + 8 <= max_count &&
430  u16x8_is_all_equal (u16x8_load_unaligned (data), first))
431  {
432  data += 8;
433  count += 8;
434  }
435 #endif
436  while (count < max_count && (data[0] == first))
437  {
438  data += 1;
439  count += 1;
440  }
441  return count;
442 }
443 
445 clib_count_equal_u8 (u32 * data, uword max_count)
446 {
447  uword count = 0;
448  u8 first = data[0];
449 
450 #if defined(CLIB_HAVE_VEC512)
451  while (count + 64 <= max_count &&
452  u8x64_is_all_equal (u8x64_load_unaligned (data), first))
453  {
454  data += 64;
455  count += 64;
456  }
457 #endif
458 #if defined(CLIB_HAVE_VEC256)
459  while (count + 32 <= max_count &&
460  u8x32_is_all_equal (u8x32_load_unaligned (data), first))
461  {
462  data += 32;
463  count += 32;
464  }
465 #endif
466 #if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
467  while (count + 16 <= max_count &&
468  u8x16_is_all_equal (u8x16_load_unaligned (data), first))
469  {
470  data += 4;
471  count += 4;
472  }
473 #endif
474  while (count < max_count && (data[0] == first))
475  {
476  data += 1;
477  count += 1;
478  }
479  return count;
480 }
481 
482 
483 #endif /* included_clib_string_h */
484 
485 /*
486  * fd.io coding-style-patch-verification: ON
487  *
488  * Local Variables:
489  * eval: (c-set-style "gnu")
490  * End:
491  */
unsigned long u64
Definition: types.h:89
static_always_inline u32 clib_count_equal_u8(u32 *data, uword max_count)
Definition: string.h:445
void clib_memswap(void *_a, void *_b, uword bytes)
Definition: string.c:43
unsigned char u8
Definition: types.h:56
#define static_always_inline
Definition: clib.h:93
unsigned long long u32x4
Definition: ixge.c:28
unsigned int u32
Definition: types.h:88
static heap_elt_t * first(heap_header_t *h)
Definition: heap.c:59
unsigned short u16
Definition: types.h:57
static_always_inline uword clib_count_equal_u64(u64 *data, uword max_count)
Definition: string.h:325
static_always_inline uword clib_count_equal_u16(u16 *data, uword max_count)
Definition: string.h:407
static_always_inline void clib_memcpy64_x4(void *d0, void *d1, void *d2, void *d3, void *s)
Definition: string.h:89
#define clib_memcpy(a, b, c)
Definition: string.h:75
size_t count
Definition: vapi.c:46
static_always_inline void clib_memset_u8(void *p, u8 val, uword count)
Definition: string.h:279
static_always_inline void clib_memset_u16(void *p, u16 val, uword count)
Definition: string.h:233
u64 uword
Definition: types.h:112
static_always_inline void clib_memset_u64(void *p, u64 val, uword count)
Definition: string.h:150
u64x4
Definition: vector_avx2.h:121
static_always_inline uword clib_count_equal_u32(u32 *data, uword max_count)
Definition: string.h:366
epu16_epi64 u16x16
Definition: vector_avx2.h:123
static_always_inline void clib_memset_u32(void *p, u32 val, uword count)
Definition: string.h:187