FD.io VPP  v18.04-17-g3a0d853
Vector Packet Processing
lb.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 /**
17  * lb-plugin implements a MagLev-like load balancer.
18  * http://research.google.com/pubs/pub44824.html
19  *
20  * It hasn't been tested for interoperability with the original MagLev
21  * but intends to provide similar functionality.
22  * The load-balancer receives traffic destined to VIP (Virtual IP)
23  * addresses from one or multiple (ECMP) routers.
24  * The load-balancer tunnels the traffic toward many application servers
25  * ensuring session stickiness (i.e. that a single session is tunneled
26  * towards a single application server).
27  *
28  */
29 
30 #ifndef LB_PLUGIN_LB_LB_H_
31 #define LB_PLUGIN_LB_LB_H_
32 
33 #include <lb/util.h>
34 #include <vnet/util/refcount.h>
35 
36 #include <vnet/vnet.h>
37 #include <vnet/ip/ip.h>
38 #include <vnet/dpo/dpo.h>
39 #include <vnet/fib/fib_table.h>
40 #include <vppinfra/hash.h>
41 
42 #include <lb/lbhash.h>
43 
/**
 * Default number of per-CPU sticky buckets.
 * The replacement list is parenthesized so the shift stays a single value
 * when the macro is combined with other operators (e.g. `X * 2`, `X - 1`).
 */
#define LB_DEFAULT_PER_CPU_STICKY_BUCKETS (1 << 10)

/** Default flow timeout (the flow timeout is expressed in seconds). */
#define LB_DEFAULT_FLOW_TIMEOUT 40
46 
47 typedef enum {
50 } lb_next_t;
51 
52 /**
53  * Each VIP is configured with a set of
54  * application servers.
55  */
56 typedef struct {
57  /**
58  * Registration to FIB event.
59  */
61 
62  /**
63  * Destination address used to tunnel traffic towards
64  * that application server.
65  * The address is also used as ID and pseudo-random
66  * seed for the load-balancing process.
67  */
68  ip46_address_t address;
69 
70  /**
71  * ASs are indexed by address and VIP Index.
72  * This means there will be duplicates if the same server
73  * address is used for multiple VIPs.
74  */
76 
77  /**
78  * Some per-AS flags.
79  * For now only LB_AS_FLAGS_USED is defined.
80  */
82 
83 #define LB_AS_FLAGS_USED 0x1
84 
85  /**
86  * Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
87  *
88  * AS removal is based on garbage collection and reference counting.
89  * When an AS is removed, there is a race between configuration core
90  * and worker cores which may still add a reference while it should not
91  * be used. This timestamp is used to not remove the AS while a race condition
92  * may happen.
93  */
95 
96  /**
97  * The FIB entry index for the next-hop
98  */
100 
101  /**
102  * The child index on the FIB entry
103  */
105 
106  /**
107  * The next DPO in the graph to follow.
108  */
110 
111 } lb_as_t;
112 
114 
115 typedef struct {
118 
/**
 * List of per-VIP counters, one `_(NAME, description, value)` entry each.
 * The user defines `_` before expanding the list; with
 * `#define _(a,b,c) LB_VIP_COUNTER_##a = c,` every entry becomes an
 * enumerator LB_VIP_COUNTER_<NAME> whose value is the entry's third field.
 * The second field is a human-readable description string.
 */
119 #define lb_foreach_vip_counter \
120  _(NEXT_PACKET, "packet from existing sessions", 0) \
121  _(FIRST_PACKET, "first session packet", 1) \
122  _(UNTRACKED_PACKET, "untracked packet", 2) \
123  _(NO_SERVER, "no server configured", 3)
124 
125 typedef enum {
126 #define _(a,b,c) LB_VIP_COUNTER_##a = c,
128 #undef _
131 
132 typedef enum {
138 
139 /**
140  * The load balancer supports IPv4 and IPv6 traffic
141  * and GRE4, GRE6 and L3DSR encap.
142  */
143 typedef enum {
150 } lb_vip_type_t;
151 
152 
155 
156 /**
157  * Load balancing service is provided per VIP.
158  * In this data model, a VIP can be a whole prefix.
159  * But load balancing only
160  * occurs on a per-source-address/port basis, meaning that if a given source
161  * reuses the same port for multiple destinations within the same VIP,
162  * they will be considered as a single flow.
163  */
164 typedef struct {
165 
166  //Runtime
167 
168  /**
169  * Vector mapping (flow-hash & new_flow_table_mask) to AS index.
170  * This is used for new flows.
171  */
173 
174  /**
175  * New flows table length - 1
176  * (length MUST be a power of 2)
177  */
179 
180  /**
181  * Last time garbage collection was run to free the ASs.
182  */
184 
185  //Not runtime
186 
187  /**
188  * A Virtual IP represents a given service delivered
189  * by a set of application servers. It can be a single
190  * address or a prefix.
191  * IPv4 prefixes are encoded using IPv4-in-IPv6 embedded address
192  * (i.e. ::/96 prefix).
193  */
194  ip46_address_t prefix;
195 
196  /**
197  * The VIP prefix length.
198  * In case of IPv4, plen = 96 + ip4_plen.
199  */
201 
202  /**
203  * The type of traffic for this.
204  * LB_TYPE_UNDEFINED if unknown.
205  */
207 
208  /**
209  * DSCP bits for L3DSR
210  */
212 
213  /**
214  * Flags related to this VIP.
215  * LB_VIP_FLAGS_USED means the VIP is active.
216  * When it is not set, the VIP is in the process of being removed.
217  * We cannot immediately remove a VIP because the VIP index still may be stored
218  * in the adjacency index.
219  */
221 #define LB_VIP_FLAGS_USED 0x1
222 
223  /**
224  * Pool of AS indexes used for this VIP.
225  * This also includes ASs that have been removed (but are still referenced).
226  */
228 } lb_vip_t;
229 
/*
 * VIP type predicates.
 *
 * Each macro takes a pointer to an object with a `type` member holding an
 * LB_VIP_TYPE_* value and evaluates to non-zero when the type matches.
 * Every expansion is wrapped in parentheses so that it can be combined
 * safely with other operators (`!`, `&&`, `==`, ...).
 */

/** Non-zero when the VIP type is one of the LB_VIP_TYPE_IP4_* types. */
#define lb_vip_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP4_GRE6 \
 || (vip)->type == LB_VIP_TYPE_IP4_GRE4 \
 || (vip)->type == LB_VIP_TYPE_IP4_L3DSR)

/** Non-zero when the VIP type is one of the *_GRE4 types. */
#define lb_vip_is_gre4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
 || (vip)->type == LB_VIP_TYPE_IP4_GRE4)

/** Non-zero when the VIP type is one of the *_GRE6 types. */
#define lb_vip_is_gre6(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
 || (vip)->type == LB_VIP_TYPE_IP4_GRE6)

/**
 * Non-zero when the VIP type is LB_VIP_TYPE_IP4_L3DSR.
 * The whole comparison is parenthesized: without it, `!lb_vip_is_l3dsr(v)`
 * would negate `(v)->type` before comparing.
 */
#define lb_vip_is_l3dsr(vip) ((vip)->type == LB_VIP_TYPE_IP4_L3DSR)

/** Non-zero for the GRE4 types and for IP4_L3DSR. */
#define lb_encap_is_ip4(vip) ((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
 || (vip)->type == LB_VIP_TYPE_IP4_GRE4 \
 || (vip)->type == LB_VIP_TYPE_IP4_L3DSR)
243 
246 
247 typedef struct {
248  /**
249  * Each CPU has its own sticky flow hash table.
250  * One single table is used for all VIPs.
251  */
253 } lb_per_cpu_t;
254 
255 typedef struct {
256  /**
257  * Pool of all Virtual IPs
258  */
260 
261  /**
262  * Pool of ASs.
263  * ASs are referenced by address and vip index.
264  * The first element (index 0) is special and used only to fill
265  * new_flow_tables when no AS has been configured.
266  */
268 
269  /**
270  * Each AS has an associated reference counter.
271  * As ass[0] has a special meaning, its associated counter
272  * starts at 0 and is decremented instead. i.e. do not use it.
273  */
275 
276  /**
277  * Some global data is per-cpu
278  */
280 
281  /**
282  * Node next index for IP adjacencies, for each of the traffic types.
283  */
284  u32 ip_lookup_next_index[LB_VIP_N_TYPES];
285 
286  /**
287  * Source address used in IPv6 encapsulated traffic
288  */
290 
291  /**
292  * Source address used for IPv4 encapsulated traffic
293  */
295 
296  /**
297  * Number of buckets in the per-cpu sticky hash table.
298  */
300 
301  /**
302  * Flow timeout in seconds.
303  */
305 
306  /**
307  * Per VIP counter
308  */
310 
311  /**
312  * DPO used to send packet from IP4/6 lookup to LB node.
313  */
317 
318  /**
319  * Node type for registering to fib changes.
320  */
322 
323  /**
324  * API dynamically registered base ID.
325  */
327 
328  volatile u32 *writer_lock;
329 } lb_main_t;
330 
331 extern lb_main_t lb_main;
334 
335 /**
336  * Fix global load-balancer parameters.
337  * @param ip4_address IPv4 source address used for encapsulated traffic
338  * @param ip6_address IPv6 source address used for encapsulated traffic
 * @param sticky_buckets Number of buckets in the per-cpu sticky hash table
 *        (presumably stored in lb_main_t.per_cpu_sticky_buckets; confirm in lb.c)
 * @param flow_timeout Flow timeout in seconds
 *        (presumably stored in lb_main_t.flow_timeout; confirm in lb.c)
339  * @return 0 on success. VNET_LB_ERR_XXX on error
340  */
341 int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address,
342  u32 sticky_buckets, u32 flow_timeout);
343 
/*
 * VIP and AS management entry points.
 * NOTE(review): only the declarations are visible in this header; the
 * descriptions below are inferred from the names and from the field
 * documentation of lb_vip_t. Confirm against the definitions in lb.c.
 */

/*
 * Add a VIP.
 * prefix / plen: VIP prefix and its length (for IPv4, plen = 96 + ip4_plen,
 *                per the lb_vip_t field documentation).
 * type:          traffic/encap type of the VIP.
 * dscp:          DSCP bits for L3DSR.
 * new_length:    presumably the new-flows table length, which must be a
 *                power of 2. TODO confirm.
 * vip_index:     presumably receives the index of the created VIP. TODO confirm.
 */
344 int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u8 dscp,
345  u32 new_length, u32 *vip_index);
/* Delete the VIP identified by vip_index (presumably; see lb.c). */
346 int lb_vip_del(u32 vip_index);
347 
/*
 * Look up a VIP by prefix and prefix length; vip_index presumably receives
 * the result. TODO confirm.
 */
348 int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index);
349 
/*
 * Evaluates to NULL when `index` is a free slot of the lb_main.vips pool,
 * otherwise to the pool element at that index.
 * Caution: `index` appears twice in the expansion, so an argument with side
 * effects (e.g. `i++`) would be evaluated more than once.
 */
350 #define lb_vip_get_by_index(index) (pool_is_free_index(lb_main.vips, index)?NULL:pool_elt_at_index(lb_main.vips, index))
351 
/*
 * Add / remove `n` application server addresses (array `addresses`) on the
 * VIP identified by vip_index (presumably; see lb.c).
 */
352 int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
353 int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n);
354 
356 
/**
 * Run the garbage collection that frees ASs.
 * Declared with an explicit `(void)` parameter list so that this is a real
 * prototype: in C an empty `()` leaves the parameters unspecified and
 * disables argument checking at call sites.
 */
void lb_garbage_collection(void);
358 
360 
361 #endif /* LB_PLUGIN_LB_LB_H_ */
format_function_t format_lb_vip
Definition: lb.h:244
u8 dscp
DSCP bits for L3DSR.
Definition: lb.h:211
int lb_vip_add_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
Definition: lb.c:441
uword( unformat_function_t)(unformat_input_t *input, va_list *args)
Definition: format.h:231
Each VIP is configured with a set of application servers.
Definition: lb.h:56
u32 per_cpu_sticky_buckets
Number of buckets in the per-cpu sticky hash table.
Definition: lb.h:299
u16 msg_id_base
API dynamically registered base ID.
Definition: lb.h:326
Definition: lb.h:49
ip46_address_t prefix
A Virtual IP represents a given service delivered by a set of application servers.
Definition: lb.h:194
lb_vip_counter_t
Definition: lb.h:125
format_function_t format_lb_vip_type
Definition: lb.h:153
u8 *( format_function_t)(u8 *s, va_list *args)
Definition: format.h:48
u32 vip_index
ASs are indexed by address and VIP Index.
Definition: lb.h:75
lb_hash_t * sticky_ht
Each CPU has its own sticky flow hash table.
Definition: lb.h:252
ip46_address_t address
Destination address used to tunnel traffic towards that application server.
Definition: lb.h:68
lb_next_t
Definition: lb.h:47
int lb_conf(ip4_address_t *ip4_address, ip6_address_t *ip6_address, u32 sticky_buckets, u32 flow_timeout)
Fix global load-balancer parameters.
Definition: lb.c:378
enum dpo_type_t_ dpo_type_t
Common types of data-path objects. New types can be dynamically added using dpo_register_new_type() ...
unformat_function_t unformat_lb_vip_type
Definition: lb.h:154
u32 flow_timeout
Flow timeout in seconds.
Definition: lb.h:304
Definition: lb.h:255
fib_node_type_t fib_node_type
Node type for registering to fib changes.
Definition: lb.h:321
dpo_type_t dpo_gre4_type
DPO used to send packet from IP4/6 lookup to LB node.
Definition: lb.h:314
vlib_refcount_t as_refcount
Each AS has an associated reference counter.
Definition: lb.h:274
format_function_t format_lb_as
Definition: lb.h:113
A collection of simple counters.
Definition: counter.h:58
lb_main_t lb_main
Definition: lb.c:27
lb_vip_t * vips
Pool of all Virtual IPs.
Definition: lb.h:259
u32 last_used
Rotating timestamp of when LB_AS_FLAGS_USED flag was last set.
Definition: lb.h:94
ip4_address_t ip4_src_address
Source address used for IPv4 encapsulated traffic.
Definition: lb.h:294
u8 plen
The VIP prefix length.
Definition: lb.h:200
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
Definition: dpo.h:168
format_function_t format_lb_main
Definition: lb.h:359
A node in the FIB graph.
Definition: fib_node.h:286
Definition: lb.h:115
fib_node_t fib_node
Registration to FIB event.
Definition: lb.h:60
lb_encap_type_t
Definition: lb.h:132
vlib_node_registration_t lb6_node
vlib_main_t * vm
Definition: buffer.c:294
volatile u32 * writer_lock
Definition: lb.h:328
#define lb_foreach_vip_counter
Definition: lb.h:119
u32 fib_node_index_t
A typedef of a node index.
Definition: fib_types.h:30
u32 as_index
Definition: lb.h:116
dpo_type_t dpo_gre6_type
Definition: lb.h:315
u32 last_garbage_collection
Last time garbage collection was run to free the ASs.
Definition: lb.h:183
lb_as_t * ass
Pool of ASs.
Definition: lb.h:267
vlib_node_registration_t lb4_node
int lb_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
Definition: lb.c:414
lb_vip_type_t type
The type of traffic for this.
Definition: lb.h:206
int lb_vip_del(u32 vip_index)
Definition: lb.c:733
unsigned int u32
Definition: types.h:88
lb_vip_type_t
The load balancer supports IPv4 and IPv6 traffic and GRE4, GRE6 and L3DSR encap.
Definition: lb.h:143
void lb_garbage_collection()
Definition: lb.c:249
u32 new_flow_table_mask
New flows table length - 1 (length MUST be a power of 2)
Definition: lb.h:178
format_function_t format_lb_vip_detailed
Definition: lb.h:245
lb_per_cpu_t * per_cpu
Some global data is per-cpu.
Definition: lb.h:279
int lb_vip_del_ass(u32 vip_index, ip46_address_t *addresses, u32 n)
Definition: lb.c:598
struct _vlib_node_registration vlib_node_registration_t
unsigned short u16
Definition: types.h:57
ip6_address_t ip6_src_address
Source address used in IPv6 encapsulated traffic.
Definition: lb.h:289
unsigned char u8
Definition: types.h:56
u32 next_hop_child_index
The child index on the FIB entry.
Definition: lb.h:104
dpo_type_t dpo_l3dsr_type
Definition: lb.h:316
int lb_vip_add(ip46_address_t *prefix, u8 plen, lb_vip_type_t type, u8 dscp, u32 new_length, u32 *vip_index)
Definition: lb.c:662
enum fib_node_type_t_ fib_node_type_t
The types of nodes in a FIB graph.
u32 lb_hash_time_now(vlib_main_t *vm)
Definition: lb.c:57
dpo_id_t dpo
The next DPO in the graph to follow.
Definition: lb.h:109
u8 flags
Some per-AS flags.
Definition: lb.h:81
lb_new_flow_entry_t * new_flow_table
Vector mapping (flow-hash & new_flow_table_mask) to AS index.
Definition: lb.h:172
u8 flags
Flags related to this VIP.
Definition: lb.h:220
Load balancing service is provided per VIP.
Definition: lb.h:164
u32 * as_indexes
Pool of AS indexes used for this VIP.
Definition: lb.h:227
fib_node_index_t next_hop_fib_entry_index
The FIB entry index for the next-hop.
Definition: lb.h:99