FD.io VPP  v20.05-21-gb1500e9ff
Vector Packet Processing
tcp.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef _vnet_tcp_h_
17 #define _vnet_tcp_h_
18 
19 #include <vnet/vnet.h>
20 #include <vnet/ip/ip.h>
21 #include <vnet/session/session.h>
22 #include <vnet/tcp/tcp_types.h>
23 #include <vnet/tcp/tcp_timer.h>
24 #include <vnet/tcp/tcp_debug.h>
25 #include <vnet/tcp/tcp_sack.h>
26 #include <vnet/tcp/tcp_bt.h>
27 #include <vnet/tcp/tcp_cc.h>
28 
30 
35 
/*
 * TCP error codes.
 *
 * The enumerators are not spelled out here: every tcp_error (n, s) entry in
 * vnet/tcp/tcp_error.def is expanded to an enumerator named TCP_ERROR_<n>.
 * The second argument (s) is ignored by this expansion.
 */
36 typedef enum _tcp_error
37 {
38 #define tcp_error(n,s) TCP_ERROR_##n,
39 #include <vnet/tcp/tcp_error.def>
40 #undef tcp_error
42 } tcp_error_t;
43 
44 typedef struct _tcp_lookup_dispatch
45 {
46  u8 next, error;
48 
/*
 * X-macro list of the per-worker statistics counters.
 *
 * Each entry has the form _(name, type, description). The user of the list
 * defines _() before expanding it and undefines it afterwards.
 *
 * The last entry also ends with a line continuation, so the blank line that
 * follows the list is part of the macro and must be kept.
 */
49 #define foreach_tcp_wrk_stat \
50  _(timer_expirations, u64, "timer expirations") \
51  _(rxt_segs, u64, "segments retransmitted") \
52  _(tr_events, u32, "timer retransmit events") \
53  _(to_closewait, u32, "timeout close-wait") \
54  _(to_closewait2, u32, "timeout close-wait w/data") \
55  _(to_finwait1, u32, "timeout fin-wait-1") \
56  _(to_finwait2, u32, "timeout fin-wait-2") \
57  _(to_lastack, u32, "timeout last-ack") \
58  _(to_closing, u32, "timeout closing") \
59  _(tr_abort, u32, "timer retransmit abort") \
60  _(rst_unread, u32, "reset on close due to unread data") \
61 
62 typedef struct tcp_wrk_stats_
63 {
64 #define _(name, type, str) type name;
66 #undef _
68 
69 typedef struct tcp_free_req_
70 {
74 
75 typedef struct tcp_worker_ctx_
76 {
77  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
78 
79  /** worker's pool of connections */
81 
82  /** vector of pending ack dequeues */
84 
85  /** vector of pending disconnect notifications */
87 
88  /** vector of pending reset notifications */
90 
91  /** convenience pointer to this thread's vlib main */
93 
94  /** worker time */
96 
97  /* Max timers to be handled per dispatch loop */
99 
100  /** Session layer edge indices to tcp output */
101  u32 tco_next_node[2];
102 
103  /* Fifo of pending timer expirations */
105 
106  CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
107 
108  /** cached 'on the wire' options for bursts */
109  u8 cached_opts[40];
110 
111  /** tx buffer free list */
113 
114  /* fifo of pending free requests */
116 
117  /** worker timer wheel */
119 
120  CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
121 
124 
/**
 * Add @a _val to one of a worker's statistics counters.
 *
 * @param _wrk   pointer expression yielding the worker context; its
 *               'stats' member is updated
 * @param _stat  name of the counter member inside 'stats'
 * @param _val   amount to add
 *
 * The pointer and value arguments are parenthesized so that expressions
 * such as '&wrk' or 'a + b' expand with the intended precedence. The whole
 * expansion is parenthesized as well so it behaves as a single expression.
 */
#define tcp_worker_stats_inc(_wrk,_stat,_val) 		\
  ((_wrk)->stats._stat += (_val))
127 
128 typedef struct tcp_iss_seed_
129 {
133 
134 typedef struct tcp_configuration_
135 {
136  /** Max rx fifo size for a session (in bytes). It is used to compute the
137  * RFC 7323 window scaling factor */
139 
140  /** Min rx fifo for a session (in bytes) */
142 
143  /** Default MTU to be used when establishing connections */
145 
146  /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
147  * Set 0 to determine the initial CWND by another way */
149 
150  /** Enable tx pacing for new connections */
152 
153  /** Allow use of TSO whenever available */
155 
156  /** Set if csum offloading is enabled */
158 
159  /** Default congestion control algorithm type */
161 
162  /** Min rwnd, as number of snd_mss segments, for update ack to be sent after
163  * a zero rwnd advertisement */
165 
166  /** Delayed ack time (disabled) */
168 
169  /** Timer ticks to wait for close from app */
171 
172  /** Timer ticks to wait in time-wait. Also known as 2MSL */
174 
175  /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */
177 
178  /** Timer ticks to wait in last ack for ack */
180 
181  /** Timer ticks to wait in fin-wait2 for fin */
183 
184  /** Timer ticks to wait in closing for fin ack */
186 
187  /** Time to wait (sec) before cleaning up the connection */
189 
190  /** Number of preallocated connections */
192 
193  /** Number of preallocated half-open connections */
195 
196  /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
198  ip6_address_t *ip6_src_addrs;
199 
200  /** Fault-injection. Debug only */
203 
/*
 * Global state of the TCP stack.
 *
 * A single instance, tcp_main, is declared extern right after this
 * definition.
 */
204 typedef struct _tcp_main
205 {
206  /** per-worker context */
207  tcp_worker_ctx_t *wrk_ctx;
208 
209  /* Pool of listeners. */
210  tcp_connection_t *listener_pool;
211 
 /* NOTE(review): undocumented in this header; by its name, presumably the
  * number of timestamp ticks per clock unit -- confirm against the
  * initialisation code */
212  f64 tstamp_ticks_per_clock;
213 
214  /** vlib buffer size */
215  u32 bytes_per_buffer;
216 
217  /** Session layer edge indices to ip lookup (syns, rst) */
218  u32 ipl_next_node[2];
219 
220  /** Dispatch table by state and flags */
221  tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
222 
 /* NOTE(review): presumably serialises access to half_open_connections
  * below -- confirm against the users of the lock */
223  clib_spinlock_t half_open_lock;
224 
225  /** Pool of half-open connections on which we've sent a SYN */
226  tcp_connection_t *half_open_connections;
227 
228  /** Seed used to generate random iss */
229  tcp_iss_seed_t iss_seed;
230 
231  /** Congestion control algorithms registered */
232  tcp_cc_algorithm_t *cc_algos;
233 
234  /** Hash table of cc algorithms by name */
235  uword *cc_algo_by_name;
236 
237  /** Last cc algo registered */
238  tcp_cc_algorithm_type_e cc_last_type;
239 
240  /** Flag that indicates if stack is on or off */
241  u8 is_enabled;
242 
243  /** Flag that indicates if v4 punting is enabled */
244  u8 punt_unknown4;
245 
246  /** Flag that indicates if v6 punting is enabled */
247  u8 punt_unknown6;
248 
249  /** Rotor for v4 source addresses */
250  u32 last_v4_addr_rotor;
251 
252  /** Rotor for v6 source addresses */
253  u32 last_v6_addr_rotor;
254 
 /* NOTE(review): the member declaration that belongs to the comment below
  * is missing from this listing; the tcp_cfg macro expects a member named
  * 'cfg' here */
255  /** Protocol configuration */
257 } tcp_main_t;
258 
259 extern tcp_main_t tcp_main;
272 
/* Shorthand for the configuration member of the global tcp_main. */
273 #define tcp_cfg tcp_main.cfg
/*
 * Index of a tcp graph node for the given address family.
 *
 * node_id is pasted into the registration names tcp4_<node_id>_node and
 * tcp6_<node_id>_node, so both registrations must be visible where the
 * macro is used. A non-zero is_ip4 selects the tcp4 node, zero selects the
 * tcp6 node.
 */
274 #define tcp_node_index(node_id, is_ip4) \
275  ((is_ip4) ? tcp4_##node_id##_node.index : tcp6_##node_id##_node.index)
276 
279 {
280  return &tcp_main;
281 }
282 
284 tcp_get_worker (u32 thread_index)
285 {
286  ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
287  return &tcp_main.wrk_ctx[thread_index];
288 }
289 
/*
 * Buffer trajectory tracing hook.
 *
 * When VLIB_BUFFER_TRACE_TRAJECTORY is enabled, the call is forwarded to the
 * callback that vlib_buffer_trace_trajectory_cb points to, passing b and
 * start through unchanged. Otherwise the macro does nothing.
 *
 * Both variants expand to a do { } while (0) statement, so the macro must be
 * followed by a semicolon and is safe as the sole body of an if/else branch.
 * A bare '{ ... }' expansion followed by ';' would end the 'if' statement
 * and orphan a following 'else'.
 */
#if (VLIB_BUFFER_TRACE_TRAJECTORY)
#define tcp_trajectory_add_start(b, start)			\
  do								\
    {								\
      (*vlib_buffer_trace_trajectory_cb) (b, start);		\
    }								\
  while (0)
#else
#define tcp_trajectory_add_start(b, start) do { } while (0)
#endif
298 
301  tcp_connection_t * base);
307 
309  u32 thread_index, u8 is_ip4);
310 void tcp_send_reset (tcp_connection_t * tc);
311 void tcp_send_syn (tcp_connection_t * tc);
313 void tcp_send_fin (tcp_connection_t * tc);
314 void tcp_send_ack (tcp_connection_t * tc);
316 
320 
324 void tcp_reschedule (tcp_connection_t * tc);
327  vlib_buffer_t * b);
328 int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
329 
336  u32 start_bucket);
338 
339 void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
341  ip4_address_t * start,
344  ip6_address_t * start,
345  ip6_address_t * end, u32 table_id);
346 
348 
355 
/**
 * Debug check on the amount of data available in the tx fifo.
 *
 * Asserts that either the connection is not in TCP_STATE_ESTABLISHED, or
 * transport_max_tx_dequeue () on its transport connection reports at least
 * @a _a.
 *
 * @param _tc  pointer expression yielding the tcp connection
 * @param _a   minimum value expected from transport_max_tx_dequeue ()
 *
 * Both arguments are parenthesized so that expressions such as '&tc' or
 * 'x + y' expand with the intended precedence.
 */
#define tcp_validate_txf_size(_tc, _a) 					\
  ASSERT((_tc)->state != TCP_STATE_ESTABLISHED 				\
	 || transport_max_tx_dequeue (&(_tc)->connection) >= (_a))
359 
360 #endif /* _vnet_tcp_h_ */
361 
362 /*
363  * fd.io coding-style-patch-verification: ON
364  *
365  * Local Variables:
366  * eval: (c-set-style "gnu")
367  * End:
368  */
u32 * pending_timers
Definition: tcp.h:104
tcp_main_t tcp_main
Definition: tcp.c:28
ip6_address_t * ip6_src_addrs
Definition: tcp.h:198
vlib_node_registration_t tcp6_listen_node
(constructor) VLIB_REGISTER_NODE (tcp6_listen_node)
Definition: tcp_input.c:2717
u8 allow_tso
Allow use of TSO whenever available.
Definition: tcp.h:154
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1052
#define CLIB_CACHE_LINE_ALIGN_MARK(mark)
Definition: cache.h:60
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1080
f64 clib_time_type_t
Definition: time.h:203
tcp_cleanup_req_t * pending_cleanups
Definition: tcp.h:115
u32 * pending_disconnects
vector of pending disconnect notifications
Definition: tcp.h:86
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:493
#define foreach_tcp_wrk_stat
Definition: tcp.h:49
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2301
unsigned long u64
Definition: types.h:89
void tcp_connection_del(tcp_connection_t *tc)
Connection removal.
Definition: tcp.c:289
u32 tcp_snd_space(tcp_connection_t *tc)
Definition: tcp.c:944
struct _tcp_main tcp_main_t
struct _tcp_connection tcp_connection_t
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2321
timer_expiration_handler tcp_timer_retransmit_handler
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:982
struct _tcp_lookup_dispatch tcp_lookup_dispatch_t
u32 * pending_resets
vector of pending reset notifications
Definition: tcp.h:89
void tcp_connection_tx_pacer_update(tcp_connection_t *tc)
Definition: tcp.c:1183
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:303
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:242
format_function_t format_tcp_flags
Definition: tcp.h:350
ip4_address_t * ip4_src_addrs
Vectors of src addresses.
Definition: tcp.h:197
u32 * pending_deq_acked
vector of pending ack dequeues
Definition: tcp.h:83
unsigned char u8
Definition: types.h:56
double f64
Definition: types.h:142
u8 *() format_function_t(u8 *s, va_list *args)
Definition: format.h:48
tcp_cc_algorithm_type_e cc_algo
Default congestion control algorithm type.
Definition: tcp.h:160
tcp_connection_t * connections
worker's pool of connections
Definition: tcp.h:80
struct _tcp_cc_algorithm tcp_cc_algorithm_t
Definition: tcp_types.h:251
vlib_node_registration_t tcp4_syn_sent_node
(constructor) VLIB_REGISTER_NODE (tcp4_syn_sent_node)
Definition: tcp_input.c:2085
struct tcp_wrk_stats_ tcp_wrk_stats_t
format_function_t format_tcp_connection
Definition: tcp.h:353
u32 min_rx_fifo
Min rx fifo for a session (in bytes)
Definition: tcp.h:141
void() timer_expiration_handler(tcp_connection_t *tc)
Definition: tcp.h:29
u8 enable_tx_pacing
Enable tx pacing for new connections.
Definition: tcp.h:151
timer_expiration_handler tcp_timer_retransmit_syn_handler
unsigned int u32
Definition: types.h:88
u16 closing_time
Timer ticks to wait in closing for fin ack.
Definition: tcp.h:185
f64 buffer_fail_fraction
Fault-injection.
Definition: tcp.h:201
struct tcp_worker_ctx_ tcp_worker_ctx_t
vlib_main_t * vm
convenience pointer to this thread's vlib main
Definition: tcp.h:92
timer_expiration_handler tcp_timer_persist_handler
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1011
vlib_node_registration_t tcp6_syn_sent_node
(constructor) VLIB_REGISTER_NODE (tcp6_syn_sent_node)
Definition: tcp_input.c:2104
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1651
u32 max_rx_fifo
Max rx fifo size for a session (in bytes).
Definition: tcp.h:138
struct tcp_free_req_ tcp_cleanup_req_t
format_function_t format_tcp_connection_id
Definition: tcp.h:354
clib_error_t * vnet_tcp_enable_disable(vlib_main_t *vm, u8 is_en)
Definition: tcp.c:1381
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:798
unsigned short u16
Definition: types.h:57
tw_timer_wheel_16t_2w_512sl_t tcp_timer_wheel_t
Definition: tcp_types.h:445
u32 preallocated_half_open_connections
Number of preallocated half-open connections.
Definition: tcp.h:194
tcp_timer_wheel_t timer_wheel
worker timer wheel
Definition: tcp.h:118
#define always_inline
Definition: ipsec.h:28
tcp_connection_t * tcp_connection_alloc(u8 thread_index)
Definition: tcp.c:296
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1196
vlib_main_t * vm
Definition: in2out_ed.c:1599
u8 csum_offload
Set if csum offloading is enabled.
Definition: tcp.h:157
u16 finwait1_time
Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack.
Definition: tcp.h:176
vlib_node_registration_t tcp4_listen_node
(constructor) VLIB_REGISTER_NODE (tcp4_listen_node)
Definition: tcp_input.c:2698
enum _tcp_error tcp_error_t
int tcp_configure_v4_source_address_range(vlib_main_t *vm, ip4_address_t *start, ip4_address_t *end, u32 table_id)
Configure an ipv4 source address range.
Definition: tcp_cli.c:375
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Definition: tcp.c:335
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1040
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:742
void tcp_punt_unknown(vlib_main_t *vm, u8 is_ip4, u8 is_add)
Definition: tcp.c:1399
format_function_t format_tcp_state
Definition: tcp.h:349
u64 second
Definition: tcp.h:131
struct _transport_connection transport_connection_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:209
u32 fib_node_index_t
A typedef of a node index.
Definition: fib_types.h:30
void tcp_connection_timers_init(tcp_connection_t *tc)
Initialize all connection timers as invalid.
Definition: tcp.c:476
format_function_t format_tcp_rcv_sacks
Definition: tcp.h:352
vlib_node_registration_t tcp6_input_node
(constructor) VLIB_REGISTER_NODE (tcp6_input_node)
Definition: tcp_input.c:3042
fib_node_index_t tcp_lookup_rmt_in_fib(tcp_connection_t *tc)
u16 default_mtu
Default MTU to be used when establishing connections.
Definition: tcp.h:144
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:833
#define ASSERT(truth)
u16 closewait_time
Timer ticks to wait for close from app.
Definition: tcp.h:170
u16 initial_cwnd_multiplier
Initial CWND multiplier, which multiplies MSS to determine initial CWND.
Definition: tcp.h:148
u16 lastack_time
Timer ticks to wait in last ack for ack.
Definition: tcp.h:179
u32 max_timers_per_loop
Definition: tcp.h:98
void tcp_connection_init_vars(tcp_connection_t *tc)
Initialize tcp connection variables.
Definition: tcp.c:704
clib_time_type_t free_time
Definition: tcp.h:71
vlib_node_registration_t tcp4_rcv_process_node
(constructor) VLIB_REGISTER_NODE (tcp4_rcv_process_node)
Definition: tcp_input.c:2528
vlib_node_registration_t tcp6_established_node
(constructor) VLIB_REGISTER_NODE (tcp6_established_node)
Definition: tcp_input.c:1620
float f32
Definition: types.h:143
struct _vlib_node_registration vlib_node_registration_t
struct tcp_iss_seed_ tcp_iss_seed_t
u64 first
Definition: tcp.h:130
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:657
u32 rwnd_min_update_ack
Min rwnd, as number of snd_mss segments, for update ack to be sent after a zero rwnd advertisement.
Definition: tcp.h:164
vlib_node_registration_t tcp4_input_node
(constructor) VLIB_REGISTER_NODE (tcp4_input_node)
Definition: tcp_input.c:3022
vlib_node_registration_t tcp6_rcv_process_node
(constructor) VLIB_REGISTER_NODE (tcp6_rcv_process_node)
Definition: tcp_input.c:2547
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:860
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
tcp_wrk_stats_t stats
Definition: tcp.h:122
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:284
VLIB buffer representation.
Definition: buffer.h:102
f32 cleanup_time
Time to wait (sec) before cleaning up the connection.
Definition: tcp.h:188
u64 uword
Definition: types.h:112
u32 time_now
worker time
Definition: tcp.h:95
void tcp_init_snd_vars(tcp_connection_t *tc)
Initialize connection send variables.
Definition: tcp.c:669
struct tcp_configuration_ tcp_configuration_t
void tcp_connection_close(tcp_connection_t *tc)
Begin connection closing procedure.
Definition: tcp.c:360
enum _tcp_cc_algorithm_type tcp_cc_algorithm_type_e
u16 delack_time
Delayed ack time (disabled)
Definition: tcp.h:167
void tcp_reschedule(tcp_connection_t *tc)
Definition: tcp.c:1207
format_function_t format_tcp_sacks
Definition: tcp.h:351
f64 end
end of the time range
Definition: mactime.api:44
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1030
int tcp_configure_v6_source_address_range(vlib_main_t *vm, ip6_address_t *start, ip6_address_t *end, u32 table_id)
Configure an ipv6 source address range.
Definition: tcp_cli.c:462
u32 table_id
Definition: fib_types.api:118
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:278
timer_expiration_handler tcp_timer_delack_handler
u32 preallocated_connections
Number of preallocated connections.
Definition: tcp.h:191
void tcp_connection_free(tcp_connection_t *tc)
Definition: tcp.c:322
u16 timewait_time
Timer ticks to wait in time-wait.
Definition: tcp.h:173
tcp_connection_t * tcp_connection_alloc_w_base(u8 thread_index, tcp_connection_t *base)
Definition: tcp.c:309
u16 finwait2_time
Timer ticks to wait in fin-wait2 for fin.
Definition: tcp.h:182
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
Definition: tcp_output.c:2011
u32 * tx_buffers
tx buffer free list
Definition: tcp.h:112
vlib_node_registration_t tcp4_established_node
(constructor) VLIB_REGISTER_NODE (tcp4_established_node)
Definition: tcp_input.c:1601
u32 connection_index
Definition: tcp.h:72