1 /* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
33 #include <net/netfilter/nf_log.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
/*
 * Compiled-in defaults for three TCP tracking tunables: liberal mode
 * off (0), loose mode on (1) and a retransmission limit of 3.
 * NOTE(review): the code that copies these into the per-net settings read
 * elsewhere as tn->tcp_loose and tn->tcp_max_retrans is not visible in
 * this listing.
 */
37 /* "Be conservative in what you do,
38 be liberal in what you accept from others."
39 If it's non-zero, we mark only out of window RST segments as INVALID. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
42 /* If it is set to zero, we disable picking up already established
44 static int nf_ct_tcp_loose __read_mostly = 1;
46 /* Max number of the retransmitted packets without receiving an (acceptable)
47 ACK from the destination. If this number is reached, a shorter timer
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
51 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
52 closely. They're more complex. --RR */
54 static const char *const tcp_conntrack_names[] = {
/*
 * Time-unit suffix macros and the default timeout table.
 *
 * Each macro expands to a multiplication chain, so that `5 DAYS` becomes
 * `5 * 24 * 60 * 60 SECS`.  SECS itself is defined on a line that is not
 * visible in this listing.
 *
 * tcp_timeouts is indexed by TCP_CONNTRACK_* value.  Besides the
 * connection states it holds two extra slots, TCP_CONNTRACK_RETRANS and
 * TCP_CONNTRACK_UNACK, which tcp_packet() uses as caps when the
 * retransmission limit is reached, when the last window was zero, or when
 * unacknowledged data is outstanding.
 */
68 #define MINS * 60 SECS
69 #define HOURS * 60 MINS
70 #define DAYS * 24 HOURS
72 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
73 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
74 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
75 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
76 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
77 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
78 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
79 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
80 [TCP_CONNTRACK_CLOSE] = 10 SECS,
81 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
82 /* RFC1122 says the R2 limit should be at least 100 seconds.
83 Linux uses 15 packets as limit, which corresponds
84 to ~13-30min depending on RTO. */
85 [TCP_CONNTRACK_RETRANS] = 5 MINS,
86 [TCP_CONNTRACK_UNACK] = 5 MINS,
/*
 * Short aliases for the conntrack states, used to keep the rows of the
 * tcp_conntracks transition table compact.  Two of them are verdicts
 * rather than states: sIV (invalid) aliases TCP_CONNTRACK_MAX and
 * sIG (ignore) aliases TCP_CONNTRACK_IGNORE.
 */
89 #define sNO TCP_CONNTRACK_NONE
90 #define sSS TCP_CONNTRACK_SYN_SENT
91 #define sSR TCP_CONNTRACK_SYN_RECV
92 #define sES TCP_CONNTRACK_ESTABLISHED
93 #define sFW TCP_CONNTRACK_FIN_WAIT
94 #define sCW TCP_CONNTRACK_CLOSE_WAIT
95 #define sLA TCP_CONNTRACK_LAST_ACK
96 #define sTW TCP_CONNTRACK_TIME_WAIT
97 #define sCL TCP_CONNTRACK_CLOSE
98 #define sS2 TCP_CONNTRACK_SYN_SENT2
99 #define sIV TCP_CONNTRACK_MAX
100 #define sIG TCP_CONNTRACK_IGNORE
102 /* What TCP flags are set from RST/SYN/FIN/ACK. */
113 * The TCP state transition table needs a few words...
115 * We are the man in the middle. All the packets go through us
116 * but might get lost in transit to the destination.
117 * It is assumed that the destinations can't receive segments
120 * The checked segment is in window, but our windows are *not*
121 * equivalent with the ones of the sender/receiver. We always
122 * try to guess the state of the current sender.
124 * The meanings of the states are:
126 * NONE: initial state
127 * SYN_SENT: SYN-only packet seen
128 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
129 * SYN_RECV: SYN-ACK packet seen
130 * ESTABLISHED: ACK packet seen
131 * FIN_WAIT: FIN packet seen
132 * CLOSE_WAIT: ACK seen (after FIN)
133 * LAST_ACK: FIN seen (after FIN)
134 * TIME_WAIT: last ACK seen
135 * CLOSE: closed connection (RST)
137 * Packets marked as IGNORED (sIG):
138 * if they may be either invalid or valid
139 * and the receiver may send back a connection
140 * closing RST or a SYN/ACK.
142 * Packets marked as INVALID (sIV):
143 * if we regard them as truly invalid packets
145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
148 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
149 /*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
151 * sNO -> sSS Initialize a new connection
152 * sSS -> sSS Retransmitted SYN
153 * sS2 -> sS2 Late retransmitted SYN
155 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
156 * are errors. Receiver will reply with RST
157 * and close the connection.
158 * Or we are not in sync and hold a dead connection.
162 * sTW -> sSS Reopened connection (RFC 1122).
165 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
166 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
168 * sNO -> sIV Too late and no reason to do anything
169 * sSS -> sIV Client can't send SYN and then SYN/ACK
170 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
171 * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
172 * sES -> sIV Invalid SYN/ACK packets sent by the client
179 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
180 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
182 * sNO -> sIV Too late and no reason to do anything...
183 * sSS -> sIV Client might not send FIN in this state:
184 * we enforce waiting for a SYN/ACK reply first.
186 * sSR -> sFW Close started.
188 * sFW -> sLA FIN seen in both directions, waiting for
190 * Might be a retransmitted FIN as well...
192 * sLA -> sLA Retransmitted FIN. Remain in the same state.
196 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
197 /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
199 * sNO -> sES Assumed.
200 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
202 * sSR -> sES Established state is reached.
204 * sFW -> sCW Normal close request answered by ACK.
206 * sLA -> sTW Last ACK detected (RFC5961 challenged)
207 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
210 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
211 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
212 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
216 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
217 /*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
219 * sNO -> sIV Never reached.
220 * sSS -> sS2 Simultaneous open
221 * sS2 -> sS2 Retransmitted simultaneous SYN
222 * sSR -> sIV Invalid SYN packets sent by the server
227 * sTW -> sSS Reopened connection, but server may have switched role
230 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
231 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
233 * sSS -> sSR Standard open.
234 * sS2 -> sSR Simultaneous open
235 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
236 * sES -> sIG Late retransmitted SYN/ACK?
237 * sFW -> sIG Might be SYN/ACK answering ignored SYN
243 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
244 /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
246 * sSS -> sIV Server might not send FIN in this state.
248 * sSR -> sFW Close started.
250 * sFW -> sLA FIN seen in both directions.
252 * sLA -> sLA Retransmitted FIN.
256 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
257 /*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
259 * sSS -> sIG Might be a half-open connection.
261 * sSR -> sSR Might answer late resent SYN.
263 * sFW -> sCW Normal close request answered by ACK.
265 * sLA -> sTW Last ACK detected (RFC5961 challenged)
266 * sTW -> sTW Retransmitted last ACK.
269 /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
270 /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
271 /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
/*
 * Return a pointer to the TCP-specific conntrack data stored inside the
 * given struct net (net->ct.nf_ct_proto.tcp).  Other functions in this
 * file read the timeouts, tcp_loose and tcp_max_retrans through it.
 */
275 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
277 return &net->ct.nf_ct_proto.tcp;
280 #ifdef CONFIG_NF_CONNTRACK_PROCFS
281 /* Print out the private part of the conntrack: the name of the current
 * TCP state, looked up in tcp_conntrack_names[] by ct->proto.tcp.state and
 * followed by a single space, is written to seq file s.
 * NOTE(review): the statement guarded by the IPS_OFFLOAD_BIT test is not
 * visible in this listing. */
282 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
284 if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
287 seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
/*
 * Classify a TCP header by its flags into the index used for the second
 * dimension of tcp_conntracks[][][].
 *
 * The flags are tested in a fixed priority order:
 *   RST               -> TCP_RST_SET (regardless of any other flag)
 *   SYN with ACK      -> TCP_SYNACK_SET
 *   SYN without ACK   -> TCP_SYN_SET
 *   FIN               -> TCP_FIN_SET (also when ACK is set)
 *   ACK               -> TCP_ACK_SET
 *   none of the above -> TCP_NONE_SET
 */
291 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
293 if (tcph->rst) return TCP_RST_SET;
294 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
295 else if (tcph->fin) return TCP_FIN_SET;
296 else if (tcph->ack) return TCP_ACK_SET;
297 else return TCP_NONE_SET;
300 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
301 in IP Filter' by Guido van Rooij.
303 http://www.sane.nl/events/sane2000/papers.html
304 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
306 The boundaries and the conditions are changed according to RFC793:
307 the packet must intersect the window (i.e. segments may be
308 after the right or before the left edge) and thus receivers may ACK
309 segments after the right edge of the window.
311 td_maxend = max(sack + max(win,1)) seen in reply packets
312 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
313 td_maxwin += seq + len - sender.td_maxend
314 if seq + len > sender.td_maxend
315 td_end = max(seq + len) seen in sent packets
317 I. Upper bound for valid data: seq <= sender.td_maxend
318 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
319 III. Upper bound for valid (s)ack: sack <= receiver.td_end
320 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
322 where sack is the highest right edge of sack block found in the packet
323 or ack in the case of packet without SACK option.
325 The upper bound limit for a valid (s)ack is not ignored -
326 we don't have to deal with fragments.
/*
 * Return the sequence number that follows the last one occupied by this
 * segment: seq plus the payload length, plus one for SYN and one for FIN.
 *
 * The payload length is computed as len - dataoff - tcph->doff*4, i.e. a
 * total length minus the offset of the TCP header minus the TCP header
 * length taken from the data-offset field.  Callers in this file pass
 * skb->len as len.  The arithmetic is done in __u32.
 * NOTE(review): the declaration of the len parameter is on a line that is
 * not visible in this listing.
 */
329 static inline __u32 segment_seq_plus_len(__u32 seq,
331 unsigned int dataoff,
332 const struct tcphdr *tcph)
334 /* XXX Should I use payload length field in IP/IPv6 header ?
336 return (seq + len - dataoff - tcph->doff*4
337 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
/*
 * Window size used for the lower bound of an acceptable (s)ack value
 * (condition IV in tcp_in_window).  MAXACKWINDOW(sender) yields
 * sender->td_maxwin when that value exceeds MAXACKWINCONST.
 * NOTE(review): the alternative arm of the conditional is on a line that
 * is not visible in this listing.
 */
340 /* Fixme: what about big packets? */
341 #define MAXACKWINCONST 66000
342 #define MAXACKWINDOW(sender) \
343 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
347 * Simplified tcp_parse_options routine from tcp_input.c
/*
 * Parse the TCP options of a segment and record in *state the ones that
 * matter for window tracking:
 *  - a SACK-permitted option of length TCPOLEN_SACK_PERM sets
 *    IP_CT_TCP_FLAG_SACK_PERM in state->flags;
 *  - a window-scale option of length TCPOLEN_WINDOW stores its value in
 *    state->td_scale, clamped to TCP_MAX_WSCALE.
 *
 * The option area has length tcph->doff*4 minus the fixed header size and
 * is fetched with skb_header_pointer() starting right after the fixed
 * header at dataoff; buff is sized for the largest possible option area
 * (15 * 4 bytes of header minus the fixed part).  Option lengths below 2
 * are treated as silly options and partial options end the parse.
 *
 * NOTE(review): parts of the body (the loop, the early exits and the
 * statement that sets IP_CT_TCP_FLAG_WINDOW_SCALE) are not visible in
 * this listing.
 */
349 static void tcp_options(const struct sk_buff *skb,
350 unsigned int dataoff,
351 const struct tcphdr *tcph,
352 struct ip_ct_tcp_state *state)
354 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
355 const unsigned char *ptr;
356 int length = (tcph->doff*4) - sizeof(struct tcphdr);
361 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
375 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
382 if (opsize < 2) /* "silly options" */
385 return; /* don't parse partial options */
387 if (opcode == TCPOPT_SACK_PERM
388 && opsize == TCPOLEN_SACK_PERM)
389 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
390 else if (opcode == TCPOPT_WINDOW
391 && opsize == TCPOLEN_WINDOW) {
392 state->td_scale = *(u_int8_t *)ptr;
394 if (state->td_scale > TCP_MAX_WSCALE)
395 state->td_scale = TCP_MAX_WSCALE;
398 IP_CT_TCP_FLAG_WINDOW_SCALE;
/*
 * Scan the TCP options of a segment for SACK blocks.
 *
 * For every SACK option whose length is TCPOLEN_SACK_BASE plus a whole,
 * non-zero number of TCPOLEN_SACK_PERBLOCK blocks, the second 32-bit word
 * of each block is read with get_unaligned_be32() and compared against
 * *sack using after().  The caller in tcp_in_window() preloads *sack with
 * the plain ack number.
 * NOTE(review): the statement executed when that comparison succeeds is
 * not visible in this listing; presumably it stores the larger value in
 * *sack.
 *
 * A fast path recognises an option area that consists only of an aligned
 * timestamp option (length TCPOLEN_TSTAMP_ALIGNED).  As in tcp_options(),
 * option lengths below 2 are treated as silly options and partial options
 * end the parse.
 */
406 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
407 const struct tcphdr *tcph, __u32 *sack)
409 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
410 const unsigned char *ptr;
411 int length = (tcph->doff*4) - sizeof(struct tcphdr);
417 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
421 /* Fast path for timestamp-only option */
422 if (length == TCPOLEN_TSTAMP_ALIGNED
423 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
425 | (TCPOPT_TIMESTAMP << 8)
426 | TCPOLEN_TIMESTAMP))
436 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
443 if (opsize < 2) /* "silly options" */
446 return; /* don't parse partial options */
448 if (opcode == TCPOPT_SACK
449 && opsize >= (TCPOLEN_SACK_BASE
450 + TCPOLEN_SACK_PERBLOCK)
451 && !((opsize - TCPOLEN_SACK_BASE)
452 % TCPOLEN_SACK_PERBLOCK)) {
454 i < (opsize - TCPOLEN_SACK_BASE);
455 i += TCPOLEN_SACK_PERBLOCK) {
456 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
458 if (after(tmp, *sack))
/*
 * Decide whether a segment is acceptable for the tracked connection and,
 * if it is, update the per-direction window state.
 *
 * sender is state->seen[dir] and receiver is state->seen[!dir].  seq, ack
 * and the raw window are read from the TCP header, end comes from
 * segment_seq_plus_len(), sack starts out equal to ack and is refined by
 * tcp_sack() when the receiver has IP_CT_TCP_FLAG_SACK_PERM, and both ack
 * and sack are corrected by the NAT sequence offset returned by
 * nf_ct_seq_offset().
 *
 * Before the test the tracking data may be (re)initialised:
 *  - sender->td_maxwin == 0: first segment seen from this side, either
 *    part of the handshake or a pickup in the middle of a connection;
 *  - SYN_SENT in original direction or SYN_RECV in reply direction with
 *    end after sender->td_end: the sender restarted with larger sequence
 *    numbers, so td_maxend/td_maxwin are reset and options re-parsed.
 * ack/sack are replaced by receiver->td_end when there is no usable ACK,
 * and an RST with seq 0 in SYN_SENT is treated as if it carried
 * sender->td_end.
 *
 * The acceptance test visible below requires
 *   I.   before(seq, sender->td_maxend + 1)
 *   III. before(sack, receiver->td_end + 1)
 *   IV.  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)
 * in_recv_win (printed as II in the debug output) is computed just before
 * it.  NOTE(review): the line of the condition that would test
 * in_recv_win is not visible in this listing.
 *
 * On acceptance the window is scaled by sender->td_scale, sender
 * td_maxwin/td_end/td_maxack and receiver td_maxwin/td_maxend are
 * advanced, IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED is maintained, and for
 * plain ACK segments the last_* fields are compared to detect repeats.
 * Otherwise IP_CT_TCP_FLAG_BE_LIBERAL on the sender is consulted and a
 * reason string is passed to nf_ct_l4proto_log_invalid().
 *
 * NOTE(review): several lines of this function are missing from this
 * listing, among them some declarations, the assignment of win, the
 * statements that set res and the final return, so this summary covers
 * only what is shown.
 */
469 static bool tcp_in_window(const struct nf_conn *ct,
470 struct ip_ct_tcp *state,
471 enum ip_conntrack_dir dir,
473 const struct sk_buff *skb,
474 unsigned int dataoff,
475 const struct tcphdr *tcph)
477 struct net *net = nf_ct_net(ct);
478 struct nf_tcp_net *tn = tcp_pernet(net);
479 struct ip_ct_tcp_state *sender = &state->seen[dir];
480 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
481 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
482 __u32 seq, ack, sack, end, win, swin;
485 bool res, in_recv_win;
488 * Get the required data from the packet.
490 seq = ntohl(tcph->seq);
491 ack = sack = ntohl(tcph->ack_seq);
492 win_raw = ntohs(tcph->window);
494 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
496 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
497 tcp_sack(skb, dataoff, tcph, &sack);
499 /* Take into account NAT sequence number mangling */
500 receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
501 ack -= receiver_offset;
502 sack -= receiver_offset;
504 pr_debug("tcp_in_window: START\n");
505 pr_debug("tcp_in_window: ");
506 nf_ct_dump_tuple(tuple);
507 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
508 seq, ack, receiver_offset, sack, receiver_offset, win, end);
509 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
510 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
511 sender->td_end, sender->td_maxend, sender->td_maxwin,
513 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
516 if (sender->td_maxwin == 0) {
518 * Initialize sender data.
522 * SYN-ACK in reply to a SYN
523 * or SYN from reply direction in simultaneous open.
526 sender->td_maxend = end;
527 sender->td_maxwin = (win == 0 ? 1 : win);
529 tcp_options(skb, dataoff, tcph, sender);
532 * Both sides must send the Window Scale option
533 * to enable window scaling in either direction.
535 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
536 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
538 receiver->td_scale = 0;
540 /* Simultaneous open */
544 * We are in the middle of a connection,
545 * its history is lost for us.
546 * Let's try to use the data from the packet.
548 sender->td_end = end;
549 swin = win << sender->td_scale;
550 sender->td_maxwin = (swin == 0 ? 1 : swin);
551 sender->td_maxend = end + sender->td_maxwin;
552 if (receiver->td_maxwin == 0) {
553 /* We haven't seen traffic in the other
554 * direction yet but we have to tweak window
555 * tracking to pass III and IV until that
558 receiver->td_end = receiver->td_maxend = sack;
559 } else if (sack == receiver->td_end + 1) {
560 /* Likely a reply to a keepalive.
567 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
568 && dir == IP_CT_DIR_ORIGINAL)
569 || (state->state == TCP_CONNTRACK_SYN_RECV
570 && dir == IP_CT_DIR_REPLY))
571 && after(end, sender->td_end)) {
573 * RFC 793: "if a TCP is reinitialized ... then it need
574 * not wait at all; it must only be sure to use sequence
575 * numbers larger than those recently used."
578 sender->td_maxend = end;
579 sender->td_maxwin = (win == 0 ? 1 : win);
581 tcp_options(skb, dataoff, tcph, sender);
586 * If there is no ACK, just pretend it was set and OK.
588 ack = sack = receiver->td_end;
589 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
590 (TCP_FLAG_ACK|TCP_FLAG_RST))
593 * Broken TCP stacks, that set ACK in RST packets as well
594 * with zero ack value.
596 ack = sack = receiver->td_end;
599 if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
601 * RST sent answering SYN.
603 seq = end = sender->td_end;
605 pr_debug("tcp_in_window: ");
606 nf_ct_dump_tuple(tuple);
607 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
608 seq, ack, receiver_offset, sack, receiver_offset, win, end);
609 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
610 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
611 sender->td_end, sender->td_maxend, sender->td_maxwin,
613 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
616 /* Is the ending sequence in the receive window (if available)? */
617 in_recv_win = !receiver->td_maxwin ||
618 after(end, sender->td_end - receiver->td_maxwin - 1);
620 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
621 before(seq, sender->td_maxend + 1),
622 (in_recv_win ? 1 : 0),
623 before(sack, receiver->td_end + 1),
624 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
626 if (before(seq, sender->td_maxend + 1) &&
628 before(sack, receiver->td_end + 1) &&
629 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
631 * Take into account window scaling (RFC 1323).
634 win <<= sender->td_scale;
637 * Update sender data.
639 swin = win + (sack - ack);
640 if (sender->td_maxwin < swin)
641 sender->td_maxwin = swin;
642 if (after(end, sender->td_end)) {
643 sender->td_end = end;
644 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
647 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
648 sender->td_maxack = ack;
649 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
650 } else if (after(ack, sender->td_maxack))
651 sender->td_maxack = ack;
655 * Update receiver data.
657 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
658 receiver->td_maxwin += end - sender->td_maxend;
659 if (after(sack + win, receiver->td_maxend - 1)) {
660 receiver->td_maxend = sack + win;
662 receiver->td_maxend++;
664 if (ack == receiver->td_end)
665 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
668 * Check retransmissions.
670 if (index == TCP_ACK_SET) {
671 if (state->last_dir == dir
672 && state->last_seq == seq
673 && state->last_ack == ack
674 && state->last_end == end
675 && state->last_win == win_raw)
678 state->last_dir = dir;
679 state->last_seq = seq;
680 state->last_ack = ack;
681 state->last_end = end;
682 state->last_win = win_raw;
689 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
693 nf_ct_l4proto_log_invalid(skb, ct,
695 before(seq, sender->td_maxend + 1) ?
697 before(sack, receiver->td_end + 1) ?
698 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
699 : "ACK is under the lower bound (possible overly delayed ACK)"
700 : "ACK is over the upper bound (ACKed data not seen yet)"
701 : "SEQ is under the lower bound (already ACKed data retransmitted)"
702 : "SEQ is over the upper bound (over the window of the receiver)");
706 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
707 "receiver end=%u maxend=%u maxwin=%u\n",
708 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
709 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
714 /* table of valid flag combinations - PUSH, ECE and CWR are always valid.
 * tcp_error() indexes this table with the TCP flag byte after masking off
 * TCPHDR_ECE, TCPHDR_CWR and TCPHDR_PSH; a zero entry makes it report
 * "invalid tcp flag combination".  Combinations without a designated
 * initializer are zero.
 * NOTE(review): some initializer lines and the end of the array bound are
 * not visible in this listing. */
715 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
719 [TCPHDR_SYN|TCPHDR_URG] = 1,
720 [TCPHDR_SYN|TCPHDR_ACK] = 1,
722 [TCPHDR_RST|TCPHDR_ACK] = 1,
723 [TCPHDR_FIN|TCPHDR_ACK] = 1,
724 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
726 [TCPHDR_ACK|TCPHDR_URG] = 1,
/*
 * Report a malformed TCP packet: forwards msg to nf_l4proto_log_invalid()
 * for protocol IPPROTO_TCP.  The message is passed as an argument to a
 * fixed %s format, so msg itself is never interpreted as a format string.
 */
729 static void tcp_error_log(const struct sk_buff *skb, struct net *net,
730 u8 pf, const char *msg)
732 nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
735 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.
 *
 * The checks are made in this order, each one logging through
 * tcp_error_log() when it fails:
 *  1. the fixed TCP header must be retrievable at dataoff ("short packet");
 *  2. the data offset must cover at least the fixed header and must not
 *     exceed the bytes available after dataoff ("truncated packet");
 *  3. the checksum is verified only when net->ct.sysctl_checksum is set
 *     and the hook is NF_INET_PRE_ROUTING ("bad checksum");
 *  4. the flag byte, with ECE, CWR and PSH masked off, must have a
 *     non-zero entry in tcp_valid_flags[] ("invalid tcp flag combination").
 * NOTE(review): the return statements and some parameter and local
 * declarations are not visible in this listing. */
736 static int tcp_error(struct net *net, struct nf_conn *tmpl,
738 unsigned int dataoff,
740 unsigned int hooknum)
742 const struct tcphdr *th;
744 unsigned int tcplen = skb->len - dataoff;
747 /* Smaller than minimal TCP header? */
748 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
750 tcp_error_log(skb, net, pf, "short packet");
754 /* Not whole TCP header or malformed packet */
755 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
756 tcp_error_log(skb, net, pf, "truncated packet");
760 /* Checksum invalid? Ignore.
761 * We skip checking packets on the outgoing path
762 * because the checksum is assumed to be correct.
764 /* FIXME: Source route IP option packets --RR */
765 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
766 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
767 tcp_error_log(skb, net, pf, "bad checksum");
771 /* Check TCP flags. */
772 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
773 if (!tcp_valid_flags[tcpflags]) {
774 tcp_error_log(skb, net, pf, "invalid tcp flag combination");
/*
 * True when the tracked TCP state is TCP_CONNTRACK_ESTABLISHED and
 * IPS_ASSURED_BIT is set in ct->status.  tcp_packet() uses it while
 * validating RST segments.
 */
781 static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
783 return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
784 test_bit(IPS_ASSURED_BIT, &ct->status);
/*
 * Per-packet TCP state tracking for a conntrack entry.
 *
 * With ct->lock held, the candidate next state is looked up as
 * tcp_conntracks[dir][index][old_state], where dir is derived from ctinfo
 * and index comes from get_conntrack_index().  The visible case labels
 * then refine or veto that result:
 *   SYN_SENT  - reopen checks apply when old_state is not below
 *               TCP_CONNTRACK_TIME_WAIT; if either side has
 *               IP_CT_TCP_FLAG_CLOSE_INIT set, or the last packet in this
 *               direction was an RST, the lock is released so the entry
 *               can be deleted and looked up again.
 *   IGNORE    - the packet may be valid or not.  A SYN/ACK that
 *               acknowledges a previously ignored SYN resynchronises the
 *               entry from the recorded last_* values; otherwise the
 *               segment is recorded in last_* (including options of a SYN
 *               in original direction) and logged as ignored.
 *   MAX       - invalid state transition, except for the SYN proxy
 *               client keep-alive case.
 *   TIME_WAIT - an expected RFC5961 challenge ACK seen in LAST_ACK is
 *               accepted (NF_ACCEPT) without changing state.
 *   SYN_SENT2 - marks the connection with IP_CT_TCP_SIMULTANEOUS_OPEN.
 *   SYN_RECV  - an ACK in reply direction on a simultaneous open moves
 *               straight to ESTABLISHED.
 *   CLOSE     - RST validation: an RST whose seq is before the td_maxack
 *               of the other direction is logged as invalid; an RST that
 *               does not match exactly on an established connection
 *               keeps the old state.
 *
 * The segment is then checked by tcp_in_window().  On success last_index,
 * last_dir and the state are stored, IP_CT_TCP_FLAG_CLOSE_INIT is set on
 * the side that first enters FIN_WAIT, and the timeout is chosen in this
 * order:
 *   1. retrans >= tn->tcp_max_retrans and the state timeout exceeds the
 *      RETRANS timeout: RETRANS timeout;
 *   2. RST segment: CLOSE timeout;
 *   3. unacknowledged data in either direction and the state timeout
 *      exceeds the UNACK timeout: UNACK timeout;
 *   4. last_win == 0 and the state timeout exceeds the RETRANS timeout:
 *      RETRANS timeout;
 *   5. otherwise the timeout of the new state.
 * After the lock is dropped a state change is reported through
 * nf_conntrack_event_cache(); without IPS_SEEN_REPLY_BIT the entry may be
 * killed or its ESTABLISHED timeout capped at the UNACK value, and
 * IPS_ASSURED_BIT is set once ESTABLISHED is confirmed.  Finally the
 * entry is refreshed with nf_ct_refresh_acct().
 *
 * NOTE(review): many lines of this function are missing from this listing
 * (the switch statement itself, break/goto/return statements, some
 * declarations), so control flow between the fragments is described only
 * as far as the visible code and its comments show it.
 */
787 /* Returns verdict for packet, or -1 for invalid. */
788 static int tcp_packet(struct nf_conn *ct,
789 const struct sk_buff *skb,
790 unsigned int dataoff,
791 enum ip_conntrack_info ctinfo)
793 struct net *net = nf_ct_net(ct);
794 struct nf_tcp_net *tn = tcp_pernet(net);
795 struct nf_conntrack_tuple *tuple;
796 enum tcp_conntrack new_state, old_state;
797 unsigned int index, *timeouts;
798 enum ip_conntrack_dir dir;
799 const struct tcphdr *th;
801 unsigned long timeout;
803 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
806 spin_lock_bh(&ct->lock);
807 old_state = ct->proto.tcp.state;
808 dir = CTINFO2DIR(ctinfo);
809 index = get_conntrack_index(th);
810 new_state = tcp_conntracks[dir][index][old_state];
811 tuple = &ct->tuplehash[dir].tuple;
814 case TCP_CONNTRACK_SYN_SENT:
815 if (old_state < TCP_CONNTRACK_TIME_WAIT)
817 /* RFC 1122: "When a connection is closed actively,
818 * it MUST linger in TIME-WAIT state for a time 2xMSL
819 * (Maximum Segment Lifetime). However, it MAY accept
820 * a new SYN from the remote TCP to reopen the connection
821 * directly from TIME-WAIT state, if..."
822 * We ignore the conditions because we are in the
823 * TIME-WAIT state anyway.
825 * Handle aborted connections: we and the server
826 * think there is an existing connection but the client
827 * aborts it and starts a new one.
829 if (((ct->proto.tcp.seen[dir].flags
830 | ct->proto.tcp.seen[!dir].flags)
831 & IP_CT_TCP_FLAG_CLOSE_INIT)
832 || (ct->proto.tcp.last_dir == dir
833 && ct->proto.tcp.last_index == TCP_RST_SET)) {
834 /* Attempt to reopen a closed/aborted connection.
835 * Delete this connection and look up again. */
836 spin_unlock_bh(&ct->lock);
838 /* Only repeat if we can actually remove the timer.
839 * Destruction may already be in progress in process
840 * context and we must give it a chance to terminate.
847 case TCP_CONNTRACK_IGNORE:
850 * Our connection entry may be out of sync, so ignore
851 * packets which may signal the real connection between
852 * the client and the server.
855 * b) SYN/ACK in REPLY
856 * c) ACK in reply direction after initial SYN in original.
858 * If the ignored packet is invalid, the receiver will send
859 * a RST we'll catch below.
861 if (index == TCP_SYNACK_SET
862 && ct->proto.tcp.last_index == TCP_SYN_SET
863 && ct->proto.tcp.last_dir != dir
864 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
865 /* b) This SYN/ACK acknowledges a SYN that we earlier
866 * ignored as invalid. This means that the client and
867 * the server are both in sync, while the firewall is
868 * not. We get in sync from the previously annotated
871 old_state = TCP_CONNTRACK_SYN_SENT;
872 new_state = TCP_CONNTRACK_SYN_RECV;
873 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
874 ct->proto.tcp.last_end;
875 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
876 ct->proto.tcp.last_end;
877 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
878 ct->proto.tcp.last_win == 0 ?
879 1 : ct->proto.tcp.last_win;
880 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
881 ct->proto.tcp.last_wscale;
882 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
883 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
884 ct->proto.tcp.last_flags;
885 memset(&ct->proto.tcp.seen[dir], 0,
886 sizeof(struct ip_ct_tcp_state));
889 ct->proto.tcp.last_index = index;
890 ct->proto.tcp.last_dir = dir;
891 ct->proto.tcp.last_seq = ntohl(th->seq);
892 ct->proto.tcp.last_end =
893 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
894 ct->proto.tcp.last_win = ntohs(th->window);
896 /* a) This is a SYN in ORIGINAL. The client and the server
897 * may be in sync but we are not. In that case, we annotate
898 * the TCP options and let the packet go through. If it is a
899 * valid SYN packet, the server will reply with a SYN/ACK, and
900 * then we'll get in sync. Otherwise, the server potentially
901 * responds with a challenge ACK if implementing RFC5961.
903 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
904 struct ip_ct_tcp_state seen = {};
906 ct->proto.tcp.last_flags =
907 ct->proto.tcp.last_wscale = 0;
908 tcp_options(skb, dataoff, th, &seen);
909 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
910 ct->proto.tcp.last_flags |=
911 IP_CT_TCP_FLAG_WINDOW_SCALE;
912 ct->proto.tcp.last_wscale = seen.td_scale;
914 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
915 ct->proto.tcp.last_flags |=
916 IP_CT_TCP_FLAG_SACK_PERM;
918 /* Mark the potential for RFC5961 challenge ACK,
919 * this pose a special problem for LAST_ACK state
920 * as ACK is intrepretated as ACKing last FIN.
922 if (old_state == TCP_CONNTRACK_LAST_ACK)
923 ct->proto.tcp.last_flags |=
924 IP_CT_EXP_CHALLENGE_ACK;
926 spin_unlock_bh(&ct->lock);
927 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
928 "state %s ", tcp_conntrack_names[old_state]);
930 case TCP_CONNTRACK_MAX:
931 /* Special case for SYN proxy: when the SYN to the server or
932 * the SYN/ACK from the server is lost, the client may transmit
933 * a keep-alive packet while in SYN_SENT state. This needs to
934 * be associated with the original conntrack entry in order to
935 * generate a new SYN with the correct sequence number.
937 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
938 index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
939 ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
940 ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
941 pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
942 spin_unlock_bh(&ct->lock);
947 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
948 dir, get_conntrack_index(th), old_state);
949 spin_unlock_bh(&ct->lock);
950 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
952 case TCP_CONNTRACK_TIME_WAIT:
953 /* RFC5961 compliance cause stack to send "challenge-ACK"
954 * e.g. in response to spurious SYNs. Conntrack MUST
955 * not believe this ACK is acking last FIN.
957 if (old_state == TCP_CONNTRACK_LAST_ACK &&
958 index == TCP_ACK_SET &&
959 ct->proto.tcp.last_dir != dir &&
960 ct->proto.tcp.last_index == TCP_SYN_SET &&
961 (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
962 /* Detected RFC5961 challenge ACK */
963 ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
964 spin_unlock_bh(&ct->lock);
965 nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
966 return NF_ACCEPT; /* Don't change state */
969 case TCP_CONNTRACK_SYN_SENT2:
970 /* tcp_conntracks table is not smart enough to handle
973 ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
975 case TCP_CONNTRACK_SYN_RECV:
976 if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
977 ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
978 new_state = TCP_CONNTRACK_ESTABLISHED;
980 case TCP_CONNTRACK_CLOSE:
981 if (index != TCP_RST_SET)
984 if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
985 u32 seq = ntohl(th->seq);
987 if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
989 spin_unlock_bh(&ct->lock);
990 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
994 if (!nf_conntrack_tcp_established(ct) ||
995 seq == ct->proto.tcp.seen[!dir].td_maxack)
998 /* Check if rst is part of train, such as
999 * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
1000 * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
1002 if (ct->proto.tcp.last_index == TCP_ACK_SET &&
1003 ct->proto.tcp.last_dir == dir &&
1004 seq == ct->proto.tcp.last_end)
1007 /* ... RST sequence number doesn't match exactly, keep
1008 * established state to allow a possible challenge ACK.
1010 new_state = old_state;
1012 if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1013 && ct->proto.tcp.last_index == TCP_SYN_SET)
1014 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1015 && ct->proto.tcp.last_index == TCP_ACK_SET))
1016 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1017 /* RST sent to invalid SYN or ACK we had let through
1018 * at a) and c) above:
1020 * a) SYN was in window then
1021 * c) we hold a half-open connection.
1023 * Delete our connection entry.
1024 * We skip window checking, because packet might ACK
1025 * segments we ignored. */
1030 /* Keep compilers happy. */
1034 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1035 skb, dataoff, th)) {
1036 spin_unlock_bh(&ct->lock);
1040 /* From now on we have got in-window packets */
1041 ct->proto.tcp.last_index = index;
1042 ct->proto.tcp.last_dir = dir;
1044 pr_debug("tcp_conntracks: ");
1045 nf_ct_dump_tuple(tuple);
1046 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1047 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1048 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1049 old_state, new_state);
1051 ct->proto.tcp.state = new_state;
1052 if (old_state != new_state
1053 && new_state == TCP_CONNTRACK_FIN_WAIT)
1054 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1056 timeouts = nf_ct_timeout_lookup(ct);
1058 timeouts = tn->timeouts;
1060 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1061 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1062 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1063 else if (unlikely(index == TCP_RST_SET))
1064 timeout = timeouts[TCP_CONNTRACK_CLOSE];
1065 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1066 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1067 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1068 timeout = timeouts[TCP_CONNTRACK_UNACK];
1069 else if (ct->proto.tcp.last_win == 0 &&
1070 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1071 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1073 timeout = timeouts[new_state];
1074 spin_unlock_bh(&ct->lock);
1076 if (new_state != old_state)
1077 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1079 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1080 /* If only reply is a RST, we can consider ourselves not to
1081 have an established connection: this is a fairly common
1082 problem case, so we can delete the conntrack
1083 immediately. --RR */
1085 nf_ct_kill_acct(ct, ctinfo, skb);
1088 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1089 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1091 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1092 timeout > timeouts[TCP_CONNTRACK_UNACK])
1093 timeout = timeouts[TCP_CONNTRACK_UNACK];
1094 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1095 && (old_state == TCP_CONNTRACK_SYN_RECV
1096 || old_state == TCP_CONNTRACK_ESTABLISHED)
1097 && new_state == TCP_CONNTRACK_ESTABLISHED) {
1098 /* Set ASSURED if we see see valid ack in ESTABLISHED
1099 after SYN_RECV or a valid answer for a picked up
1101 set_bit(IPS_ASSURED_BIT, &ct->status);
1102 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1104 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
/*
 * Set up the TCP tracking state of a freshly created conntrack entry from
 * its first packet.
 *
 * The initial state is tcp_conntracks[0][index][TCP_CONNTRACK_NONE], with
 * index taken from get_conntrack_index().  Three outcomes are visible:
 *  - the table yields a value >= TCP_CONNTRACK_MAX: the packet cannot
 *    start a connection and the entry is rejected;
 *  - TCP_CONNTRACK_SYN_SENT: ct->proto.tcp is zeroed and seen[0] is
 *    initialised from the SYN (td_end from segment_seq_plus_len(),
 *    td_maxwin from the window with a minimum of 1, td_maxend = td_end),
 *    then the options are parsed with tcp_options();
 *  - any other state: refused when tn->tcp_loose is 0; otherwise the
 *    connection is picked up mid-stream, with td_maxend = td_end +
 *    td_maxwin and both directions flagged IP_CT_TCP_FLAG_SACK_PERM |
 *    IP_CT_TCP_FLAG_BE_LIBERAL.
 * last_index is set to TCP_NONE_SET so that tcp_packet() fills in the
 * last_* fields.  No locking is used here, as the existing comment notes
 * the entry is not yet in circulation.
 *
 * NOTE(review): the return statements and a few argument lines are not
 * visible in this listing.
 */
1109 /* Called when a new connection for this protocol found. */
1110 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1111 unsigned int dataoff)
1113 enum tcp_conntrack new_state;
1114 const struct tcphdr *th;
1115 struct tcphdr _tcph;
1116 struct net *net = nf_ct_net(ct);
1117 struct nf_tcp_net *tn = tcp_pernet(net);
1118 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1119 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1121 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1124 /* Don't need lock here: this conntrack not in circulation yet */
1125 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1127 /* Invalid: delete conntrack */
1128 if (new_state >= TCP_CONNTRACK_MAX) {
1129 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1133 if (new_state == TCP_CONNTRACK_SYN_SENT) {
1134 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1136 ct->proto.tcp.seen[0].td_end =
1137 segment_seq_plus_len(ntohl(th->seq), skb->len,
1139 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1140 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1141 ct->proto.tcp.seen[0].td_maxwin = 1;
1142 ct->proto.tcp.seen[0].td_maxend =
1143 ct->proto.tcp.seen[0].td_end;
1145 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1146 } else if (tn->tcp_loose == 0) {
1147 /* Don't try to pick up connections. */
1150 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1152 * We are in the middle of a connection,
1153 * its history is lost for us.
1154 * Let's try to use the data from the packet.
1156 ct->proto.tcp.seen[0].td_end =
1157 segment_seq_plus_len(ntohl(th->seq), skb->len,
1159 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1160 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1161 ct->proto.tcp.seen[0].td_maxwin = 1;
1162 ct->proto.tcp.seen[0].td_maxend =
1163 ct->proto.tcp.seen[0].td_end +
1164 ct->proto.tcp.seen[0].td_maxwin;
1166 /* We assume SACK and liberal window checking to handle
1168 ct->proto.tcp.seen[0].flags =
1169 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1170 IP_CT_TCP_FLAG_BE_LIBERAL;
1173 /* tcp_packet will set them */
1174 ct->proto.tcp.last_index = TCP_NONE_SET;
1176 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1177 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1178 sender->td_end, sender->td_maxend, sender->td_maxwin,
1180 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1181 receiver->td_scale);
/*
 * tcp_can_early_drop - classify @ct by its tracked TCP state.
 *
 * The switch lists FIN_WAIT, LAST_ACK, TIME_WAIT, CLOSE and CLOSE_WAIT.
 *
 * NOTE(review): the statements under the case labels and the final
 * return are not visible in this listing.  Presumably the listed states
 * yield true and every other state false; confirm against the complete
 * file.
 */
1185 static bool tcp_can_early_drop(const struct nf_conn *ct)
1187 switch (ct->proto.tcp.state) {
1188 case TCP_CONNTRACK_FIN_WAIT:
1189 case TCP_CONNTRACK_LAST_ACK:
1190 case TCP_CONNTRACK_TIME_WAIT:
1191 case TCP_CONNTRACK_CLOSE:
1192 case TCP_CONNTRACK_CLOSE_WAIT:
1201 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1203 #include <linux/netfilter/nfnetlink.h>
1204 #include <linux/netfilter/nfnetlink_conntrack.h>
/*
 * tcp_to_nlattr - dump the TCP tracking state of a conntrack into @skb.
 *
 * With ct->lock held, a nested CTA_PROTOINFO_TCP attribute is opened and
 * filled with the state, the window scale of each direction and the flags
 * of each direction (the flags are copied through the zero-initialised
 * local 'tmp', so its other members stay zero).  The lock is dropped
 * before nla_nest_end() closes the nest.  Any failing put jumps to
 * nla_put_failure; a second spin_unlock_bh() is visible below.
 *
 * NOTE(review): the line declaring 'ct', the check on nest_parms, the
 * nla_put_failure label and both return statements are not visible in
 * this listing; presumably the second unlock belongs to the failure
 * path.  Confirm against the complete file.
 */
1206 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1209 struct nlattr *nest_parms;
1210 struct nf_ct_tcp_flags tmp = {};
1212 spin_lock_bh(&ct->lock);
1213 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1215 goto nla_put_failure;
1217 if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1218 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1219 ct->proto.tcp.seen[0].td_scale) ||
1220 nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1221 ct->proto.tcp.seen[1].td_scale))
1222 goto nla_put_failure;
/* Original-direction flags. */
1224 tmp.flags = ct->proto.tcp.seen[0].flags;
1225 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1226 sizeof(struct nf_ct_tcp_flags), &tmp))
1227 goto nla_put_failure;
/* Reply-direction flags, reusing the same temporary. */
1229 tmp.flags = ct->proto.tcp.seen[1].flags;
1230 if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1231 sizeof(struct nf_ct_tcp_flags), &tmp))
1232 goto nla_put_failure;
1233 spin_unlock_bh(&ct->lock);
1235 nla_nest_end(skb, nest_parms);
1240 spin_unlock_bh(&ct->lock);
1244 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1245 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1246 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1247 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1248 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1249 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
/*
 * Space reserved for the TCP protocol-info attributes: two one-byte
 * attributes and two struct nf_ct_tcp_flags attributes, each with its
 * netlink header and alignment padding.
 *
 * NOTE(review): tcp_to_nlattr() emits three one-byte attributes (state
 * plus two window scales) inside a nest, while this sum accounts for two;
 * confirm whether the estimate is intentionally approximate.
 */
#define TCP_NLATTR_SIZE	( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
/*
 * nlattr_to_tcp - apply TCP protocol info received over netlink to @ct.
 *
 * @cda: top-level attribute table; only cda[CTA_PROTOINFO_TCP] is read
 * @ct:  conntrack entry to update
 *
 * The nested attribute is parsed against tcp_nla_policy.  Under ct->lock
 * the state, the per-direction flags and the per-direction window scale
 * are then updated from whichever attributes are present.
 *
 * NOTE(review): the declaration of 'err', the early exit for a missing
 * CTA_PROTOINFO_TCP attribute and all return statements are not visible
 * in this listing; confirm the exact error codes against the complete
 * file.
 */
1258 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1260 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1261 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1264 /* updates could not contain anything about the private
1265 * protocol info, in that case skip the parsing */
1269 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1270 tcp_nla_policy, NULL);
/* The state value is range-checked before ct->lock is taken. */
1274 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1275 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1278 spin_lock_bh(&ct->lock);
1279 if (tb[CTA_PROTOINFO_TCP_STATE])
1280 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1282 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1283 struct nf_ct_tcp_flags *attr =
1284 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
/* Only the bits selected by attr->mask are replaced. */
1285 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1286 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1289 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1290 struct nf_ct_tcp_flags *attr =
1291 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1292 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1293 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
/*
 * Window scales are taken only when both scale attributes are present
 * and both directions have IP_CT_TCP_FLAG_WINDOW_SCALE set.
 */
1296 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1297 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1298 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1299 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1300 ct->proto.tcp.seen[0].td_scale =
1301 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1302 ct->proto.tcp.seen[1].td_scale =
1303 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1305 spin_unlock_bh(&ct->lock);
/*
 * tcp_nlattr_tuple_size - netlink size of a TCP tuple.
 *
 * The value comes from nla_policy_len() over nf_ct_port_nla_policy and
 * is kept in a function-local static.
 *
 * NOTE(review): the guard around the assignment and the return statement
 * are not visible in this listing; presumably the size is computed once
 * and then returned from the cached static -- confirm.
 */
1310 static unsigned int tcp_nlattr_tuple_size(void)
1312 static unsigned int size __read_mostly;
1315 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1321 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1323 #include <linux/netfilter/nfnetlink.h>
1324 #include <linux/netfilter/nfnetlink_cttimeout.h>
/*
 * tcp_timeout_nlattr_to_obj - build a TCP timeout array from netlink
 * attributes.
 *
 * @tb:   parsed CTA_TIMEOUT_TCP_* attributes
 * @net:  network namespace supplying the default timeouts
 * @data: destination array of unsigned int timeouts
 *
 * Every slot is first loaded with the per-net default from tn->timeouts.
 * Each attribute that is present then overrides its slot: the big-endian
 * 32-bit value is converted with ntohl() and multiplied by HZ.  Finally
 * slot CTA_TIMEOUT_TCP_UNSPEC is set to the SYN_SENT value.
 *
 * NOTE(review): the declaration of 'i', the condition guarding
 * 'timeouts = tn->timeouts' and the return statement are not visible in
 * this listing.  The last assignment indexes with CTA_TIMEOUT_TCP_*
 * constants while the rest use TCP_CONNTRACK_*; this presumably relies
 * on the two enums sharing values 0 and 1 -- confirm.
 */
1326 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1327 struct net *net, void *data)
1329 struct nf_tcp_net *tn = tcp_pernet(net);
1330 unsigned int *timeouts = data;
1334 timeouts = tn->timeouts;
1335 /* set default TCP timeouts. */
1336 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1337 timeouts[i] = tn->timeouts[i];
/* Per-state overrides: one optional attribute per timeout slot. */
1339 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1340 timeouts[TCP_CONNTRACK_SYN_SENT] =
1341 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1344 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1345 timeouts[TCP_CONNTRACK_SYN_RECV] =
1346 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1348 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1349 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1350 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1352 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1353 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1354 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1356 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1357 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1358 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1360 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1361 timeouts[TCP_CONNTRACK_LAST_ACK] =
1362 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1364 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1365 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1366 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1368 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1369 timeouts[TCP_CONNTRACK_CLOSE] =
1370 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1372 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1373 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1374 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1376 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1377 timeouts[TCP_CONNTRACK_RETRANS] =
1378 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1380 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1381 timeouts[TCP_CONNTRACK_UNACK] =
1382 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1385 timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
/*
 * tcp_timeout_obj_to_nlattr - dump a TCP timeout array into @skb.
 *
 * @skb:  netlink message being built
 * @data: array of unsigned int timeouts indexed by TCP_CONNTRACK_*
 *
 * Each of the eleven timeouts is divided by HZ, converted with htonl()
 * and emitted as a big-endian 32-bit attribute.  The puts are chained
 * with ||, so the first failure jumps to nla_put_failure.
 *
 * NOTE(review): the return-type line, the nla_put_failure label and the
 * return statements are not visible in this listing; confirm against
 * the complete file.
 */
1390 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1392 const unsigned int *timeouts = data;
1394 if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1395 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1396 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1397 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1398 nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1399 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1400 nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1401 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1402 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1403 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1404 nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1405 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1406 nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1407 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1408 nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1409 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1410 nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1411 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1412 nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1413 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1414 nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1415 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1416 goto nla_put_failure;
1423 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1424 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1425 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1426 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1427 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1428 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1429 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1430 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1431 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1432 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
1433 [CTA_TIMEOUT_TCP_RETRANS] = { .type = NLA_U32 },
1434 [CTA_TIMEOUT_TCP_UNACK] = { .type = NLA_U32 },
1436 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1438 #ifdef CONFIG_SYSCTL
/*
 * Template sysctl table for the TCP tracker.
 *
 * Thirteen entries are visible: ten timeouts handled by
 * proc_dointvec_jiffies followed by three plain integers (loose,
 * be_liberal, max_retrans) handled by proc_dointvec.  The entry order
 * matters: tcp_kmemdup_sysctl_table() fills in the .data pointers by
 * numeric index 0..12.
 *
 * NOTE(review): the braces around each entry, any .mode fields and the
 * table terminator are not visible in this listing; confirm against the
 * complete file.
 */
1439 static struct ctl_table tcp_sysctl_table[] = {
1441 .procname = "nf_conntrack_tcp_timeout_syn_sent",
1442 .maxlen = sizeof(unsigned int),
1444 .proc_handler = proc_dointvec_jiffies,
1447 .procname = "nf_conntrack_tcp_timeout_syn_recv",
1448 .maxlen = sizeof(unsigned int),
1450 .proc_handler = proc_dointvec_jiffies,
1453 .procname = "nf_conntrack_tcp_timeout_established",
1454 .maxlen = sizeof(unsigned int),
1456 .proc_handler = proc_dointvec_jiffies,
1459 .procname = "nf_conntrack_tcp_timeout_fin_wait",
1460 .maxlen = sizeof(unsigned int),
1462 .proc_handler = proc_dointvec_jiffies,
1465 .procname = "nf_conntrack_tcp_timeout_close_wait",
1466 .maxlen = sizeof(unsigned int),
1468 .proc_handler = proc_dointvec_jiffies,
1471 .procname = "nf_conntrack_tcp_timeout_last_ack",
1472 .maxlen = sizeof(unsigned int),
1474 .proc_handler = proc_dointvec_jiffies,
1477 .procname = "nf_conntrack_tcp_timeout_time_wait",
1478 .maxlen = sizeof(unsigned int),
1480 .proc_handler = proc_dointvec_jiffies,
1483 .procname = "nf_conntrack_tcp_timeout_close",
1484 .maxlen = sizeof(unsigned int),
1486 .proc_handler = proc_dointvec_jiffies,
1489 .procname = "nf_conntrack_tcp_timeout_max_retrans",
1490 .maxlen = sizeof(unsigned int),
1492 .proc_handler = proc_dointvec_jiffies,
1495 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
1496 .maxlen = sizeof(unsigned int),
1498 .proc_handler = proc_dointvec_jiffies,
/* The remaining entries are plain integers, not jiffies. */
1501 .procname = "nf_conntrack_tcp_loose",
1502 .maxlen = sizeof(unsigned int),
1504 .proc_handler = proc_dointvec,
1507 .procname = "nf_conntrack_tcp_be_liberal",
1508 .maxlen = sizeof(unsigned int),
1510 .proc_handler = proc_dointvec,
1513 .procname = "nf_conntrack_tcp_max_retrans",
1514 .maxlen = sizeof(unsigned int),
1516 .proc_handler = proc_dointvec,
1520 #endif /* CONFIG_SYSCTL */
/*
 * tcp_kmemdup_sysctl_table - give a namespace its own copy of the TCP
 * sysctl table.
 *
 * @pn: per-net protocol data that receives the copy in pn->ctl_table
 * @tn: per-net TCP data the copied entries are pointed at
 *
 * Under CONFIG_SYSCTL, tcp_sysctl_table is duplicated with kmemdup() and
 * each entry's .data is aimed at the matching member of @tn: entries 0-9
 * at slots of tn->timeouts, entries 10-12 at tcp_loose, tcp_be_liberal
 * and tcp_max_retrans.  The numeric indexes must track the entry order
 * of tcp_sysctl_table.
 *
 * NOTE(review): the allocation flags, any check of the kmemdup() result,
 * the matching #endif and the return statements are not visible in this
 * listing; confirm against the complete file.
 */
1522 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1523 struct nf_tcp_net *tn)
1525 #ifdef CONFIG_SYSCTL
1529 pn->ctl_table = kmemdup(tcp_sysctl_table,
1530 sizeof(tcp_sysctl_table),
1535 pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1536 pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1537 pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1538 pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1539 pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1540 pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1541 pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1542 pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1543 pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1544 pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
1545 pn->ctl_table[10].data = &tn->tcp_loose;
1546 pn->ctl_table[11].data = &tn->tcp_be_liberal;
1547 pn->ctl_table[12].data = &tn->tcp_max_retrans;
/*
 * tcp_init_net - per-namespace setup of the TCP tracker.
 *
 * @net:   network namespace being initialised
 * @proto: not referenced in the visible lines
 *
 * The per-net timeouts are loaded from tcp_timeouts, slot 0 is set to
 * the SYN_SENT default, and tcp_loose, tcp_be_liberal and
 * tcp_max_retrans are copied from the file-level defaults.  The result
 * of tcp_kmemdup_sysctl_table() is returned.
 *
 * NOTE(review): the declaration of 'i' and any condition guarding the
 * initialisation are not visible in this listing; confirm against the
 * complete file.
 */
1552 static int tcp_init_net(struct net *net, u_int16_t proto)
1554 struct nf_tcp_net *tn = tcp_pernet(net);
1555 struct nf_proto_net *pn = &tn->pn;
1560 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1561 tn->timeouts[i] = tcp_timeouts[i];
1563 /* timeouts[0] is unused, make it same as SYN_SENT so
1564 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1566 tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1567 tn->tcp_loose = nf_ct_tcp_loose;
1568 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1569 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1572 return tcp_kmemdup_sysctl_table(pn, tn);
1575 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1577 return &net->ct.nf_ct_proto.tcp.pn;
/*
 * Layer-4 tracker descriptor for TCP, exported GPL-only as
 * nf_conntrack_l4proto_tcp4.
 *
 * It wires the handlers defined in this file into the generic conntrack
 * core: packet processing, early-drop policy, per-net init, and -- when
 * the corresponding options are enabled -- procfs printing, ctnetlink
 * dump/parse and the cttimeout conversion helpers.
 *
 * NOTE(review): several initializers (including the l3proto value), the
 * braces and some #endif lines are not visible in this listing; confirm
 * against the complete file.
 */
1580 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1583 .l4proto = IPPROTO_TCP,
1584 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1585 .print_conntrack = tcp_print_conntrack,
1587 .packet = tcp_packet,
1590 .can_early_drop = tcp_can_early_drop,
1591 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1592 .to_nlattr = tcp_to_nlattr,
1593 .from_nlattr = nlattr_to_tcp,
1594 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1595 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1596 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1597 .nlattr_size = TCP_NLATTR_SIZE,
1598 .nla_policy = nf_ct_port_nla_policy,
1600 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1602 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1603 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1604 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1605 .obj_size = sizeof(unsigned int) *
1606 TCP_CONNTRACK_TIMEOUT_MAX,
1607 .nla_policy = tcp_timeout_nla_policy,
1609 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1610 .init_net = tcp_init_net,
1611 .get_net_proto = tcp_get_net_proto,
1613 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
/*
 * Layer-4 tracker descriptor for TCP over IPv6 (l3proto PF_INET6),
 * exported GPL-only as nf_conntrack_l4proto_tcp6.
 *
 * It references the same handlers as the tcp4 descriptor: packet
 * processing, early-drop policy, per-net init, and the optional procfs,
 * ctnetlink and cttimeout hooks.
 *
 * NOTE(review): some initializers, the braces and some #endif lines are
 * not visible in this listing; confirm against the complete file.
 */
1615 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1617 .l3proto = PF_INET6,
1618 .l4proto = IPPROTO_TCP,
1619 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1620 .print_conntrack = tcp_print_conntrack,
1622 .packet = tcp_packet,
1625 .can_early_drop = tcp_can_early_drop,
1626 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1627 .nlattr_size = TCP_NLATTR_SIZE,
1628 .to_nlattr = tcp_to_nlattr,
1629 .from_nlattr = nlattr_to_tcp,
1630 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1631 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1632 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1633 .nla_policy = nf_ct_port_nla_policy,
1635 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1637 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1638 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1639 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1640 .obj_size = sizeof(unsigned int) *
1641 TCP_CONNTRACK_TIMEOUT_MAX,
1642 .nla_policy = tcp_timeout_nla_policy,
1644 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1645 .init_net = tcp_init_net,
1646 .get_net_proto = tcp_get_net_proto,
1648 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);