GNU Linux-libre 4.19.211-gnu1
[releases.git] / net / netfilter / nf_conntrack_proto_tcp.c
1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21
22 #include <net/tcp.h>
23
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
33 #include <net/netfilter/nf_log.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36
37 /* "Be conservative in what you do,
38     be liberal in what you accept from others."
39     If it's non-zero, we mark only out of window RST segments as INVALID.
       Defaults to 0. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
41
42 /* If it is set to zero, we disable picking up already established
43    connections. Defaults to 1. */
44 static int nf_ct_tcp_loose __read_mostly = 1;
45
46 /* Max number of the retransmitted packets without receiving an (acceptable)
47    ACK from the destination. If this number is reached, a shorter timer
48    will be started. Defaults to 3. */
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
50
51   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
52      closely.  They're more complex. --RR */
53
/* Printable names of the TCP conntrack states.  The table is indexed by
 * ct->proto.tcp.state (see tcp_print_conntrack()); the entries are listed
 * in the same order as the state columns of the tcp_conntracks table
 * (sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2).
 */
54 static const char *const tcp_conntrack_names[] = {
55         "NONE",
56         "SYN_SENT",
57         "SYN_RECV",
58         "ESTABLISHED",
59         "FIN_WAIT",
60         "CLOSE_WAIT",
61         "LAST_ACK",
62         "TIME_WAIT",
63         "CLOSE",
64         "SYN_SENT2",
65 };
66
/* Postfix unit macros: they are written after a number, so that e.g.
 * "2 MINS" expands to "2 * 60 * HZ" and "5 DAYS" to "5 * 24 * 60 * 60 * HZ".
 * They are deliberately not parenthesized and only make sense directly
 * after a numeric literal.
 */
67 #define SECS * HZ
68 #define MINS * 60 SECS
69 #define HOURS * 60 MINS
70 #define DAYS * 24 HOURS
71
/* Default timeout for each TCP conntrack state, expressed in multiples of
 * HZ through the unit macros above.  TCP_CONNTRACK_RETRANS and
 * TCP_CONNTRACK_UNACK are extra timeout slots in addition to the per-state
 * entries.
 */
72 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
73         [TCP_CONNTRACK_SYN_SENT]        = 2 MINS,
74         [TCP_CONNTRACK_SYN_RECV]        = 60 SECS,
75         [TCP_CONNTRACK_ESTABLISHED]     = 5 DAYS,
76         [TCP_CONNTRACK_FIN_WAIT]        = 2 MINS,
77         [TCP_CONNTRACK_CLOSE_WAIT]      = 60 SECS,
78         [TCP_CONNTRACK_LAST_ACK]        = 30 SECS,
79         [TCP_CONNTRACK_TIME_WAIT]       = 2 MINS,
80         [TCP_CONNTRACK_CLOSE]           = 10 SECS,
81         [TCP_CONNTRACK_SYN_SENT2]       = 2 MINS,
82 /* RFC1122 says the R2 limit should be at least 100 seconds.
83    Linux uses 15 packets as limit, which corresponds
84    to ~13-30min depending on RTO. */
85         [TCP_CONNTRACK_RETRANS]         = 5 MINS,
86         [TCP_CONNTRACK_UNACK]           = 5 MINS,
87 };
88
/* Three-letter aliases for the TCP conntrack states, used to keep the
 * tcp_conntracks transition table compact.  sIV and sIG are not connection
 * states: they are the table markers for "invalid" and "ignored" packets.
 */
89 #define sNO TCP_CONNTRACK_NONE
90 #define sSS TCP_CONNTRACK_SYN_SENT
91 #define sSR TCP_CONNTRACK_SYN_RECV
92 #define sES TCP_CONNTRACK_ESTABLISHED
93 #define sFW TCP_CONNTRACK_FIN_WAIT
94 #define sCW TCP_CONNTRACK_CLOSE_WAIT
95 #define sLA TCP_CONNTRACK_LAST_ACK
96 #define sTW TCP_CONNTRACK_TIME_WAIT
97 #define sCL TCP_CONNTRACK_CLOSE
98 #define sS2 TCP_CONNTRACK_SYN_SENT2
99 #define sIV TCP_CONNTRACK_MAX
100 #define sIG TCP_CONNTRACK_IGNORE
101
102 /* What TCP flags are set from RST/SYN/FIN/ACK.
     *
     * These are the values returned by get_conntrack_index().  Their order
     * matches the row order (syn, synack, fin, ack, rst, none) of each
     * direction in the tcp_conntracks table.
     */
103 enum tcp_bit_set {
104         TCP_SYN_SET,
105         TCP_SYNACK_SET,
106         TCP_FIN_SET,
107         TCP_ACK_SET,
108         TCP_RST_SET,
109         TCP_NONE_SET,
110 };
111
112 /*
113  * The TCP state transition table needs a few words...
114  *
115  * We are the man in the middle. All the packets go through us
116  * but might get lost in transit to the destination.
117  * It is assumed that the destinations can't receive segments
118  * we haven't seen.
119  *
120  * The checked segment is in window, but our windows are *not*
121  * equivalent with the ones of the sender/receiver. We always
122  * try to guess the state of the current sender.
123  *
124  * The meaning of the states are:
125  *
126  * NONE:        initial state
127  * SYN_SENT:    SYN-only packet seen
128  * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
129  * SYN_RECV:    SYN-ACK packet seen
130  * ESTABLISHED: ACK packet seen
131  * FIN_WAIT:    FIN packet seen
132  * CLOSE_WAIT:  ACK seen (after FIN)
133  * LAST_ACK:    FIN seen (after FIN)
134  * TIME_WAIT:   last ACK seen
135  * CLOSE:       closed connection (RST)
136  *
137  * Packets marked as IGNORED (sIG):
138  *      if they may be either invalid or valid
139  *      and the receiver may send back a connection
140  *      closing RST or a SYN/ACK.
141  *
142  * Packets marked as INVALID (sIV):
143  *      if we regard them as truly invalid packets
144  */
/*
 * State transition table.
 *
 * First dimension:  packet direction (the ORIGINAL block, then the REPLY
 *                   block).
 * Second dimension: flag class of the packet, in enum tcp_bit_set order
 *                   (syn, synack, fin, ack, rst, none).
 * Third dimension:  current state, in the column order given by the
 *                   "sNO, sSS, ..." header above each row.
 *
 * Each entry is the resulting state, or one of the markers sIV (invalid)
 * and sIG (ignored).
 */
145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
146         {
147 /* ORIGINAL */
148 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
149 /*syn*/    { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
150 /*
151  *      sNO -> sSS      Initialize a new connection
152  *      sSS -> sSS      Retransmitted SYN
153  *      sS2 -> sS2      Late retransmitted SYN
154  *      sSR -> sIG
155  *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
156  *                      are errors. Receiver will reply with RST
157  *                      and close the connection.
158  *                      Or we are not in sync and hold a dead connection.
159  *      sFW -> sIG
160  *      sCW -> sIG
161  *      sLA -> sIG
162  *      sTW -> sSS      Reopened connection (RFC 1122).
163  *      sCL -> sSS
164  */
165 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
166 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
167 /*
168  *      sNO -> sIV      Too late and no reason to do anything
169  *      sSS -> sIV      Client can't send SYN and then SYN/ACK
170  *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
171  *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
172  *      sES -> sIV      Invalid SYN/ACK packets sent by the client
173  *      sFW -> sIV
174  *      sCW -> sIV
175  *      sLA -> sIV
176  *      sTW -> sIV
177  *      sCL -> sIV
178  */
179 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
180 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
181 /*
182  *      sNO -> sIV      Too late and no reason to do anything...
183  *      sSS -> sIV      Client might not send FIN in this state:
184  *                      we enforce waiting for a SYN/ACK reply first.
185  *      sS2 -> sIV
186  *      sSR -> sFW      Close started.
187  *      sES -> sFW
188  *      sFW -> sLA      FIN seen in both directions, waiting for
189  *                      the last ACK.
190  *                      Might be a retransmitted FIN as well...
191  *      sCW -> sLA
192  *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
193  *      sTW -> sTW
194  *      sCL -> sCL
195  */
196 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
197 /*ack*/    { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
198 /*
199  *      sNO -> sES      Assumed.
200  *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
201  *      sS2 -> sIV
202  *      sSR -> sES      Established state is reached.
203  *      sES -> sES      :-)
204  *      sFW -> sCW      Normal close request answered by ACK.
205  *      sCW -> sCW
206  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
207  *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
208  *      sCL -> sCL
209  */
210 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
211 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
212 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
213         },
214         {
215 /* REPLY */
216 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
217 /*syn*/    { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
218 /*
219  *      sNO -> sIV      Never reached.
220  *      sSS -> sS2      Simultaneous open
221  *      sS2 -> sS2      Retransmitted simultaneous SYN
222  *      sSR -> sIV      Invalid SYN packets sent by the server
223  *      sES -> sIV
224  *      sFW -> sIV
225  *      sCW -> sIV
226  *      sLA -> sIV
227  *      sTW -> sSS      Reopened connection, but server may have switched role
228  *      sCL -> sIV
229  */
230 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
231 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
232 /*
233  *      sSS -> sSR      Standard open.
234  *      sS2 -> sSR      Simultaneous open
235  *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
236  *      sES -> sIG      Late retransmitted SYN/ACK?
237  *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
238  *      sCW -> sIG
239  *      sLA -> sIG
240  *      sTW -> sIG
241  *      sCL -> sIG
242  */
243 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
244 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
245 /*
246  *      sSS -> sIV      Server might not send FIN in this state.
247  *      sS2 -> sIV
248  *      sSR -> sFW      Close started.
249  *      sES -> sFW
250  *      sFW -> sLA      FIN seen in both directions.
251  *      sCW -> sLA
252  *      sLA -> sLA      Retransmitted FIN.
253  *      sTW -> sTW
254  *      sCL -> sCL
255  */
256 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
257 /*ack*/    { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
258 /*
259  *      sSS -> sIG      Might be a half-open connection.
260  *      sS2 -> sIG
261  *      sSR -> sSR      Might answer late resent SYN.
262  *      sES -> sES      :-)
263  *      sFW -> sCW      Normal close request answered by ACK.
264  *      sCW -> sCW
265  *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
266  *      sTW -> sTW      Retransmitted last ACK.
267  *      sCL -> sCL
268  */
269 /*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
270 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
271 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
272         }
273 };
274
275 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
276 {
277         return &net->ct.nf_ct_proto.tcp;
278 }
279
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/*
 * Print the protocol-private part of the conntrack entry: the name of the
 * tracked TCP state followed by a single space.  Entries that have
 * IPS_OFFLOAD_BIT set in their status produce no output.
 */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
        const char *state_name;

        if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
                state_name = tcp_conntrack_names[ct->proto.tcp.state];
                seq_printf(s, "%s ", state_name);
        }
}
#endif
290
291 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
292 {
293         if (tcph->rst) return TCP_RST_SET;
294         else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
295         else if (tcph->fin) return TCP_FIN_SET;
296         else if (tcph->ack) return TCP_ACK_SET;
297         else return TCP_NONE_SET;
298 }
299
300 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
301    in IP Filter' by Guido van Rooij.
302
303    http://www.sane.nl/events/sane2000/papers.html
304    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
305
306    The boundaries and the conditions are changed according to RFC793:
307    the packet must intersect the window (i.e. segments may be
308    after the right or before the left edge) and thus receivers may ACK
309    segments after the right edge of the window.
310
311         td_maxend = max(sack + max(win,1)) seen in reply packets
312         td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
313         td_maxwin += seq + len - sender.td_maxend
314                         if seq + len > sender.td_maxend
315         td_end    = max(seq + len) seen in sent packets
316
317    I.   Upper bound for valid data:     seq <= sender.td_maxend
318    II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
319    III. Upper bound for valid (s)ack:   sack <= receiver.td_end
320    IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW
321
322    where sack is the highest right edge of sack block found in the packet
323    or ack in the case of packet without SACK option.
324
325    The upper bound limit for a valid (s)ack is not ignored -
326    we don't have to deal with fragments.
327 */
328
329 static inline __u32 segment_seq_plus_len(__u32 seq,
330                                          size_t len,
331                                          unsigned int dataoff,
332                                          const struct tcphdr *tcph)
333 {
334         /* XXX Should I use payload length field in IP/IPv6 header ?
335          * - YK */
336         return (seq + len - dataoff - tcph->doff*4
337                 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
338 }
339
340 /* Fixme: what about big packets? */
/* MAXACKWINDOW(sender) is the larger of the sender's td_maxwin and
 * MAXACKWINCONST.  tcp_in_window() uses it as the distance below
 * receiver->td_end within which an (s)ack is still accepted (bound IV).
 * Note that the macro evaluates its argument more than once.
 */
341 #define MAXACKWINCONST                  66000
342 #define MAXACKWINDOW(sender)                                            \
343         ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin     \
344                                               : MAXACKWINCONST)
345
346 /*
347  * Simplified tcp_parse_options routine from tcp_input.c
348  */
349 static void tcp_options(const struct sk_buff *skb,
350                         unsigned int dataoff,
351                         const struct tcphdr *tcph,
352                         struct ip_ct_tcp_state *state)
353 {
354         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
355         const unsigned char *ptr;
356         int length = (tcph->doff*4) - sizeof(struct tcphdr);
357
358         if (!length)
359                 return;
360
361         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
362                                  length, buff);
363         BUG_ON(ptr == NULL);
364
365         state->td_scale =
366         state->flags = 0;
367
368         while (length > 0) {
369                 int opcode=*ptr++;
370                 int opsize;
371
372                 switch (opcode) {
373                 case TCPOPT_EOL:
374                         return;
375                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
376                         length--;
377                         continue;
378                 default:
379                         if (length < 2)
380                                 return;
381                         opsize=*ptr++;
382                         if (opsize < 2) /* "silly options" */
383                                 return;
384                         if (opsize > length)
385                                 return; /* don't parse partial options */
386
387                         if (opcode == TCPOPT_SACK_PERM
388                             && opsize == TCPOLEN_SACK_PERM)
389                                 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
390                         else if (opcode == TCPOPT_WINDOW
391                                  && opsize == TCPOLEN_WINDOW) {
392                                 state->td_scale = *(u_int8_t *)ptr;
393
394                                 if (state->td_scale > TCP_MAX_WSCALE)
395                                         state->td_scale = TCP_MAX_WSCALE;
396
397                                 state->flags |=
398                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
399                         }
400                         ptr += opsize - 2;
401                         length -= opsize;
402                 }
403         }
404 }
405
406 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
407                      const struct tcphdr *tcph, __u32 *sack)
408 {
409         unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
410         const unsigned char *ptr;
411         int length = (tcph->doff*4) - sizeof(struct tcphdr);
412         __u32 tmp;
413
414         if (!length)
415                 return;
416
417         ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
418                                  length, buff);
419         BUG_ON(ptr == NULL);
420
421         /* Fast path for timestamp-only option */
422         if (length == TCPOLEN_TSTAMP_ALIGNED
423             && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
424                                        | (TCPOPT_NOP << 16)
425                                        | (TCPOPT_TIMESTAMP << 8)
426                                        | TCPOLEN_TIMESTAMP))
427                 return;
428
429         while (length > 0) {
430                 int opcode = *ptr++;
431                 int opsize, i;
432
433                 switch (opcode) {
434                 case TCPOPT_EOL:
435                         return;
436                 case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
437                         length--;
438                         continue;
439                 default:
440                         if (length < 2)
441                                 return;
442                         opsize = *ptr++;
443                         if (opsize < 2) /* "silly options" */
444                                 return;
445                         if (opsize > length)
446                                 return; /* don't parse partial options */
447
448                         if (opcode == TCPOPT_SACK
449                             && opsize >= (TCPOLEN_SACK_BASE
450                                           + TCPOLEN_SACK_PERBLOCK)
451                             && !((opsize - TCPOLEN_SACK_BASE)
452                                  % TCPOLEN_SACK_PERBLOCK)) {
453                                 for (i = 0;
454                                      i < (opsize - TCPOLEN_SACK_BASE);
455                                      i += TCPOLEN_SACK_PERBLOCK) {
456                                         tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
457
458                                         if (after(tmp, *sack))
459                                                 *sack = tmp;
460                                 }
461                                 return;
462                         }
463                         ptr += opsize - 2;
464                         length -= opsize;
465                 }
466         }
467 }
468
/*
 * Decide whether a segment fits the tracked TCP windows and, if it does,
 * update the window tracking state.
 *
 * @ct:      conntrack entry the packet belongs to
 * @state:   TCP tracking state; seen[dir] is treated as the sender and
 *           seen[!dir] as the receiver of this segment
 * @dir:     direction of the packet
 * @index:   flag class of the packet; only consulted to count
 *           retransmissions of TCP_ACK_SET packets
 * @skb:     the packet
 * @dataoff: offset of the TCP header within @skb
 * @tcph:    TCP header of the packet
 *
 * The segment is accepted when all four bounds (I-IV) described in the
 * comment preceding segment_seq_plus_len() hold.  A segment failing them
 * is still accepted when the sender has IP_CT_TCP_FLAG_BE_LIBERAL set or
 * the per-netns tcp_be_liberal setting is non-zero; otherwise the reason
 * is logged.
 *
 * Returns true if the segment is accepted, false otherwise.
 */
469 static bool tcp_in_window(const struct nf_conn *ct,
470                           struct ip_ct_tcp *state,
471                           enum ip_conntrack_dir dir,
472                           unsigned int index,
473                           const struct sk_buff *skb,
474                           unsigned int dataoff,
475                           const struct tcphdr *tcph)
476 {
477         struct net *net = nf_ct_net(ct);
478         struct nf_tcp_net *tn = tcp_pernet(net);
479         struct ip_ct_tcp_state *sender = &state->seen[dir];
480         struct ip_ct_tcp_state *receiver = &state->seen[!dir];
481         const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
482         __u32 seq, ack, sack, end, win, swin;
483         u16 win_raw;
484         s32 receiver_offset;
485         bool res, in_recv_win;
486
487         /*
488          * Get the required data from the packet.
489          */
490         seq = ntohl(tcph->seq);
491         ack = sack = ntohl(tcph->ack_seq);
492         win_raw = ntohs(tcph->window);
493         win = win_raw;
494         end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
495
            /* sack starts out equal to ack and is only raised by tcp_sack() */
496         if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
497                 tcp_sack(skb, dataoff, tcph, &sack);
498
499         /* Take into account NAT sequence number mangling */
500         receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
501         ack -= receiver_offset;
502         sack -= receiver_offset;
503
504         pr_debug("tcp_in_window: START\n");
505         pr_debug("tcp_in_window: ");
506         nf_ct_dump_tuple(tuple);
507         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
508                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
509         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
510                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
511                  sender->td_end, sender->td_maxend, sender->td_maxwin,
512                  sender->td_scale,
513                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
514                  receiver->td_scale);
515
            /* td_maxwin == 0 marks a direction without initialized state */
516         if (sender->td_maxwin == 0) {
517                 /*
518                  * Initialize sender data.
519                  */
520                 if (tcph->syn) {
521                         /*
522                          * SYN-ACK in reply to a SYN
523                          * or SYN from reply direction in simultaneous open.
524                          */
525                         sender->td_end =
526                         sender->td_maxend = end;
527                         sender->td_maxwin = (win == 0 ? 1 : win);
528
529                         tcp_options(skb, dataoff, tcph, sender);
530                         /*
531                          * RFC 1323:
532                          * Both sides must send the Window Scale option
533                          * to enable window scaling in either direction.
534                          */
535                         if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
536                               && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
537                                 sender->td_scale =
538                                 receiver->td_scale = 0;
539                         if (!tcph->ack)
540                                 /* Simultaneous open */
                                    /* accepted without running the window checks below */
541                                 return true;
542                 } else {
543                         /*
544                          * We are in the middle of a connection,
545                          * its history is lost for us.
546                          * Let's try to use the data from the packet.
547                          */
548                         sender->td_end = end;
549                         swin = win << sender->td_scale;
550                         sender->td_maxwin = (swin == 0 ? 1 : swin);
551                         sender->td_maxend = end + sender->td_maxwin;
552                         if (receiver->td_maxwin == 0) {
553                                 /* We haven't seen traffic in the other
554                                  * direction yet but we have to tweak window
555                                  * tracking to pass III and IV until that
556                                  * happens.
557                                  */
558                                 receiver->td_end = receiver->td_maxend = sack;
559                         } else if (sack == receiver->td_end + 1) {
560                                 /* Likely a reply to a keepalive.
561                                  * Needed for III.
562                                  */
563                                 receiver->td_end++;
564                         }
565
566                 }
567         } else if (((state->state == TCP_CONNTRACK_SYN_SENT
568                      && dir == IP_CT_DIR_ORIGINAL)
569                    || (state->state == TCP_CONNTRACK_SYN_RECV
570                      && dir == IP_CT_DIR_REPLY))
571                    && after(end, sender->td_end)) {
572                 /*
573                  * RFC 793: "if a TCP is reinitialized ... then it need
574                  * not wait at all; it must only be sure to use sequence
575                  * numbers larger than those recently used."
576                  */
577                 sender->td_end =
578                 sender->td_maxend = end;
579                 sender->td_maxwin = (win == 0 ? 1 : win);
580
581                 tcp_options(skb, dataoff, tcph, sender);
582         }
583
584         if (!(tcph->ack)) {
585                 /*
586                  * If there is no ACK, just pretend it was set and OK.
587                  */
588                 ack = sack = receiver->td_end;
589         } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
590                     (TCP_FLAG_ACK|TCP_FLAG_RST))
591                    && (ack == 0)) {
592                 /*
593                  * Broken TCP stacks, that set ACK in RST packets as well
594                  * with zero ack value.
595                  */
596                 ack = sack = receiver->td_end;
597         }
598
599         if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
600                 /*
601                  * RST sent answering SYN.
602                  */
603                 seq = end = sender->td_end;
604
605         pr_debug("tcp_in_window: ");
606         nf_ct_dump_tuple(tuple);
607         pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
608                  seq, ack, receiver_offset, sack, receiver_offset, win, end);
609         pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
610                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
611                  sender->td_end, sender->td_maxend, sender->td_maxwin,
612                  sender->td_scale,
613                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
614                  receiver->td_scale);
615
616         /* Is the ending sequence in the receive window (if available)? */
617         in_recv_win = !receiver->td_maxwin ||
618                       after(end, sender->td_end - receiver->td_maxwin - 1);
619
620         pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
621                  before(seq, sender->td_maxend + 1),
622                  (in_recv_win ? 1 : 0),
623                  before(sack, receiver->td_end + 1),
624                  after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
625
            /* Bounds I, II, III and IV, in that order */
626         if (before(seq, sender->td_maxend + 1) &&
627             in_recv_win &&
628             before(sack, receiver->td_end + 1) &&
629             after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
630                 /*
631                  * Take into account window scaling (RFC 1323).
632                  */
633                 if (!tcph->syn)
634                         win <<= sender->td_scale;
635
636                 /*
637                  * Update sender data.
638                  */
639                 swin = win + (sack - ack);
640                 if (sender->td_maxwin < swin)
641                         sender->td_maxwin = swin;
642                 if (after(end, sender->td_end)) {
643                         sender->td_end = end;
644                         sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
645                 }
646                 if (tcph->ack) {
647                         if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
648                                 sender->td_maxack = ack;
649                                 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
650                         } else if (after(ack, sender->td_maxack))
651                                 sender->td_maxack = ack;
652                 }
653
654                 /*
655                  * Update receiver data.
656                  */
657                 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
658                         receiver->td_maxwin += end - sender->td_maxend;
659                 if (after(sack + win, receiver->td_maxend - 1)) {
660                         receiver->td_maxend = sack + win;
661                         if (win == 0)
662                                 receiver->td_maxend++;
663                 }
664                 if (ack == receiver->td_end)
665                         receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
666
667                 /*
668                  * Check retransmissions.
669                  */
                    /* An ACK-class packet identical to the last recorded one
                     * (direction, seq, ack, end and raw window) bumps the
                     * retransmission counter; any other one becomes the new
                     * reference and resets the counter.
                     */
670                 if (index == TCP_ACK_SET) {
671                         if (state->last_dir == dir
672                             && state->last_seq == seq
673                             && state->last_ack == ack
674                             && state->last_end == end
675                             && state->last_win == win_raw)
676                                 state->retrans++;
677                         else {
678                                 state->last_dir = dir;
679                                 state->last_seq = seq;
680                                 state->last_ack = ack;
681                                 state->last_end = end;
682                                 state->last_win = win_raw;
683                                 state->retrans = 0;
684                         }
685                 }
686                 res = true;
687         } else {
                    /* Out of window: accepted only in liberal mode; no
                     * tracking state is updated on this path.
                     */
688                 res = false;
689                 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
690                     tn->tcp_be_liberal)
691                         res = true;
692                 if (!res) {
                            /* The nested conditional re-tests bounds I-IV in
                             * order and reports the first one that fails.
                             * "BUG" would require all four to hold, in which
                             * case this else branch is not taken.
                             */
693                         nf_ct_l4proto_log_invalid(skb, ct,
694                         "%s",
695                         before(seq, sender->td_maxend + 1) ?
696                         in_recv_win ?
697                         before(sack, receiver->td_end + 1) ?
698                         after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
699                         : "ACK is under the lower bound (possible overly delayed ACK)"
700                         : "ACK is over the upper bound (ACKed data not seen yet)"
701                         : "SEQ is under the lower bound (already ACKed data retransmitted)"
702                         : "SEQ is over the upper bound (over the window of the receiver)");
703                 }
704         }
705
706         pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
707                  "receiver end=%u maxend=%u maxwin=%u\n",
708                  res, sender->td_end, sender->td_maxend, sender->td_maxwin,
709                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
710
711         return res;
712 }
713
714 /* table of valid flag combinations - PUSH, ECE and CWR are always valid
     *
     * tcp_error() indexes this table with the TCP flag byte after masking
     * out ECE, CWR and PSH.  The table is sized to hold every combination
     * of the remaining flags.  Combinations not listed here are
     * zero-initialized and therefore treated as invalid.
     */
715 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
716                                  TCPHDR_URG) + 1] =
717 {
718         [TCPHDR_SYN]                            = 1,
719         [TCPHDR_SYN|TCPHDR_URG]                 = 1,
720         [TCPHDR_SYN|TCPHDR_ACK]                 = 1,
721         [TCPHDR_RST]                            = 1,
722         [TCPHDR_RST|TCPHDR_ACK]                 = 1,
723         [TCPHDR_FIN|TCPHDR_ACK]                 = 1,
724         [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]      = 1,
725         [TCPHDR_ACK]                            = 1,
726         [TCPHDR_ACK|TCPHDR_URG]                 = 1,
727 };
728
/* Report @msg for @skb through nf_l4proto_log_invalid(), tagged with
 * IPPROTO_TCP.  The message is passed as a "%s" argument, so it is never
 * interpreted as a format string.
 */
729 static void tcp_error_log(const struct sk_buff *skb, struct net *net,
730                           u8 pf, const char *msg)
731 {
732         nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
733 }
734
735 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
736 static int tcp_error(struct net *net, struct nf_conn *tmpl,
737                      struct sk_buff *skb,
738                      unsigned int dataoff,
739                      u_int8_t pf,
740                      unsigned int hooknum)
741 {
742         const struct tcphdr *th;
743         struct tcphdr _tcph;
744         unsigned int tcplen = skb->len - dataoff;
745         u_int8_t tcpflags;
746
747         /* Smaller that minimal TCP header? */
748         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
749         if (th == NULL) {
750                 tcp_error_log(skb, net, pf, "short packet");
751                 return -NF_ACCEPT;
752         }
753
754         /* Not whole TCP header or malformed packet */
755         if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
756                 tcp_error_log(skb, net, pf, "truncated packet");
757                 return -NF_ACCEPT;
758         }
759
760         /* Checksum invalid? Ignore.
761          * We skip checking packets on the outgoing path
762          * because the checksum is assumed to be correct.
763          */
764         /* FIXME: Source route IP option packets --RR */
765         if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
766             nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
767                 tcp_error_log(skb, net, pf, "bad checksum");
768                 return -NF_ACCEPT;
769         }
770
771         /* Check TCP flags. */
772         tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
773         if (!tcp_valid_flags[tcpflags]) {
774                 tcp_error_log(skb, net, pf, "invalid tcp flag combination");
775                 return -NF_ACCEPT;
776         }
777
778         return NF_ACCEPT;
779 }
780
781 static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
782 {
783         return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
784                test_bit(IPS_ASSURED_BIT, &ct->status);
785 }
786
787 /* Returns verdict for packet, or -1 for invalid. */
788 static int tcp_packet(struct nf_conn *ct,
789                       const struct sk_buff *skb,
790                       unsigned int dataoff,
791                       enum ip_conntrack_info ctinfo)
792 {
793         struct net *net = nf_ct_net(ct);
794         struct nf_tcp_net *tn = tcp_pernet(net);
795         struct nf_conntrack_tuple *tuple;
796         enum tcp_conntrack new_state, old_state;
797         unsigned int index, *timeouts;
798         enum ip_conntrack_dir dir;
799         const struct tcphdr *th;
800         struct tcphdr _tcph;
801         unsigned long timeout;
802
803         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
804         BUG_ON(th == NULL);
805
806         spin_lock_bh(&ct->lock);
807         old_state = ct->proto.tcp.state;
808         dir = CTINFO2DIR(ctinfo);
809         index = get_conntrack_index(th);
810         new_state = tcp_conntracks[dir][index][old_state];
811         tuple = &ct->tuplehash[dir].tuple;
812
813         switch (new_state) {
814         case TCP_CONNTRACK_SYN_SENT:
815                 if (old_state < TCP_CONNTRACK_TIME_WAIT)
816                         break;
817                 /* RFC 1122: "When a connection is closed actively,
818                  * it MUST linger in TIME-WAIT state for a time 2xMSL
819                  * (Maximum Segment Lifetime). However, it MAY accept
820                  * a new SYN from the remote TCP to reopen the connection
821                  * directly from TIME-WAIT state, if..."
822                  * We ignore the conditions because we are in the
823                  * TIME-WAIT state anyway.
824                  *
825                  * Handle aborted connections: we and the server
826                  * think there is an existing connection but the client
827                  * aborts it and starts a new one.
828                  */
829                 if (((ct->proto.tcp.seen[dir].flags
830                       | ct->proto.tcp.seen[!dir].flags)
831                      & IP_CT_TCP_FLAG_CLOSE_INIT)
832                     || (ct->proto.tcp.last_dir == dir
833                         && ct->proto.tcp.last_index == TCP_RST_SET)) {
834                         /* Attempt to reopen a closed/aborted connection.
835                          * Delete this connection and look up again. */
836                         spin_unlock_bh(&ct->lock);
837
838                         /* Only repeat if we can actually remove the timer.
839                          * Destruction may already be in progress in process
840                          * context and we must give it a chance to terminate.
841                          */
842                         if (nf_ct_kill(ct))
843                                 return -NF_REPEAT;
844                         return NF_DROP;
845                 }
846                 /* Fall through */
847         case TCP_CONNTRACK_IGNORE:
848                 /* Ignored packets:
849                  *
850                  * Our connection entry may be out of sync, so ignore
851                  * packets which may signal the real connection between
852                  * the client and the server.
853                  *
854                  * a) SYN in ORIGINAL
855                  * b) SYN/ACK in REPLY
856                  * c) ACK in reply direction after initial SYN in original.
857                  *
858                  * If the ignored packet is invalid, the receiver will send
859                  * a RST we'll catch below.
860                  */
861                 if (index == TCP_SYNACK_SET
862                     && ct->proto.tcp.last_index == TCP_SYN_SET
863                     && ct->proto.tcp.last_dir != dir
864                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
865                         /* b) This SYN/ACK acknowledges a SYN that we earlier
866                          * ignored as invalid. This means that the client and
867                          * the server are both in sync, while the firewall is
868                          * not. We get in sync from the previously annotated
869                          * values.
870                          */
871                         old_state = TCP_CONNTRACK_SYN_SENT;
872                         new_state = TCP_CONNTRACK_SYN_RECV;
873                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
874                                 ct->proto.tcp.last_end;
875                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
876                                 ct->proto.tcp.last_end;
877                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
878                                 ct->proto.tcp.last_win == 0 ?
879                                         1 : ct->proto.tcp.last_win;
880                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
881                                 ct->proto.tcp.last_wscale;
882                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
883                         ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
884                                 ct->proto.tcp.last_flags;
885                         memset(&ct->proto.tcp.seen[dir], 0,
886                                sizeof(struct ip_ct_tcp_state));
887                         break;
888                 }
889                 ct->proto.tcp.last_index = index;
890                 ct->proto.tcp.last_dir = dir;
891                 ct->proto.tcp.last_seq = ntohl(th->seq);
892                 ct->proto.tcp.last_end =
893                     segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
894                 ct->proto.tcp.last_win = ntohs(th->window);
895
896                 /* a) This is a SYN in ORIGINAL. The client and the server
897                  * may be in sync but we are not. In that case, we annotate
898                  * the TCP options and let the packet go through. If it is a
899                  * valid SYN packet, the server will reply with a SYN/ACK, and
900                  * then we'll get in sync. Otherwise, the server potentially
901                  * responds with a challenge ACK if implementing RFC5961.
902                  */
903                 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
904                         struct ip_ct_tcp_state seen = {};
905
906                         ct->proto.tcp.last_flags =
907                         ct->proto.tcp.last_wscale = 0;
908                         tcp_options(skb, dataoff, th, &seen);
909                         if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
910                                 ct->proto.tcp.last_flags |=
911                                         IP_CT_TCP_FLAG_WINDOW_SCALE;
912                                 ct->proto.tcp.last_wscale = seen.td_scale;
913                         }
914                         if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
915                                 ct->proto.tcp.last_flags |=
916                                         IP_CT_TCP_FLAG_SACK_PERM;
917                         }
918                         /* Mark the potential for RFC5961 challenge ACK,
919                          * this pose a special problem for LAST_ACK state
920                          * as ACK is intrepretated as ACKing last FIN.
921                          */
922                         if (old_state == TCP_CONNTRACK_LAST_ACK)
923                                 ct->proto.tcp.last_flags |=
924                                         IP_CT_EXP_CHALLENGE_ACK;
925                 }
926                 spin_unlock_bh(&ct->lock);
927                 nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
928                                           "state %s ", tcp_conntrack_names[old_state]);
929                 return NF_ACCEPT;
930         case TCP_CONNTRACK_MAX:
931                 /* Special case for SYN proxy: when the SYN to the server or
932                  * the SYN/ACK from the server is lost, the client may transmit
933                  * a keep-alive packet while in SYN_SENT state. This needs to
934                  * be associated with the original conntrack entry in order to
935                  * generate a new SYN with the correct sequence number.
936                  */
937                 if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
938                     index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
939                     ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
940                     ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
941                         pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
942                         spin_unlock_bh(&ct->lock);
943                         return NF_ACCEPT;
944                 }
945
946                 /* Invalid packet */
947                 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
948                          dir, get_conntrack_index(th), old_state);
949                 spin_unlock_bh(&ct->lock);
950                 nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
951                 return -NF_ACCEPT;
952         case TCP_CONNTRACK_TIME_WAIT:
953                 /* RFC5961 compliance cause stack to send "challenge-ACK"
954                  * e.g. in response to spurious SYNs.  Conntrack MUST
955                  * not believe this ACK is acking last FIN.
956                  */
957                 if (old_state == TCP_CONNTRACK_LAST_ACK &&
958                     index == TCP_ACK_SET &&
959                     ct->proto.tcp.last_dir != dir &&
960                     ct->proto.tcp.last_index == TCP_SYN_SET &&
961                     (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
962                         /* Detected RFC5961 challenge ACK */
963                         ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
964                         spin_unlock_bh(&ct->lock);
965                         nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
966                         return NF_ACCEPT; /* Don't change state */
967                 }
968                 break;
969         case TCP_CONNTRACK_SYN_SENT2:
970                 /* tcp_conntracks table is not smart enough to handle
971                  * simultaneous open.
972                  */
973                 ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
974                 break;
975         case TCP_CONNTRACK_SYN_RECV:
976                 if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
977                     ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
978                         new_state = TCP_CONNTRACK_ESTABLISHED;
979                 break;
980         case TCP_CONNTRACK_CLOSE:
981                 if (index != TCP_RST_SET)
982                         break;
983
984                 if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
985                         u32 seq = ntohl(th->seq);
986
987                         if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
988                                 /* Invalid RST  */
989                                 spin_unlock_bh(&ct->lock);
990                                 nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
991                                 return -NF_ACCEPT;
992                         }
993
994                         if (!nf_conntrack_tcp_established(ct) ||
995                             seq == ct->proto.tcp.seen[!dir].td_maxack)
996                                 break;
997
998                         /* Check if rst is part of train, such as
999                          *   foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
1000                          *   foo:80 > bar:4379: R, 235946602:235946602(0)  ack 42
1001                          */
1002                         if (ct->proto.tcp.last_index == TCP_ACK_SET &&
1003                             ct->proto.tcp.last_dir == dir &&
1004                             seq == ct->proto.tcp.last_end)
1005                                 break;
1006
1007                         /* ... RST sequence number doesn't match exactly, keep
1008                          * established state to allow a possible challenge ACK.
1009                          */
1010                         new_state = old_state;
1011                 }
1012                 if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1013                          && ct->proto.tcp.last_index == TCP_SYN_SET)
1014                         || (!test_bit(IPS_ASSURED_BIT, &ct->status)
1015                             && ct->proto.tcp.last_index == TCP_ACK_SET))
1016                     && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1017                         /* RST sent to invalid SYN or ACK we had let through
1018                          * at a) and c) above:
1019                          *
1020                          * a) SYN was in window then
1021                          * c) we hold a half-open connection.
1022                          *
1023                          * Delete our connection entry.
1024                          * We skip window checking, because packet might ACK
1025                          * segments we ignored. */
1026                         goto in_window;
1027                 }
1028                 break;
1029         default:
1030                 /* Keep compilers happy. */
1031                 break;
1032         }
1033
1034         if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1035                            skb, dataoff, th)) {
1036                 spin_unlock_bh(&ct->lock);
1037                 return -NF_ACCEPT;
1038         }
1039      in_window:
1040         /* From now on we have got in-window packets */
1041         ct->proto.tcp.last_index = index;
1042         ct->proto.tcp.last_dir = dir;
1043
1044         pr_debug("tcp_conntracks: ");
1045         nf_ct_dump_tuple(tuple);
1046         pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1047                  (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1048                  (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1049                  old_state, new_state);
1050
1051         ct->proto.tcp.state = new_state;
1052         if (old_state != new_state
1053             && new_state == TCP_CONNTRACK_FIN_WAIT)
1054                 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1055
1056         timeouts = nf_ct_timeout_lookup(ct);
1057         if (!timeouts)
1058                 timeouts = tn->timeouts;
1059
1060         if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1061             timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1062                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1063         else if (unlikely(index == TCP_RST_SET))
1064                 timeout = timeouts[TCP_CONNTRACK_CLOSE];
1065         else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1066                  IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1067                  timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1068                 timeout = timeouts[TCP_CONNTRACK_UNACK];
1069         else if (ct->proto.tcp.last_win == 0 &&
1070                  timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1071                 timeout = timeouts[TCP_CONNTRACK_RETRANS];
1072         else
1073                 timeout = timeouts[new_state];
1074         spin_unlock_bh(&ct->lock);
1075
1076         if (new_state != old_state)
1077                 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1078
1079         if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1080                 /* If only reply is a RST, we can consider ourselves not to
1081                    have an established connection: this is a fairly common
1082                    problem case, so we can delete the conntrack
1083                    immediately.  --RR */
1084                 if (th->rst) {
1085                         nf_ct_kill_acct(ct, ctinfo, skb);
1086                         return NF_ACCEPT;
1087                 }
1088                 /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1089                  * pickup with loose=1. Avoid large ESTABLISHED timeout.
1090                  */
1091                 if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1092                     timeout > timeouts[TCP_CONNTRACK_UNACK])
1093                         timeout = timeouts[TCP_CONNTRACK_UNACK];
1094         } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1095                    && (old_state == TCP_CONNTRACK_SYN_RECV
1096                        || old_state == TCP_CONNTRACK_ESTABLISHED)
1097                    && new_state == TCP_CONNTRACK_ESTABLISHED) {
1098                 /* Set ASSURED if we see see valid ack in ESTABLISHED
1099                    after SYN_RECV or a valid answer for a picked up
1100                    connection. */
1101                 set_bit(IPS_ASSURED_BIT, &ct->status);
1102                 nf_conntrack_event_cache(IPCT_ASSURED, ct);
1103         }
1104         nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1105
1106         return NF_ACCEPT;
1107 }
1108
1109 /* Called when a new connection for this protocol found. */
1110 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1111                     unsigned int dataoff)
1112 {
1113         enum tcp_conntrack new_state;
1114         const struct tcphdr *th;
1115         struct tcphdr _tcph;
1116         struct net *net = nf_ct_net(ct);
1117         struct nf_tcp_net *tn = tcp_pernet(net);
1118         const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1119         const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1120
1121         th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1122         BUG_ON(th == NULL);
1123
1124         /* Don't need lock here: this conntrack not in circulation yet */
1125         new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1126
1127         /* Invalid: delete conntrack */
1128         if (new_state >= TCP_CONNTRACK_MAX) {
1129                 pr_debug("nf_ct_tcp: invalid new deleting.\n");
1130                 return false;
1131         }
1132
1133         if (new_state == TCP_CONNTRACK_SYN_SENT) {
1134                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1135                 /* SYN packet */
1136                 ct->proto.tcp.seen[0].td_end =
1137                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1138                                              dataoff, th);
1139                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1140                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1141                         ct->proto.tcp.seen[0].td_maxwin = 1;
1142                 ct->proto.tcp.seen[0].td_maxend =
1143                         ct->proto.tcp.seen[0].td_end;
1144
1145                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1146         } else if (tn->tcp_loose == 0) {
1147                 /* Don't try to pick up connections. */
1148                 return false;
1149         } else {
1150                 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1151                 /*
1152                  * We are in the middle of a connection,
1153                  * its history is lost for us.
1154                  * Let's try to use the data from the packet.
1155                  */
1156                 ct->proto.tcp.seen[0].td_end =
1157                         segment_seq_plus_len(ntohl(th->seq), skb->len,
1158                                              dataoff, th);
1159                 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1160                 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1161                         ct->proto.tcp.seen[0].td_maxwin = 1;
1162                 ct->proto.tcp.seen[0].td_maxend =
1163                         ct->proto.tcp.seen[0].td_end +
1164                         ct->proto.tcp.seen[0].td_maxwin;
1165
1166                 /* We assume SACK and liberal window checking to handle
1167                  * window scaling */
1168                 ct->proto.tcp.seen[0].flags =
1169                 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1170                                               IP_CT_TCP_FLAG_BE_LIBERAL;
1171         }
1172
1173         /* tcp_packet will set them */
1174         ct->proto.tcp.last_index = TCP_NONE_SET;
1175
1176         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1177                  "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1178                  sender->td_end, sender->td_maxend, sender->td_maxwin,
1179                  sender->td_scale,
1180                  receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1181                  receiver->td_scale);
1182         return true;
1183 }
1184
1185 static bool tcp_can_early_drop(const struct nf_conn *ct)
1186 {
1187         switch (ct->proto.tcp.state) {
1188         case TCP_CONNTRACK_FIN_WAIT:
1189         case TCP_CONNTRACK_LAST_ACK:
1190         case TCP_CONNTRACK_TIME_WAIT:
1191         case TCP_CONNTRACK_CLOSE:
1192         case TCP_CONNTRACK_CLOSE_WAIT:
1193                 return true;
1194         default:
1195                 break;
1196         }
1197
1198         return false;
1199 }
1200
1201 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1202
1203 #include <linux/netfilter/nfnetlink.h>
1204 #include <linux/netfilter/nfnetlink_conntrack.h>
1205
1206 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1207                          struct nf_conn *ct)
1208 {
1209         struct nlattr *nest_parms;
1210         struct nf_ct_tcp_flags tmp = {};
1211
1212         spin_lock_bh(&ct->lock);
1213         nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1214         if (!nest_parms)
1215                 goto nla_put_failure;
1216
1217         if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1218             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1219                        ct->proto.tcp.seen[0].td_scale) ||
1220             nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1221                        ct->proto.tcp.seen[1].td_scale))
1222                 goto nla_put_failure;
1223
1224         tmp.flags = ct->proto.tcp.seen[0].flags;
1225         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1226                     sizeof(struct nf_ct_tcp_flags), &tmp))
1227                 goto nla_put_failure;
1228
1229         tmp.flags = ct->proto.tcp.seen[1].flags;
1230         if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1231                     sizeof(struct nf_ct_tcp_flags), &tmp))
1232                 goto nla_put_failure;
1233         spin_unlock_bh(&ct->lock);
1234
1235         nla_nest_end(skb, nest_parms);
1236
1237         return 0;
1238
1239 nla_put_failure:
1240         spin_unlock_bh(&ct->lock);
1241         return -1;
1242 }
1243
1244 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1245         [CTA_PROTOINFO_TCP_STATE]           = { .type = NLA_U8 },
1246         [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1247         [CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1248         [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1249         [CTA_PROTOINFO_TCP_FLAGS_REPLY]     = { .len =  sizeof(struct nf_ct_tcp_flags) },
1250 };
1251
/* Netlink space accounted for the TCP protocol info: two one-byte
 * attributes plus two struct nf_ct_tcp_flags attributes.
 *
 * NOTE(review): tcp_to_nlattr() emits three one-byte attributes (state and
 * two window scales) inside a nest; confirm whether the third byte and the
 * nest header are accounted for elsewhere.
 */
#define TCP_NLATTR_SIZE ( \
        2 * NLA_ALIGN(NLA_HDRLEN + 1) + \
        2 * NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1257
1258 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1259 {
1260         struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1261         struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1262         int err;
1263
1264         /* updates could not contain anything about the private
1265          * protocol info, in that case skip the parsing */
1266         if (!pattr)
1267                 return 0;
1268
1269         err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1270                                tcp_nla_policy, NULL);
1271         if (err < 0)
1272                 return err;
1273
1274         if (tb[CTA_PROTOINFO_TCP_STATE] &&
1275             nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1276                 return -EINVAL;
1277
1278         spin_lock_bh(&ct->lock);
1279         if (tb[CTA_PROTOINFO_TCP_STATE])
1280                 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1281
1282         if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1283                 struct nf_ct_tcp_flags *attr =
1284                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1285                 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1286                 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1287         }
1288
1289         if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1290                 struct nf_ct_tcp_flags *attr =
1291                         nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1292                 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1293                 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1294         }
1295
1296         if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1297             tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1298             ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1299             ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1300                 ct->proto.tcp.seen[0].td_scale =
1301                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1302                 ct->proto.tcp.seen[1].td_scale =
1303                         nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1304         }
1305         spin_unlock_bh(&ct->lock);
1306
1307         return 0;
1308 }
1309
1310 static unsigned int tcp_nlattr_tuple_size(void)
1311 {
1312         static unsigned int size __read_mostly;
1313
1314         if (!size)
1315                 size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1316
1317         return size;
1318 }
1319 #endif
1320
1321 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1322
1323 #include <linux/netfilter/nfnetlink.h>
1324 #include <linux/netfilter/nfnetlink_cttimeout.h>
1325
1326 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1327                                      struct net *net, void *data)
1328 {
1329         struct nf_tcp_net *tn = tcp_pernet(net);
1330         unsigned int *timeouts = data;
1331         int i;
1332
1333         if (!timeouts)
1334                 timeouts = tn->timeouts;
1335         /* set default TCP timeouts. */
1336         for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1337                 timeouts[i] = tn->timeouts[i];
1338
1339         if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1340                 timeouts[TCP_CONNTRACK_SYN_SENT] =
1341                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1342         }
1343
1344         if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1345                 timeouts[TCP_CONNTRACK_SYN_RECV] =
1346                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1347         }
1348         if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1349                 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1350                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1351         }
1352         if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1353                 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1354                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1355         }
1356         if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1357                 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1358                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1359         }
1360         if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1361                 timeouts[TCP_CONNTRACK_LAST_ACK] =
1362                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1363         }
1364         if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1365                 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1366                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1367         }
1368         if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1369                 timeouts[TCP_CONNTRACK_CLOSE] =
1370                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1371         }
1372         if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1373                 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1374                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1375         }
1376         if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1377                 timeouts[TCP_CONNTRACK_RETRANS] =
1378                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1379         }
1380         if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1381                 timeouts[TCP_CONNTRACK_UNACK] =
1382                         ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1383         }
1384
1385         timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
1386         return 0;
1387 }
1388
1389 static int
1390 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1391 {
1392         const unsigned int *timeouts = data;
1393
1394         if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1395                         htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1396             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1397                          htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1398             nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1399                          htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1400             nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1401                          htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1402             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1403                          htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1404             nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1405                          htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1406             nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1407                          htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1408             nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1409                          htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1410             nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1411                          htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1412             nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1413                          htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1414             nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1415                          htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1416                 goto nla_put_failure;
1417         return 0;
1418
1419 nla_put_failure:
1420         return -ENOSPC;
1421 }
1422
1423 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1424         [CTA_TIMEOUT_TCP_SYN_SENT]      = { .type = NLA_U32 },
1425         [CTA_TIMEOUT_TCP_SYN_RECV]      = { .type = NLA_U32 },
1426         [CTA_TIMEOUT_TCP_ESTABLISHED]   = { .type = NLA_U32 },
1427         [CTA_TIMEOUT_TCP_FIN_WAIT]      = { .type = NLA_U32 },
1428         [CTA_TIMEOUT_TCP_CLOSE_WAIT]    = { .type = NLA_U32 },
1429         [CTA_TIMEOUT_TCP_LAST_ACK]      = { .type = NLA_U32 },
1430         [CTA_TIMEOUT_TCP_TIME_WAIT]     = { .type = NLA_U32 },
1431         [CTA_TIMEOUT_TCP_CLOSE]         = { .type = NLA_U32 },
1432         [CTA_TIMEOUT_TCP_SYN_SENT2]     = { .type = NLA_U32 },
1433         [CTA_TIMEOUT_TCP_RETRANS]       = { .type = NLA_U32 },
1434         [CTA_TIMEOUT_TCP_UNACK]         = { .type = NLA_U32 },
1435 };
1436 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1437
1438 #ifdef CONFIG_SYSCTL
1439 static struct ctl_table tcp_sysctl_table[] = {
1440         {
1441                 .procname       = "nf_conntrack_tcp_timeout_syn_sent",
1442                 .maxlen         = sizeof(unsigned int),
1443                 .mode           = 0644,
1444                 .proc_handler   = proc_dointvec_jiffies,
1445         },
1446         {
1447                 .procname       = "nf_conntrack_tcp_timeout_syn_recv",
1448                 .maxlen         = sizeof(unsigned int),
1449                 .mode           = 0644,
1450                 .proc_handler   = proc_dointvec_jiffies,
1451         },
1452         {
1453                 .procname       = "nf_conntrack_tcp_timeout_established",
1454                 .maxlen         = sizeof(unsigned int),
1455                 .mode           = 0644,
1456                 .proc_handler   = proc_dointvec_jiffies,
1457         },
1458         {
1459                 .procname       = "nf_conntrack_tcp_timeout_fin_wait",
1460                 .maxlen         = sizeof(unsigned int),
1461                 .mode           = 0644,
1462                 .proc_handler   = proc_dointvec_jiffies,
1463         },
1464         {
1465                 .procname       = "nf_conntrack_tcp_timeout_close_wait",
1466                 .maxlen         = sizeof(unsigned int),
1467                 .mode           = 0644,
1468                 .proc_handler   = proc_dointvec_jiffies,
1469         },
1470         {
1471                 .procname       = "nf_conntrack_tcp_timeout_last_ack",
1472                 .maxlen         = sizeof(unsigned int),
1473                 .mode           = 0644,
1474                 .proc_handler   = proc_dointvec_jiffies,
1475         },
1476         {
1477                 .procname       = "nf_conntrack_tcp_timeout_time_wait",
1478                 .maxlen         = sizeof(unsigned int),
1479                 .mode           = 0644,
1480                 .proc_handler   = proc_dointvec_jiffies,
1481         },
1482         {
1483                 .procname       = "nf_conntrack_tcp_timeout_close",
1484                 .maxlen         = sizeof(unsigned int),
1485                 .mode           = 0644,
1486                 .proc_handler   = proc_dointvec_jiffies,
1487         },
1488         {
1489                 .procname       = "nf_conntrack_tcp_timeout_max_retrans",
1490                 .maxlen         = sizeof(unsigned int),
1491                 .mode           = 0644,
1492                 .proc_handler   = proc_dointvec_jiffies,
1493         },
1494         {
1495                 .procname       = "nf_conntrack_tcp_timeout_unacknowledged",
1496                 .maxlen         = sizeof(unsigned int),
1497                 .mode           = 0644,
1498                 .proc_handler   = proc_dointvec_jiffies,
1499         },
1500         {
1501                 .procname       = "nf_conntrack_tcp_loose",
1502                 .maxlen         = sizeof(unsigned int),
1503                 .mode           = 0644,
1504                 .proc_handler   = proc_dointvec,
1505         },
1506         {
1507                 .procname       = "nf_conntrack_tcp_be_liberal",
1508                 .maxlen         = sizeof(unsigned int),
1509                 .mode           = 0644,
1510                 .proc_handler   = proc_dointvec,
1511         },
1512         {
1513                 .procname       = "nf_conntrack_tcp_max_retrans",
1514                 .maxlen         = sizeof(unsigned int),
1515                 .mode           = 0644,
1516                 .proc_handler   = proc_dointvec,
1517         },
1518         { }
1519 };
1520 #endif /* CONFIG_SYSCTL */
1521
/* Give @pn its own copy of tcp_sysctl_table, wired to the fields of @tn.
 *
 * Does nothing when @pn already has a table, or when CONFIG_SYSCTL is
 * disabled.  Returns 0 on success and -ENOMEM if the copy cannot be
 * allocated.
 */
static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
				    struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
	struct ctl_table *table;

	/* A table is already attached; nothing to do. */
	if (pn->ctl_table)
		return 0;

	table = kmemdup(tcp_sysctl_table, sizeof(tcp_sysctl_table),
			GFP_KERNEL);
	if (!table)
		return -ENOMEM;
	pn->ctl_table = table;

	/* Indices below follow the entry order of tcp_sysctl_table. */

	/* Per-state timeouts. */
	table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
	table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
	table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
	table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
	table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
	table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
	table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
	table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
	table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
	table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];

	/* Behaviour knobs. */
	table[10].data = &tn->tcp_loose;
	table[11].data = &tn->tcp_be_liberal;
	table[12].data = &tn->tcp_max_retrans;
#endif
	return 0;
}
1551
1552 static int tcp_init_net(struct net *net, u_int16_t proto)
1553 {
1554         struct nf_tcp_net *tn = tcp_pernet(net);
1555         struct nf_proto_net *pn = &tn->pn;
1556
1557         if (!pn->users) {
1558                 int i;
1559
1560                 for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1561                         tn->timeouts[i] = tcp_timeouts[i];
1562
1563                 /* timeouts[0] is unused, make it same as SYN_SENT so
1564                  * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1565                  */
1566                 tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1567                 tn->tcp_loose = nf_ct_tcp_loose;
1568                 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1569                 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1570         }
1571
1572         return tcp_kmemdup_sysctl_table(pn, tn);
1573 }
1574
1575 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1576 {
1577         return &net->ct.nf_ct_proto.tcp.pn;
1578 }
1579
1580 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1581 {
1582         .l3proto                = PF_INET,
1583         .l4proto                = IPPROTO_TCP,
1584 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1585         .print_conntrack        = tcp_print_conntrack,
1586 #endif
1587         .packet                 = tcp_packet,
1588         .new                    = tcp_new,
1589         .error                  = tcp_error,
1590         .can_early_drop         = tcp_can_early_drop,
1591 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1592         .to_nlattr              = tcp_to_nlattr,
1593         .from_nlattr            = nlattr_to_tcp,
1594         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1595         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1596         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1597         .nlattr_size            = TCP_NLATTR_SIZE,
1598         .nla_policy             = nf_ct_port_nla_policy,
1599 #endif
1600 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1601         .ctnl_timeout           = {
1602                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1603                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1604                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1605                 .obj_size       = sizeof(unsigned int) *
1606                                         TCP_CONNTRACK_TIMEOUT_MAX,
1607                 .nla_policy     = tcp_timeout_nla_policy,
1608         },
1609 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1610         .init_net               = tcp_init_net,
1611         .get_net_proto          = tcp_get_net_proto,
1612 };
1613 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1614
1615 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1616 {
1617         .l3proto                = PF_INET6,
1618         .l4proto                = IPPROTO_TCP,
1619 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1620         .print_conntrack        = tcp_print_conntrack,
1621 #endif
1622         .packet                 = tcp_packet,
1623         .new                    = tcp_new,
1624         .error                  = tcp_error,
1625         .can_early_drop         = tcp_can_early_drop,
1626 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1627         .nlattr_size            = TCP_NLATTR_SIZE,
1628         .to_nlattr              = tcp_to_nlattr,
1629         .from_nlattr            = nlattr_to_tcp,
1630         .tuple_to_nlattr        = nf_ct_port_tuple_to_nlattr,
1631         .nlattr_to_tuple        = nf_ct_port_nlattr_to_tuple,
1632         .nlattr_tuple_size      = tcp_nlattr_tuple_size,
1633         .nla_policy             = nf_ct_port_nla_policy,
1634 #endif
1635 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1636         .ctnl_timeout           = {
1637                 .nlattr_to_obj  = tcp_timeout_nlattr_to_obj,
1638                 .obj_to_nlattr  = tcp_timeout_obj_to_nlattr,
1639                 .nlattr_max     = CTA_TIMEOUT_TCP_MAX,
1640                 .obj_size       = sizeof(unsigned int) *
1641                                         TCP_CONNTRACK_TIMEOUT_MAX,
1642                 .nla_policy     = tcp_timeout_nla_policy,
1643         },
1644 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1645         .init_net               = tcp_init_net,
1646         .get_net_proto          = tcp_get_net_proto,
1647 };
1648 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);