// SPDX-License-Identifier: GPL-2.0-only
/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

#ifdef CONFIG_IP_DCCP_DEBUG
static const char *dccp_state_name(const int state)
{
        static const char *const dccp_state_names[] = {
        [DCCP_OPEN]             = "OPEN",
        [DCCP_REQUESTING]       = "REQUESTING",
        [DCCP_PARTOPEN]         = "PARTOPEN",
        [DCCP_LISTEN]           = "LISTEN",
        [DCCP_RESPOND]          = "RESPOND",
        [DCCP_CLOSING]          = "CLOSING",
        [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
        [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
        [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
        [DCCP_TIME_WAIT]        = "TIME_WAIT",
        [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}
#endif

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                /* Client retransmits all Confirm options until entering OPEN */
                if (oldstate == DCCP_PARTOPEN)
                        dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *const dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

void dccp_destruct_common(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_tx_ccid = NULL;
}
EXPORT_SYMBOL_GPL(dccp_destruct_common);

static void dccp_sk_destruct(struct sock *sk)
{
        dccp_destruct_common(sk);
        inet_sock_destruct(sk);
}

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        sk->sk_destruct         = dccp_sk_destruct;
        icsk->icsk_sync_mss     = dccp_sync_mss;
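        /* Conservative initial MSS guess; 536 mirrors TCP's classic default
         * (576-byte minimum IP datagram minus 40 bytes of headers, RFC 1122)
         * and is refined later via dccp_sync_mss().
         */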
        dp->dccps_mss_cache     = 536;
        dp->dccps_rate_last     = jiffies;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;

        dccp_init_xmit_timers(sk);

        INIT_LIST_HEAD(&dp->dccps_featneg);
        /* control socket doesn't need feat nego */
        if (likely(ctl_sock_initialized))
                return dccp_feat_init(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

void dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);

        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dp->dccps_hc_rx_ackvec != NULL) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        dp->dccps_hc_rx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        /* do not start to listen if feature negotiation setup fails */
        if (dccp_feat_finalise_settings(dp))
                return -EPROTO;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        struct dccp_sock *dp = dccp_sk(sk);
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC793, sec. 3.8
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        dp->dccps_hc_rx_ccid = NULL;

        __skb_queue_purge(&sk->sk_receive_queue);
        __skb_queue_purge(&sk->sk_write_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->inet_dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        struct sock *sk = sock->sk;
        __poll_t mask;
        u8 shutdown;
        int state;

        sock_poll_wait(file, sock, wait);

        state = inet_sk_state_load(sk);
        if (state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by the poll logic, and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (READ_ONCE(sk->sk_err))
                mask = EPOLLERR;
        shutdown = READ_ONCE(sk->sk_shutdown);

        if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
                mask |= EPOLLHUP;
        if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

        /* Connected? DCCPF_* are (1 << state) masks, so this test
         * excludes the half-set-up REQUESTING and RESPOND states.
         */
        if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= EPOLLIN | EPOLLRDNORM;

                if (!(shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {  /* send SIGIO later */
                                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_is_writeable(sk))
                                        mask |= EPOLLOUT | EPOLLWRNORM;
                        }
                }
        }
        return mask;
}
EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet since
                         * that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

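/*
 * The optval buffer for DCCP_SOCKOPT_SERVICE holds one __be32 service code,
 * optionally followed by further __be32 codes (at most
 * DCCP_SERVICE_LIST_MAX_LEN values in total); the first code has already
 * been copied into @service by the caller, do_dccp_setsockopt().
 */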
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, unsigned int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
        u8 *list, len;
        int i, rc;

        if (cscov < 0 || cscov > 15)
                return -EINVAL;
        /*
         * Populate a list of permissible values, in the range cscov...15. This
         * is necessary since feature negotiation of single values only works if
         * both sides incidentally choose the same value. Since the list starts
         * lowest-value first, negotiation will pick the smallest shared value.
         */
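        /* For example, cscov = 12 yields the 4-element list {12, 13, 14, 15}. */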
        if (cscov == 0)
                return 0;
        len = 16 - cscov;

        list = kmalloc(len, GFP_KERNEL);
        if (list == NULL)
                return -ENOBUFS;

        for (i = 0; i < len; i++)
                list[i] = cscov++;

        rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

        if (rc == 0) {
                if (rx)
                        dccp_sk(sk)->dccps_pcrlen = cscov;
                else
                        dccp_sk(sk)->dccps_pcslen = cscov;
        }
        kfree(list);
        return rc;
}

static int dccp_setsockopt_ccid(struct sock *sk, int type,
                                char __user *optval, unsigned int optlen)
{
        u8 *val;
        int rc = 0;

        if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
                return -EINVAL;

        val = memdup_user(optval, optlen);
        if (IS_ERR(val))
                return PTR_ERR(val);

        lock_sock(sk);
        if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

        if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
                rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
        release_sock(sk);

        kfree(val);
        return rc;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                char __user *optval, unsigned int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CHANGE_L:
        case DCCP_SOCKOPT_CHANGE_R:
                DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_CCID:
        case DCCP_SOCKOPT_RX_CCID:
        case DCCP_SOCKOPT_TX_CCID:
                return dccp_setsockopt_ccid(sk, optname, optval, optlen);
        }

        if (optlen < (int)sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, false);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                err = dccp_setsockopt_cscov(sk, val, true);
                break;
        case DCCP_SOCKOPT_QPOLICY_ID:
                if (sk->sk_state != DCCP_CLOSED)
                        err = -EISCONN;
                else if (val < 0 || val >= DCCPQ_POLICY_MAX)
                        err = -EINVAL;
                else
                        dp->dccps_qpolicy = val;
                break;
        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
                if (val < 0)
                        err = -EINVAL;
                else
                        dp->dccps_tx_qlen = val;
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }
        release_sock(sk);

        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, unsigned int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, unsigned int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = READ_ONCE(dp->dccps_mss_cache);
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
        case DCCP_SOCKOPT_TX_CCID:
                val = ccid_get_current_tx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_RX_CCID:
                val = ccid_get_current_rx_ccid(dp);
                if (val < 0)
                        return -ENOPROTOOPT;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case DCCP_SOCKOPT_QPOLICY_ID:
                val = dp->dccps_qpolicy;
                break;
        case DCCP_SOCKOPT_QPOLICY_TXQLEN:
                val = dp->dccps_tx_qlen;
                break;
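        /* Option numbers 128..191 are reserved for the RX CCID and
         * 192..255 for the TX CCID (CCID-specific options).
         */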
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
        struct cmsghdr *cmsg;

        /*
         * Assign an (opaque) qpolicy priority value to skb->priority.
         *
         * We are overloading this skb field for use with the qpolicy subsystem.
         * The skb->priority is normally used for the SO_PRIORITY option, which
         * is initialised from sk_priority. Since the assignment of sk_priority
         * to skb->priority happens later (on layer 3), we overload this field
         * for use with queueing priorities as long as the skb is on layer 4.
         * The default priority value (if nothing is set) is 0.
         */
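        /*
         * Illustrative userspace usage (a sketch, not part of this file):
         * the sender passes the priority as ancillary data with
         * cmsg_level = SOL_DCCP and cmsg_type = DCCP_SCM_PRIORITY, carrying
         * a single __u32 payload, which the loop below copies into
         * skb->priority.
         */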
        skb->priority = 0;

        for_each_cmsghdr(cmsg, msg) {
                if (!CMSG_OK(msg, cmsg))
                        return -EINVAL;

                if (cmsg->cmsg_level != SOL_DCCP)
                        continue;

                if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
                    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
                        return -EINVAL;

                switch (cmsg->cmsg_type) {
                case DCCP_SCM_PRIORITY:
                        if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
                                return -EINVAL;
                        skb->priority = *(__u32 *)CMSG_DATA(cmsg);
                        break;
                default:
                        return -EINVAL;
                }
        }
        return 0;
}

int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        trace_dccp_probe(sk, len);

        if (len > READ_ONCE(dp->dccps_mss_cache))
                return -EMSGSIZE;

        lock_sock(sk);

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
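        /*
         * Drop the socket lock around the (possibly sleeping) skb allocation;
         * queue occupancy, socket state and dccps_mss_cache are all
         * re-checked below, once the lock has been re-taken.
         */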
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        if (dccp_qpolicy_full(sk)) {
                rc = -EAGAIN;
                goto out_discard;
        }

        if (sk->sk_state == DCCP_CLOSED) {
                rc = -ENOTCONN;
                goto out_discard;
        }

        /* We need to check dccps_mss_cache after socket is locked. */
        if (len > dp->dccps_mss_cache) {
                rc = -EMSGSIZE;
                goto out_discard;
        }

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
                goto out_discard;

        rc = dccp_msghdr_parse(msg, skb);
        if (rc != 0)
                goto out_discard;

        dccp_qpolicy_push(sk, skb);
        /*
         * The xmit_timer is set if the TX CCID is rate-based and will expire
         * when congestion control permits releasing further packets into the
         * network. Window-based CCIDs do not use this timer.
         */
        if (!timer_pending(&dp->dccps_xmit_timer))
                dccp_write_xmit(sk);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
                 int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo, NULL);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_msg(skb, 0, msg, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
                if (flags & MSG_TRUNC)
                        len = skb->len;
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        sk->sk_max_ack_backlog = backlog;
        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        /* If the socket has already been reset, kill it. */
        if (sk->sk_state == DCCP_CLOSED)
                goto adjudge_to_death;

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                /*
                 * Normal connection termination. May need to wait if there are
                 * still packets in the TX queue that are delayed by the CCID.
                 */
                dccp_flush_write_queue(sk, &timeout);
                dccp_terminate_connection(sk);
        }

        /*
         * Flush write queue. This may be necessary in several cases:
         * - we have been closed by the peer but still have application data;
         * - abortive termination (unread data or zero linger time);
         * - normal termination but queue could not be flushed within time limit.
         */
        __skb_queue_purge(&sk->sk_write_queue);

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        percpu_counter_inc(sk->sk_prot->orphan_count);

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static inline int __init dccp_mib_init(void)
{
        dccp_statistics = alloc_percpu(struct dccp_mib);
        if (!dccp_statistics)
                return -ENOMEM;
        return 0;
}

static inline void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics);
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        unsigned long nr_pages = totalram_pages();
        int ehash_order, bhash_order, i;
        int rc;

        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));
        rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
        if (rc)
                goto out_fail;
        inet_hashinfo_init(&dccp_hashinfo);
        rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
        if (rc)
                goto out_free_percpu;
        rc = -ENOBUFS;
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_hashinfo2;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (nr_pages >= (128 * 1024))
                goal = nr_pages >> (21 - PAGE_SHIFT);
        else
                goal = nr_pages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);

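                /* Round hash_size down to a power of two so that
                 * ehash_mask below works as a simple bit mask.
                 */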
                while (hash_size & (hash_size - 1))
                        hash_size--;
                dccp_hashinfo.ehash_mask = hash_size - 1;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        rc = ccid_initialize_builtins();
        if (rc)
                goto out_sysctl_exit;

        dccp_timestamping_init();

        return 0;

out_sysctl_exit:
        dccp_sysctl_exit();
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
        inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
        percpu_counter_destroy(&dccp_orphan_count);
out_fail:
        dccp_hashinfo.bhash = NULL;
        dccp_hashinfo.ehash = NULL;
        dccp_hashinfo.bind_bucket_cachep = NULL;
        return rc;
}

static void __exit dccp_fini(void)
{
        ccid_cleanup_builtins();
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order((dccp_hashinfo.ehash_mask + 1) *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
        inet_hashinfo2_free_mod(&dccp_hashinfo);
        percpu_counter_destroy(&dccp_orphan_count);
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");