GNU Linux-libre 4.9.314-gnu1
[releases.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
/*
 * File-scope DCCP state exported to the rest of the DCCP code (GPL-only
 * exports).
 */
/* NOTE(review): presumably the storage behind DCCP_INC_STATS()/DCCP_DEC_STATS() - confirm in dccp.h */
41 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42
43 EXPORT_SYMBOL_GPL(dccp_statistics);
44
/* NOTE(review): not referenced in the visible part of this file; judging by the name it counts orphaned sockets - confirm */
45 struct percpu_counter dccp_orphan_count;
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
/* NOTE(review): not referenced in the visible part of this file; presumably the DCCP socket lookup tables - confirm */
48 struct inet_hashinfo dccp_hashinfo;
49 EXPORT_SYMBOL_GPL(dccp_hashinfo);
50
51 /* the maximum queue length for tx in packets. 0 is no limit */
/* dccp_init_sock() copies this value into each new socket's dccps_tx_qlen. */
52 int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 /* fall through */
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         sk->sk_state = state;
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 static void dccp_sk_destruct(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180         inet_sock_destruct(sk);
181 }
182
183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 {
185         struct dccp_sock *dp = dccp_sk(sk);
186         struct inet_connection_sock *icsk = inet_csk(sk);
187
188         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
189         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
190         sk->sk_state            = DCCP_CLOSED;
191         sk->sk_write_space      = dccp_write_space;
192         sk->sk_destruct         = dccp_sk_destruct;
193         icsk->icsk_sync_mss     = dccp_sync_mss;
194         dp->dccps_mss_cache     = 536;
195         dp->dccps_rate_last     = jiffies;
196         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
197         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
198         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
199
200         dccp_init_xmit_timers(sk);
201
202         INIT_LIST_HEAD(&dp->dccps_featneg);
203         /* control socket doesn't need feat nego */
204         if (likely(ctl_sock_initialized))
205                 return dccp_feat_init(sk);
206         return 0;
207 }
208
209 EXPORT_SYMBOL_GPL(dccp_init_sock);
210
211 void dccp_destroy_sock(struct sock *sk)
212 {
213         struct dccp_sock *dp = dccp_sk(sk);
214
215         __skb_queue_purge(&sk->sk_write_queue);
216         if (sk->sk_send_head != NULL) {
217                 kfree_skb(sk->sk_send_head);
218                 sk->sk_send_head = NULL;
219         }
220
221         /* Clean up a referenced DCCP bind bucket. */
222         if (inet_csk(sk)->icsk_bind_hash != NULL)
223                 inet_put_port(sk);
224
225         kfree(dp->dccps_service_list);
226         dp->dccps_service_list = NULL;
227
228         if (dp->dccps_hc_rx_ackvec != NULL) {
229                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
230                 dp->dccps_hc_rx_ackvec = NULL;
231         }
232         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
233         dp->dccps_hc_rx_ccid = NULL;
234
235         /* clean up feature negotiation state */
236         dccp_feat_list_purge(&dp->dccps_featneg);
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
241 static inline int dccp_listen_start(struct sock *sk, int backlog)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244
245         dp->dccps_role = DCCP_ROLE_LISTEN;
246         /* do not start to listen if feature negotiation setup fails */
247         if (dccp_feat_finalise_settings(dp))
248                 return -EPROTO;
249         return inet_csk_listen_start(sk, backlog);
250 }
251
252 static inline int dccp_need_reset(int state)
253 {
254         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
255                state != DCCP_REQUESTING;
256 }
257
258 int dccp_disconnect(struct sock *sk, int flags)
259 {
260         struct inet_connection_sock *icsk = inet_csk(sk);
261         struct inet_sock *inet = inet_sk(sk);
262         struct dccp_sock *dp = dccp_sk(sk);
263         int err = 0;
264         const int old_state = sk->sk_state;
265
266         if (old_state != DCCP_CLOSED)
267                 dccp_set_state(sk, DCCP_CLOSED);
268
269         /*
270          * This corresponds to the ABORT function of RFC793, sec. 3.8
271          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
272          */
273         if (old_state == DCCP_LISTEN) {
274                 inet_csk_listen_stop(sk);
275         } else if (dccp_need_reset(old_state)) {
276                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
277                 sk->sk_err = ECONNRESET;
278         } else if (old_state == DCCP_REQUESTING)
279                 sk->sk_err = ECONNRESET;
280
281         dccp_clear_xmit_timers(sk);
282         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
283         dp->dccps_hc_rx_ccid = NULL;
284
285         __skb_queue_purge(&sk->sk_receive_queue);
286         __skb_queue_purge(&sk->sk_write_queue);
287         if (sk->sk_send_head != NULL) {
288                 __kfree_skb(sk->sk_send_head);
289                 sk->sk_send_head = NULL;
290         }
291
292         inet->inet_dport = 0;
293
294         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
295                 inet_reset_saddr(sk);
296
297         sk->sk_shutdown = 0;
298         sock_reset_flag(sk, SOCK_DONE);
299
300         icsk->icsk_backoff = 0;
301         inet_csk_delack_init(sk);
302         __sk_dst_reset(sk);
303
304         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
305
306         sk->sk_error_report(sk);
307         return err;
308 }
309
310 EXPORT_SYMBOL_GPL(dccp_disconnect);
311
312 /*
313  *      Wait for a DCCP event.
314  *
315  *      Note that we don't need to lock the socket, as the upper poll layers
316  *      take care of normal races (between the test and the event) and we don't
317  *      go look at any of the socket buffers directly.
318  */
319 unsigned int dccp_poll(struct file *file, struct socket *sock,
320                        poll_table *wait)
321 {
322         unsigned int mask;
323         struct sock *sk = sock->sk;
324
325         sock_poll_wait(file, sk_sleep(sk), wait);
326         if (sk->sk_state == DCCP_LISTEN)
327                 return inet_csk_listen_poll(sk);
328
329         /* Socket is not locked. We are protected from async events
330            by poll logic and correct handling of state changes
331            made by another threads is impossible in any case.
332          */
333
334         mask = 0;
335         if (sk->sk_err)
336                 mask = POLLERR;
337
338         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
339                 mask |= POLLHUP;
340         if (sk->sk_shutdown & RCV_SHUTDOWN)
341                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
342
343         /* Connected? */
344         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
345                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
346                         mask |= POLLIN | POLLRDNORM;
347
348                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
349                         if (sk_stream_is_writeable(sk)) {
350                                 mask |= POLLOUT | POLLWRNORM;
351                         } else {  /* send SIGIO later */
352                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
353                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
354
355                                 /* Race breaker. If space is freed after
356                                  * wspace test but before the flags are set,
357                                  * IO signal will be lost.
358                                  */
359                                 if (sk_stream_is_writeable(sk))
360                                         mask |= POLLOUT | POLLWRNORM;
361                         }
362                 }
363         }
364         return mask;
365 }
366
367 EXPORT_SYMBOL_GPL(dccp_poll);
368
369 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
370 {
371         int rc = -ENOTCONN;
372
373         lock_sock(sk);
374
375         if (sk->sk_state == DCCP_LISTEN)
376                 goto out;
377
378         switch (cmd) {
379         case SIOCINQ: {
380                 struct sk_buff *skb;
381                 unsigned long amount = 0;
382
383                 skb = skb_peek(&sk->sk_receive_queue);
384                 if (skb != NULL) {
385                         /*
386                          * We will only return the amount of this packet since
387                          * that is all that will be read.
388                          */
389                         amount = skb->len;
390                 }
391                 rc = put_user(amount, (int __user *)arg);
392         }
393                 break;
394         default:
395                 rc = -ENOIOCTLCMD;
396                 break;
397         }
398 out:
399         release_sock(sk);
400         return rc;
401 }
402
403 EXPORT_SYMBOL_GPL(dccp_ioctl);
404
405 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
406                                    char __user *optval, unsigned int optlen)
407 {
408         struct dccp_sock *dp = dccp_sk(sk);
409         struct dccp_service_list *sl = NULL;
410
411         if (service == DCCP_SERVICE_INVALID_VALUE ||
412             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
413                 return -EINVAL;
414
415         if (optlen > sizeof(service)) {
416                 sl = kmalloc(optlen, GFP_KERNEL);
417                 if (sl == NULL)
418                         return -ENOMEM;
419
420                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
421                 if (copy_from_user(sl->dccpsl_list,
422                                    optval + sizeof(service),
423                                    optlen - sizeof(service)) ||
424                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
425                         kfree(sl);
426                         return -EFAULT;
427                 }
428         }
429
430         lock_sock(sk);
431         dp->dccps_service = service;
432
433         kfree(dp->dccps_service_list);
434
435         dp->dccps_service_list = sl;
436         release_sock(sk);
437         return 0;
438 }
439
440 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
441 {
442         u8 *list, len;
443         int i, rc;
444
445         if (cscov < 0 || cscov > 15)
446                 return -EINVAL;
447         /*
448          * Populate a list of permissible values, in the range cscov...15. This
449          * is necessary since feature negotiation of single values only works if
450          * both sides incidentally choose the same value. Since the list starts
451          * lowest-value first, negotiation will pick the smallest shared value.
452          */
453         if (cscov == 0)
454                 return 0;
455         len = 16 - cscov;
456
457         list = kmalloc(len, GFP_KERNEL);
458         if (list == NULL)
459                 return -ENOBUFS;
460
461         for (i = 0; i < len; i++)
462                 list[i] = cscov++;
463
464         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
465
466         if (rc == 0) {
467                 if (rx)
468                         dccp_sk(sk)->dccps_pcrlen = cscov;
469                 else
470                         dccp_sk(sk)->dccps_pcslen = cscov;
471         }
472         kfree(list);
473         return rc;
474 }
475
476 static int dccp_setsockopt_ccid(struct sock *sk, int type,
477                                 char __user *optval, unsigned int optlen)
478 {
479         u8 *val;
480         int rc = 0;
481
482         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
483                 return -EINVAL;
484
485         val = memdup_user(optval, optlen);
486         if (IS_ERR(val))
487                 return PTR_ERR(val);
488
489         lock_sock(sk);
490         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
491                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
492
493         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
494                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
495         release_sock(sk);
496
497         kfree(val);
498         return rc;
499 }
500
501 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
502                 char __user *optval, unsigned int optlen)
503 {
504         struct dccp_sock *dp = dccp_sk(sk);
505         int val, err = 0;
506
507         switch (optname) {
508         case DCCP_SOCKOPT_PACKET_SIZE:
509                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
510                 return 0;
511         case DCCP_SOCKOPT_CHANGE_L:
512         case DCCP_SOCKOPT_CHANGE_R:
513                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
514                 return 0;
515         case DCCP_SOCKOPT_CCID:
516         case DCCP_SOCKOPT_RX_CCID:
517         case DCCP_SOCKOPT_TX_CCID:
518                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
519         }
520
521         if (optlen < (int)sizeof(int))
522                 return -EINVAL;
523
524         if (get_user(val, (int __user *)optval))
525                 return -EFAULT;
526
527         if (optname == DCCP_SOCKOPT_SERVICE)
528                 return dccp_setsockopt_service(sk, val, optval, optlen);
529
530         lock_sock(sk);
531         switch (optname) {
532         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
533                 if (dp->dccps_role != DCCP_ROLE_SERVER)
534                         err = -EOPNOTSUPP;
535                 else
536                         dp->dccps_server_timewait = (val != 0);
537                 break;
538         case DCCP_SOCKOPT_SEND_CSCOV:
539                 err = dccp_setsockopt_cscov(sk, val, false);
540                 break;
541         case DCCP_SOCKOPT_RECV_CSCOV:
542                 err = dccp_setsockopt_cscov(sk, val, true);
543                 break;
544         case DCCP_SOCKOPT_QPOLICY_ID:
545                 if (sk->sk_state != DCCP_CLOSED)
546                         err = -EISCONN;
547                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
548                         err = -EINVAL;
549                 else
550                         dp->dccps_qpolicy = val;
551                 break;
552         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
553                 if (val < 0)
554                         err = -EINVAL;
555                 else
556                         dp->dccps_tx_qlen = val;
557                 break;
558         default:
559                 err = -ENOPROTOOPT;
560                 break;
561         }
562         release_sock(sk);
563
564         return err;
565 }
566
567 int dccp_setsockopt(struct sock *sk, int level, int optname,
568                     char __user *optval, unsigned int optlen)
569 {
570         if (level != SOL_DCCP)
571                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
572                                                              optname, optval,
573                                                              optlen);
574         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
575 }
576
577 EXPORT_SYMBOL_GPL(dccp_setsockopt);
578
579 #ifdef CONFIG_COMPAT
580 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
581                            char __user *optval, unsigned int optlen)
582 {
583         if (level != SOL_DCCP)
584                 return inet_csk_compat_setsockopt(sk, level, optname,
585                                                   optval, optlen);
586         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
587 }
588
589 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
590 #endif
591
592 static int dccp_getsockopt_service(struct sock *sk, int len,
593                                    __be32 __user *optval,
594                                    int __user *optlen)
595 {
596         const struct dccp_sock *dp = dccp_sk(sk);
597         const struct dccp_service_list *sl;
598         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
599
600         lock_sock(sk);
601         if ((sl = dp->dccps_service_list) != NULL) {
602                 slen = sl->dccpsl_nr * sizeof(u32);
603                 total_len += slen;
604         }
605
606         err = -EINVAL;
607         if (total_len > len)
608                 goto out;
609
610         err = 0;
611         if (put_user(total_len, optlen) ||
612             put_user(dp->dccps_service, optval) ||
613             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
614                 err = -EFAULT;
615 out:
616         release_sock(sk);
617         return err;
618 }
619
620 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
621                     char __user *optval, int __user *optlen)
622 {
623         struct dccp_sock *dp;
624         int val, len;
625
626         if (get_user(len, optlen))
627                 return -EFAULT;
628
629         if (len < (int)sizeof(int))
630                 return -EINVAL;
631
632         dp = dccp_sk(sk);
633
634         switch (optname) {
635         case DCCP_SOCKOPT_PACKET_SIZE:
636                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
637                 return 0;
638         case DCCP_SOCKOPT_SERVICE:
639                 return dccp_getsockopt_service(sk, len,
640                                                (__be32 __user *)optval, optlen);
641         case DCCP_SOCKOPT_GET_CUR_MPS:
642                 val = dp->dccps_mss_cache;
643                 break;
644         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
645                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
646         case DCCP_SOCKOPT_TX_CCID:
647                 val = ccid_get_current_tx_ccid(dp);
648                 if (val < 0)
649                         return -ENOPROTOOPT;
650                 break;
651         case DCCP_SOCKOPT_RX_CCID:
652                 val = ccid_get_current_rx_ccid(dp);
653                 if (val < 0)
654                         return -ENOPROTOOPT;
655                 break;
656         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
657                 val = dp->dccps_server_timewait;
658                 break;
659         case DCCP_SOCKOPT_SEND_CSCOV:
660                 val = dp->dccps_pcslen;
661                 break;
662         case DCCP_SOCKOPT_RECV_CSCOV:
663                 val = dp->dccps_pcrlen;
664                 break;
665         case DCCP_SOCKOPT_QPOLICY_ID:
666                 val = dp->dccps_qpolicy;
667                 break;
668         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
669                 val = dp->dccps_tx_qlen;
670                 break;
671         case 128 ... 191:
672                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
673                                              len, (u32 __user *)optval, optlen);
674         case 192 ... 255:
675                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
676                                              len, (u32 __user *)optval, optlen);
677         default:
678                 return -ENOPROTOOPT;
679         }
680
681         len = sizeof(val);
682         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
683                 return -EFAULT;
684
685         return 0;
686 }
687
688 int dccp_getsockopt(struct sock *sk, int level, int optname,
689                     char __user *optval, int __user *optlen)
690 {
691         if (level != SOL_DCCP)
692                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
693                                                              optname, optval,
694                                                              optlen);
695         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
696 }
697
698 EXPORT_SYMBOL_GPL(dccp_getsockopt);
699
700 #ifdef CONFIG_COMPAT
701 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
702                            char __user *optval, int __user *optlen)
703 {
704         if (level != SOL_DCCP)
705                 return inet_csk_compat_getsockopt(sk, level, optname,
706                                                   optval, optlen);
707         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
708 }
709
710 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
711 #endif
712
713 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
714 {
715         struct cmsghdr *cmsg;
716
717         /*
718          * Assign an (opaque) qpolicy priority value to skb->priority.
719          *
720          * We are overloading this skb field for use with the qpolicy subystem.
721          * The skb->priority is normally used for the SO_PRIORITY option, which
722          * is initialised from sk_priority. Since the assignment of sk_priority
723          * to skb->priority happens later (on layer 3), we overload this field
724          * for use with queueing priorities as long as the skb is on layer 4.
725          * The default priority value (if nothing is set) is 0.
726          */
727         skb->priority = 0;
728
729         for_each_cmsghdr(cmsg, msg) {
730                 if (!CMSG_OK(msg, cmsg))
731                         return -EINVAL;
732
733                 if (cmsg->cmsg_level != SOL_DCCP)
734                         continue;
735
736                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
737                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
738                         return -EINVAL;
739
740                 switch (cmsg->cmsg_type) {
741                 case DCCP_SCM_PRIORITY:
742                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
743                                 return -EINVAL;
744                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
745                         break;
746                 default:
747                         return -EINVAL;
748                 }
749         }
750         return 0;
751 }
752
753 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
754 {
755         const struct dccp_sock *dp = dccp_sk(sk);
756         const int flags = msg->msg_flags;
757         const int noblock = flags & MSG_DONTWAIT;
758         struct sk_buff *skb;
759         int rc, size;
760         long timeo;
761
762         if (len > dp->dccps_mss_cache)
763                 return -EMSGSIZE;
764
765         lock_sock(sk);
766
767         if (dccp_qpolicy_full(sk)) {
768                 rc = -EAGAIN;
769                 goto out_release;
770         }
771
772         timeo = sock_sndtimeo(sk, noblock);
773
774         /*
775          * We have to use sk_stream_wait_connect here to set sk_write_pending,
776          * so that the trick in dccp_rcv_request_sent_state_process.
777          */
778         /* Wait for a connection to finish. */
779         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
780                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
781                         goto out_release;
782
783         size = sk->sk_prot->max_header + len;
784         release_sock(sk);
785         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
786         lock_sock(sk);
787         if (skb == NULL)
788                 goto out_release;
789
790         if (sk->sk_state == DCCP_CLOSED) {
791                 rc = -ENOTCONN;
792                 goto out_discard;
793         }
794
795         skb_reserve(skb, sk->sk_prot->max_header);
796         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
797         if (rc != 0)
798                 goto out_discard;
799
800         rc = dccp_msghdr_parse(msg, skb);
801         if (rc != 0)
802                 goto out_discard;
803
804         dccp_qpolicy_push(sk, skb);
805         /*
806          * The xmit_timer is set if the TX CCID is rate-based and will expire
807          * when congestion control permits to release further packets into the
808          * network. Window-based CCIDs do not use this timer.
809          */
810         if (!timer_pending(&dp->dccps_xmit_timer))
811                 dccp_write_xmit(sk);
812 out_release:
813         release_sock(sk);
814         return rc ? : len;
815 out_discard:
816         kfree_skb(skb);
817         goto out_release;
818 }
819
820 EXPORT_SYMBOL_GPL(dccp_sendmsg);
821
822 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
823                  int flags, int *addr_len)
824 {
825         const struct dccp_hdr *dh;
826         long timeo;
827
828         lock_sock(sk);
829
830         if (sk->sk_state == DCCP_LISTEN) {
831                 len = -ENOTCONN;
832                 goto out;
833         }
834
835         timeo = sock_rcvtimeo(sk, nonblock);
836
837         do {
838                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
839
840                 if (skb == NULL)
841                         goto verify_sock_status;
842
843                 dh = dccp_hdr(skb);
844
845                 switch (dh->dccph_type) {
846                 case DCCP_PKT_DATA:
847                 case DCCP_PKT_DATAACK:
848                         goto found_ok_skb;
849
850                 case DCCP_PKT_CLOSE:
851                 case DCCP_PKT_CLOSEREQ:
852                         if (!(flags & MSG_PEEK))
853                                 dccp_finish_passive_close(sk);
854                         /* fall through */
855                 case DCCP_PKT_RESET:
856                         dccp_pr_debug("found fin (%s) ok!\n",
857                                       dccp_packet_name(dh->dccph_type));
858                         len = 0;
859                         goto found_fin_ok;
860                 default:
861                         dccp_pr_debug("packet_type=%s\n",
862                                       dccp_packet_name(dh->dccph_type));
863                         sk_eat_skb(sk, skb);
864                 }
865 verify_sock_status:
866                 if (sock_flag(sk, SOCK_DONE)) {
867                         len = 0;
868                         break;
869                 }
870
871                 if (sk->sk_err) {
872                         len = sock_error(sk);
873                         break;
874                 }
875
876                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
877                         len = 0;
878                         break;
879                 }
880
881                 if (sk->sk_state == DCCP_CLOSED) {
882                         if (!sock_flag(sk, SOCK_DONE)) {
883                                 /* This occurs when user tries to read
884                                  * from never connected socket.
885                                  */
886                                 len = -ENOTCONN;
887                                 break;
888                         }
889                         len = 0;
890                         break;
891                 }
892
893                 if (!timeo) {
894                         len = -EAGAIN;
895                         break;
896                 }
897
898                 if (signal_pending(current)) {
899                         len = sock_intr_errno(timeo);
900                         break;
901                 }
902
903                 sk_wait_data(sk, &timeo, NULL);
904                 continue;
905         found_ok_skb:
906                 if (len > skb->len)
907                         len = skb->len;
908                 else if (len < skb->len)
909                         msg->msg_flags |= MSG_TRUNC;
910
911                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
912                         /* Exception. Bailout! */
913                         len = -EFAULT;
914                         break;
915                 }
916                 if (flags & MSG_TRUNC)
917                         len = skb->len;
918         found_fin_ok:
919                 if (!(flags & MSG_PEEK))
920                         sk_eat_skb(sk, skb);
921                 break;
922         } while (1);
923 out:
924         release_sock(sk);
925         return len;
926 }
927
928 EXPORT_SYMBOL_GPL(dccp_recvmsg);
929
930 int inet_dccp_listen(struct socket *sock, int backlog)
931 {
932         struct sock *sk = sock->sk;
933         unsigned char old_state;
934         int err;
935
936         lock_sock(sk);
937
938         err = -EINVAL;
939         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
940                 goto out;
941
942         old_state = sk->sk_state;
943         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
944                 goto out;
945
946         /* Really, if the socket is already in listen state
947          * we can only allow the backlog to be adjusted.
948          */
949         if (old_state != DCCP_LISTEN) {
950                 /*
951                  * FIXME: here it probably should be sk->sk_prot->listen_start
952                  * see tcp_listen_start
953                  */
954                 err = dccp_listen_start(sk, backlog);
955                 if (err)
956                         goto out;
957         }
958         sk->sk_max_ack_backlog = backlog;
959         err = 0;
960
961 out:
962         release_sock(sk);
963         return err;
964 }
965
966 EXPORT_SYMBOL_GPL(inet_dccp_listen);
967
968 static void dccp_terminate_connection(struct sock *sk)
969 {
970         u8 next_state = DCCP_CLOSED;
971
972         switch (sk->sk_state) {
973         case DCCP_PASSIVE_CLOSE:
974         case DCCP_PASSIVE_CLOSEREQ:
975                 dccp_finish_passive_close(sk);
976                 break;
977         case DCCP_PARTOPEN:
978                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
979                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
980                 /* fall through */
981         case DCCP_OPEN:
982                 dccp_send_close(sk, 1);
983
984                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
985                     !dccp_sk(sk)->dccps_server_timewait)
986                         next_state = DCCP_ACTIVE_CLOSEREQ;
987                 else
988                         next_state = DCCP_CLOSING;
989                 /* fall through */
990         default:
991                 dccp_set_state(sk, next_state);
992         }
993 }
994
995 void dccp_close(struct sock *sk, long timeout)
996 {
997         struct dccp_sock *dp = dccp_sk(sk);
998         struct sk_buff *skb;
999         u32 data_was_unread = 0;
1000         int state;
1001
1002         lock_sock(sk);
1003
1004         sk->sk_shutdown = SHUTDOWN_MASK;
1005
1006         if (sk->sk_state == DCCP_LISTEN) {
1007                 dccp_set_state(sk, DCCP_CLOSED);
1008
1009                 /* Special case. */
1010                 inet_csk_listen_stop(sk);
1011
1012                 goto adjudge_to_death;
1013         }
1014
1015         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1016
1017         /*
1018          * We need to flush the recv. buffs.  We do this only on the
1019          * descriptor close, not protocol-sourced closes, because the
1020           *reader process may not have drained the data yet!
1021          */
1022         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1023                 data_was_unread += skb->len;
1024                 __kfree_skb(skb);
1025         }
1026
1027         /* If socket has been already reset kill it. */
1028         if (sk->sk_state == DCCP_CLOSED)
1029                 goto adjudge_to_death;
1030
1031         if (data_was_unread) {
1032                 /* Unread data was tossed, send an appropriate Reset Code */
1033                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1034                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1035                 dccp_set_state(sk, DCCP_CLOSED);
1036         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1037                 /* Check zero linger _after_ checking for unread data. */
1038                 sk->sk_prot->disconnect(sk, 0);
1039         } else if (sk->sk_state != DCCP_CLOSED) {
1040                 /*
1041                  * Normal connection termination. May need to wait if there are
1042                  * still packets in the TX queue that are delayed by the CCID.
1043                  */
1044                 dccp_flush_write_queue(sk, &timeout);
1045                 dccp_terminate_connection(sk);
1046         }
1047
1048         /*
1049          * Flush write queue. This may be necessary in several cases:
1050          * - we have been closed by the peer but still have application data;
1051          * - abortive termination (unread data or zero linger time),
1052          * - normal termination but queue could not be flushed within time limit
1053          */
1054         __skb_queue_purge(&sk->sk_write_queue);
1055
1056         sk_stream_wait_close(sk, timeout);
1057
1058 adjudge_to_death:
1059         state = sk->sk_state;
1060         sock_hold(sk);
1061         sock_orphan(sk);
1062
1063         /*
1064          * It is the last release_sock in its life. It will remove backlog.
1065          */
1066         release_sock(sk);
1067         /*
1068          * Now socket is owned by kernel and we acquire BH lock
1069          * to finish close. No need to check for user refs.
1070          */
1071         local_bh_disable();
1072         bh_lock_sock(sk);
1073         WARN_ON(sock_owned_by_user(sk));
1074
1075         percpu_counter_inc(sk->sk_prot->orphan_count);
1076
1077         /* Have we already been destroyed by a softirq or backlog? */
1078         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1079                 goto out;
1080
1081         if (sk->sk_state == DCCP_CLOSED)
1082                 inet_csk_destroy_sock(sk);
1083
1084         /* Otherwise, socket is reprieved until protocol close. */
1085
1086 out:
1087         bh_unlock_sock(sk);
1088         local_bh_enable();
1089         sock_put(sk);
1090 }
1091
1092 EXPORT_SYMBOL_GPL(dccp_close);
1093
/**
 * dccp_shutdown - shutdown hook for DCCP sockets
 * @sk: socket being shut down (not used here)
 * @how: shutdown mode; only printed, in hex, to the debug log
 *
 * Apart from emitting the debug message this function does nothing.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1100
1101 static inline int __init dccp_mib_init(void)
1102 {
1103         dccp_statistics = alloc_percpu(struct dccp_mib);
1104         if (!dccp_statistics)
1105                 return -ENOMEM;
1106         return 0;
1107 }
1108
1109 static inline void dccp_mib_exit(void)
1110 {
1111         free_percpu(dccp_statistics);
1112 }
1113
1114 static int thash_entries;
1115 module_param(thash_entries, int, 0444);
1116 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1117
1118 #ifdef CONFIG_IP_DCCP_DEBUG
1119 bool dccp_debug;
1120 module_param(dccp_debug, bool, 0644);
1121 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1122
1123 EXPORT_SYMBOL_GPL(dccp_debug);
1124 #endif
1125
1126 static int __init dccp_init(void)
1127 {
1128         unsigned long goal;
1129         int ehash_order, bhash_order, i;
1130         int rc;
1131
1132         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1133                      FIELD_SIZEOF(struct sk_buff, cb));
1134         rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
1135         if (rc)
1136                 goto out_fail;
1137         rc = -ENOBUFS;
1138         inet_hashinfo_init(&dccp_hashinfo);
1139         dccp_hashinfo.bind_bucket_cachep =
1140                 kmem_cache_create("dccp_bind_bucket",
1141                                   sizeof(struct inet_bind_bucket), 0,
1142                                   SLAB_HWCACHE_ALIGN, NULL);
1143         if (!dccp_hashinfo.bind_bucket_cachep)
1144                 goto out_free_percpu;
1145
1146         /*
1147          * Size and allocate the main established and bind bucket
1148          * hash tables.
1149          *
1150          * The methodology is similar to that of the buffer cache.
1151          */
1152         if (totalram_pages >= (128 * 1024))
1153                 goal = totalram_pages >> (21 - PAGE_SHIFT);
1154         else
1155                 goal = totalram_pages >> (23 - PAGE_SHIFT);
1156
1157         if (thash_entries)
1158                 goal = (thash_entries *
1159                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1160         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1161                 ;
1162         do {
1163                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1164                                         sizeof(struct inet_ehash_bucket);
1165
1166                 while (hash_size & (hash_size - 1))
1167                         hash_size--;
1168                 dccp_hashinfo.ehash_mask = hash_size - 1;
1169                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1170                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1171         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1172
1173         if (!dccp_hashinfo.ehash) {
1174                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1175                 goto out_free_bind_bucket_cachep;
1176         }
1177
1178         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1179                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1180
1181         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1182                         goto out_free_dccp_ehash;
1183
1184         bhash_order = ehash_order;
1185
1186         do {
1187                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1188                                         sizeof(struct inet_bind_hashbucket);
1189                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1190                     bhash_order > 0)
1191                         continue;
1192                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1193                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1194         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1195
1196         if (!dccp_hashinfo.bhash) {
1197                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1198                 goto out_free_dccp_locks;
1199         }
1200
1201         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1202                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1203                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1204         }
1205
1206         rc = dccp_mib_init();
1207         if (rc)
1208                 goto out_free_dccp_bhash;
1209
1210         rc = dccp_ackvec_init();
1211         if (rc)
1212                 goto out_free_dccp_mib;
1213
1214         rc = dccp_sysctl_init();
1215         if (rc)
1216                 goto out_ackvec_exit;
1217
1218         rc = ccid_initialize_builtins();
1219         if (rc)
1220                 goto out_sysctl_exit;
1221
1222         dccp_timestamping_init();
1223
1224         return 0;
1225
1226 out_sysctl_exit:
1227         dccp_sysctl_exit();
1228 out_ackvec_exit:
1229         dccp_ackvec_exit();
1230 out_free_dccp_mib:
1231         dccp_mib_exit();
1232 out_free_dccp_bhash:
1233         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1234 out_free_dccp_locks:
1235         inet_ehash_locks_free(&dccp_hashinfo);
1236 out_free_dccp_ehash:
1237         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1238 out_free_bind_bucket_cachep:
1239         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1240 out_free_percpu:
1241         percpu_counter_destroy(&dccp_orphan_count);
1242 out_fail:
1243         dccp_hashinfo.bhash = NULL;
1244         dccp_hashinfo.ehash = NULL;
1245         dccp_hashinfo.bind_bucket_cachep = NULL;
1246         return rc;
1247 }
1248
1249 static void __exit dccp_fini(void)
1250 {
1251         ccid_cleanup_builtins();
1252         dccp_mib_exit();
1253         free_pages((unsigned long)dccp_hashinfo.bhash,
1254                    get_order(dccp_hashinfo.bhash_size *
1255                              sizeof(struct inet_bind_hashbucket)));
1256         free_pages((unsigned long)dccp_hashinfo.ehash,
1257                    get_order((dccp_hashinfo.ehash_mask + 1) *
1258                              sizeof(struct inet_ehash_bucket)));
1259         inet_ehash_locks_free(&dccp_hashinfo);
1260         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1261         dccp_ackvec_exit();
1262         dccp_sysctl_exit();
1263         percpu_counter_destroy(&dccp_orphan_count);
1264 }
1265
1266 module_init(dccp_init);
1267 module_exit(dccp_fini);
1268
1269 MODULE_LICENSE("GPL");
1270 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1271 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");