GNU Linux-libre 4.9.318-gnu1: net/rxrpc/input.c

/* RxRPC packet reception
 *
 * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
#include <linux/udp.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/icmp.h>
#include <linux/gfp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/udp.h>
#include <net/net_namespace.h>
#include "ar-internal.h"

static void rxrpc_proto_abort(const char *why,
                              struct rxrpc_call *call, rxrpc_seq_t seq)
{
        if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
                set_bit(RXRPC_CALL_EV_ABORT, &call->events);
                rxrpc_queue_call(call);
        }
}

/*
 * Do TCP-style congestion management [RFC 5681].
 */
static void rxrpc_congestion_management(struct rxrpc_call *call,
                                        struct sk_buff *skb,
                                        struct rxrpc_ack_summary *summary,
                                        rxrpc_serial_t acked_serial)
{
        enum rxrpc_congest_change change = rxrpc_cong_no_change;
        unsigned int cumulative_acks = call->cong_cumul_acks;
        unsigned int cwnd = call->cong_cwnd;
        bool resend = false;

        summary->flight_size =
                (call->tx_top - call->tx_hard_ack) - summary->nr_acks;

        if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
                summary->retrans_timeo = true;
                call->cong_ssthresh = max_t(unsigned int,
                                            summary->flight_size / 2, 2);
                cwnd = 1;
                if (cwnd >= call->cong_ssthresh &&
                    call->cong_mode == RXRPC_CALL_SLOW_START) {
                        call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
                        call->cong_tstamp = skb->tstamp;
                        cumulative_acks = 0;
                }
        }

        cumulative_acks += summary->nr_new_acks;
        cumulative_acks += summary->nr_rot_new_acks;
        if (cumulative_acks > 255)
                cumulative_acks = 255;

        summary->mode = call->cong_mode;
        summary->cwnd = call->cong_cwnd;
        summary->ssthresh = call->cong_ssthresh;
        summary->cumulative_acks = cumulative_acks;
        summary->dup_acks = call->cong_dup_acks;

        switch (call->cong_mode) {
        case RXRPC_CALL_SLOW_START:
                if (summary->nr_nacks > 0)
                        goto packet_loss_detected;
                if (summary->cumulative_acks > 0)
                        cwnd += 1;
                if (cwnd >= call->cong_ssthresh) {
                        call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
                        call->cong_tstamp = skb->tstamp;
                }
                goto out;

        case RXRPC_CALL_CONGEST_AVOIDANCE:
                if (summary->nr_nacks > 0)
                        goto packet_loss_detected;

                /* We analyse the number of packets that get ACK'd per RTT
                 * period and increase the window if we managed to fill it.
                 */
                if (call->peer->rtt_usage == 0)
                        goto out;
                if (ktime_before(skb->tstamp,
                                 ktime_add_ns(call->cong_tstamp,
                                              call->peer->rtt)))
                        goto out_no_clear_ca;
                change = rxrpc_cong_rtt_window_end;
                call->cong_tstamp = skb->tstamp;
                if (cumulative_acks >= cwnd)
                        cwnd++;
                goto out;

        case RXRPC_CALL_PACKET_LOSS:
                if (summary->nr_nacks == 0)
                        goto resume_normality;

                if (summary->new_low_nack) {
                        change = rxrpc_cong_new_low_nack;
                        call->cong_dup_acks = 1;
                        if (call->cong_extra > 1)
                                call->cong_extra = 1;
                        goto send_extra_data;
                }

                call->cong_dup_acks++;
                if (call->cong_dup_acks < 3)
                        goto send_extra_data;

                change = rxrpc_cong_begin_retransmission;
                call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
                call->cong_ssthresh = max_t(unsigned int,
                                            summary->flight_size / 2, 2);
                cwnd = call->cong_ssthresh + 3;
                call->cong_extra = 0;
                call->cong_dup_acks = 0;
                resend = true;
                goto out;

        case RXRPC_CALL_FAST_RETRANSMIT:
                if (!summary->new_low_nack) {
                        if (summary->nr_new_acks == 0)
                                cwnd += 1;
                        call->cong_dup_acks++;
                        if (call->cong_dup_acks == 2) {
                                change = rxrpc_cong_retransmit_again;
                                call->cong_dup_acks = 0;
                                resend = true;
                        }
                } else {
                        change = rxrpc_cong_progress;
                        cwnd = call->cong_ssthresh;
                        if (summary->nr_nacks == 0)
                                goto resume_normality;
                }
                goto out;

        default:
                BUG();
                goto out;
        }

resume_normality:
        change = rxrpc_cong_cleared_nacks;
        call->cong_dup_acks = 0;
        call->cong_extra = 0;
        call->cong_tstamp = skb->tstamp;
        if (cwnd < call->cong_ssthresh)
                call->cong_mode = RXRPC_CALL_SLOW_START;
        else
                call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
out:
        cumulative_acks = 0;
out_no_clear_ca:
        if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
                cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
        call->cong_cwnd = cwnd;
        call->cong_cumul_acks = cumulative_acks;
        trace_rxrpc_congest(call, summary, acked_serial, change);
        if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
                rxrpc_queue_call(call);
        return;

packet_loss_detected:
        change = rxrpc_cong_saw_nack;
        call->cong_mode = RXRPC_CALL_PACKET_LOSS;
        call->cong_dup_acks = 0;
        goto send_extra_data;

send_extra_data:
        /* Send some previously unsent DATA if we have some to advance the ACK
         * state.
         */
        if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
            RXRPC_TX_ANNO_LAST ||
            summary->nr_acks != call->tx_top - call->tx_hard_ack) {
                call->cong_extra++;
                wake_up(&call->waitq);
        }
        goto out_no_clear_ca;
}
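
/* An illustrative summary of how the modes above chain together (a sketch
 * only; the switch statement in rxrpc_congestion_management() is
 * authoritative):
 *
 *      SLOW_START ---cwnd >= ssthresh---> CONGEST_AVOIDANCE
 *          |                                    |
 *       NACK seen                            NACK seen
 *          v                                    v
 *      PACKET_LOSS ---3 duplicate ACKs---> FAST_RETRANSMIT
 *          |                                    |
 *          +-------- NACKs all cleared ---------+
 *          v
 *      back to SLOW_START or CONGEST_AVOIDANCE, per cwnd vs ssthresh
 *
 * Worked example, assuming ssthresh = 8: cwnd grows by one per ACK-bearing
 * packet in slow start (1, 2, 3, ...); on reaching 8 the call moves to
 * congestion avoidance, where cwnd grows by at most one per RTT.  On loss,
 * ssthresh drops to half the flight size (minimum 2), and after fast
 * retransmission cwnd resumes from ssthresh + 3.  A retransmission timeout
 * instead resets cwnd to 1.
 */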

/*
 * Ping the other end to fill our RTT cache and to retrieve the rwind
 * and MTU parameters.
 */
static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
                            int skew)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        ktime_t now = skb->tstamp;

        if (call->peer->rtt_usage < 3 ||
            ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
                rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
                                  true, true,
                                  rxrpc_propose_ack_ping_for_params);
}

/*
 * Apply a hard ACK by advancing the Tx window.
 */
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
                                   struct rxrpc_ack_summary *summary)
{
        struct sk_buff *skb, *list = NULL;
        bool rot_last = false;
        int ix;
        u8 annotation;

        if (call->acks_lowest_nak == call->tx_hard_ack) {
                call->acks_lowest_nak = to;
        } else if (before_eq(call->acks_lowest_nak, to)) {
                summary->new_low_nack = true;
                call->acks_lowest_nak = to;
        }

        spin_lock(&call->lock);

        while (before(call->tx_hard_ack, to)) {
                call->tx_hard_ack++;
                ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
                skb = call->rxtx_buffer[ix];
                annotation = call->rxtx_annotations[ix];
                rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
                call->rxtx_buffer[ix] = NULL;
                call->rxtx_annotations[ix] = 0;
                skb->next = list;
                list = skb;

                if (annotation & RXRPC_TX_ANNO_LAST) {
                        set_bit(RXRPC_CALL_TX_LAST, &call->flags);
                        rot_last = true;
                }
                if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
                        summary->nr_rot_new_acks++;
        }

        spin_unlock(&call->lock);

        trace_rxrpc_transmit(call, (rot_last ?
                                    rxrpc_transmit_rotate_last :
                                    rxrpc_transmit_rotate));
        wake_up(&call->waitq);

        while (list) {
                skb = list;
                list = skb->next;
                skb->next = NULL;
                rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
        }

        return rot_last;
}
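
/* The Tx and Rx rings used above are power-of-two arrays indexed by masking
 * the sequence number (a sketch of the arithmetic, assuming
 * RXRPC_RXTX_BUFF_MASK == RXRPC_RXTX_BUFF_SIZE - 1):
 *
 *      ix  = seq & RXRPC_RXTX_BUFF_MASK;       (equivalent to seq % size)
 *      skb = call->rxtx_buffer[ix];
 *
 * Consecutive sequence numbers therefore wrap naturally around the ring,
 * which is why the window is everywhere clamped to RXRPC_RXTX_BUFF_SIZE - 1:
 * any larger window would let a new packet land in a slot still occupied by
 * an unacknowledged older one.
 */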

/*
 * End the transmission phase of a call.
 *
 * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
 * or a final ACK packet.
 */
static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
                               const char *abort_why)
{
        ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));

        write_lock(&call->state_lock);

        switch (call->state) {
        case RXRPC_CALL_CLIENT_SEND_REQUEST:
        case RXRPC_CALL_CLIENT_AWAIT_REPLY:
                if (reply_begun)
                        call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
                else
                        call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
                break;

        case RXRPC_CALL_SERVER_AWAIT_ACK:
                __rxrpc_call_completed(call);
                rxrpc_notify_socket(call);
                break;

        default:
                goto bad_state;
        }

        write_unlock(&call->state_lock);
        if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
                rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
                                  rxrpc_propose_ack_client_tx_end);
                trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
        } else {
                trace_rxrpc_transmit(call, rxrpc_transmit_end);
        }
        _leave(" = ok");
        return true;

bad_state:
        write_unlock(&call->state_lock);
        kdebug("end_tx %s", rxrpc_call_states[call->state]);
        rxrpc_proto_abort(abort_why, call, call->tx_top);
        return false;
}

/*
 * Begin the reply reception phase of a call.
 */
static bool rxrpc_receiving_reply(struct rxrpc_call *call)
{
        struct rxrpc_ack_summary summary = { 0 };
        rxrpc_seq_t top = READ_ONCE(call->tx_top);

        if (call->ackr_reason) {
                spin_lock_bh(&call->lock);
                call->ackr_reason = 0;
                call->resend_at = call->expire_at;
                call->ack_at = call->expire_at;
                spin_unlock_bh(&call->lock);
                rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
                                ktime_get_real());
        }

        if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
                if (!rxrpc_rotate_tx_window(call, top, &summary)) {
                        rxrpc_proto_abort("TXL", call, top);
                        return false;
                }
        }
        if (!rxrpc_end_tx_phase(call, true, "ETD"))
                return false;
        call->tx_phase = false;
        return true;
}

/*
 * Scan a jumbo packet to validate its structure and to work out how many
 * subpackets it contains.
 *
 * A jumbo packet is a collection of consecutive packets glued together with
 * little headers between that indicate how to change the initial header for
 * each subpacket.
 *
 * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
 * the last are RXRPC_JUMBO_DATALEN in size.  The last subpacket may be of any
 * size.
 */
static bool rxrpc_validate_jumbo(struct sk_buff *skb)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        unsigned int offset = sizeof(struct rxrpc_wire_header);
        unsigned int len = skb->len;
        int nr_jumbo = 1;
        u8 flags = sp->hdr.flags;

        do {
                nr_jumbo++;
                if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
                        goto protocol_error;
                if (flags & RXRPC_LAST_PACKET)
                        goto protocol_error;
                offset += RXRPC_JUMBO_DATALEN;
                if (skb_copy_bits(skb, offset, &flags, 1) < 0)
                        goto protocol_error;
                offset += sizeof(struct rxrpc_jumbo_header);
        } while (flags & RXRPC_JUMBO_PACKET);

        sp->nr_jumbo = nr_jumbo;
        return true;

protocol_error:
        return false;
}
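
/* Assumed wire layout of a jumbo packet, as checked above (illustrative):
 *
 *      +-------------+---------------------+--------------+------...---+
 *      | wire header | RXRPC_JUMBO_DATALEN | jumbo header | last       |
 *      |             | bytes of data       | (next flags) | subpacket  |
 *      +-------------+---------------------+--------------+------...---+
 *
 * Every subpacket except the last contributes exactly RXRPC_JUMBO_DATALEN
 * bytes of data followed by a small jumbo header carrying the flags for the
 * subpacket after it; the last subpacket runs to the end of the UDP payload
 * and may be any size.  So, for n subpackets, the payload must hold at least
 *
 *      sizeof(struct rxrpc_wire_header) +
 *      (n - 1) * (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
 *
 * which is the bound the RXRPC_JUMBO_SUBPKTLEN test enforces per iteration.
 */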

/*
 * Handle reception of a duplicate packet.
 *
 * We have to take care to avoid an attack here whereby we're given a series of
 * jumbograms, each with a sequence number one before the preceding one and
 * filled up to maximum UDP size.  If they never send us the first packet in
 * the sequence, they can cause us to have to hold on to around 2MiB of kernel
 * space until the call times out.
 *
 * We limit the space usage by only accepting three duplicate jumbo packets per
 * call.  After that, we tell the other side we're no longer accepting jumbos
 * (that information is encoded in the ACK packet).
 */
static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
                                 u8 annotation, bool *_jumbo_bad)
{
        /* Discard normal packets that are duplicates. */
        if (annotation == 0)
                return;

        /* Skip jumbo subpackets that are duplicates.  When we've had three or
         * more partially duplicate jumbo packets, we refuse to take any more
         * jumbos for this call.
         */
        if (!*_jumbo_bad) {
                call->nr_jumbo_bad++;
                *_jumbo_bad = true;
        }
}

/*
 * Process a DATA packet, adding the packet to the Rx ring.
 */
static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
                             u16 skew)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        unsigned int offset = sizeof(struct rxrpc_wire_header);
        unsigned int ix;
        rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
        rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
        bool immediate_ack = false, jumbo_bad = false, queued;
        u16 len;
        u8 ack = 0, flags, annotation = 0;

        _enter("{%u,%u},{%u,%u}",
               call->rx_hard_ack, call->rx_top, skb->len, seq);

        _proto("Rx DATA %%%u { #%u f=%02x }",
               sp->hdr.serial, seq, sp->hdr.flags);

        if (call->state >= RXRPC_CALL_COMPLETE)
                return;

        /* Received data implicitly ACKs all of the request packets we sent
         * when we're acting as a client.
         */
        if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
             call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
            !rxrpc_receiving_reply(call))
                return;

        call->ackr_prev_seq = seq;

        hard_ack = READ_ONCE(call->rx_hard_ack);
        if (after(seq, hard_ack + call->rx_winsize)) {
                ack = RXRPC_ACK_EXCEEDS_WINDOW;
                ack_serial = serial;
                goto ack;
        }

        flags = sp->hdr.flags;
        if (flags & RXRPC_JUMBO_PACKET) {
                if (call->nr_jumbo_bad > 3) {
                        ack = RXRPC_ACK_NOSPACE;
                        ack_serial = serial;
                        goto ack;
                }
                annotation = 1;
        }

next_subpacket:
        queued = false;
        ix = seq & RXRPC_RXTX_BUFF_MASK;
        len = skb->len;
        if (flags & RXRPC_JUMBO_PACKET)
                len = RXRPC_JUMBO_DATALEN;

        if (flags & RXRPC_LAST_PACKET) {
                if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
                    seq != call->rx_top)
                        return rxrpc_proto_abort("LSN", call, seq);
        } else {
                if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
                    after_eq(seq, call->rx_top))
                        return rxrpc_proto_abort("LSA", call, seq);
        }

        if (before_eq(seq, hard_ack)) {
                ack = RXRPC_ACK_DUPLICATE;
                ack_serial = serial;
                goto skip;
        }

        if (flags & RXRPC_REQUEST_ACK && !ack) {
                ack = RXRPC_ACK_REQUESTED;
                ack_serial = serial;
        }

        if (call->rxtx_buffer[ix]) {
                rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
                if (ack != RXRPC_ACK_DUPLICATE) {
                        ack = RXRPC_ACK_DUPLICATE;
                        ack_serial = serial;
                }
                immediate_ack = true;
                goto skip;
        }

        /* Queue the packet.  We use a couple of memory barriers here as we
         * need to make sure that rx_top is perceived to be set after the
         * buffer pointer and that the buffer pointer is set after the
         * annotation and the skb data.
         *
         * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
         * and also rxrpc_fill_out_ack().
         */
        rxrpc_get_skb(skb, rxrpc_skb_rx_got);
        call->rxtx_annotations[ix] = annotation;
        smp_wmb();
        call->rxtx_buffer[ix] = skb;
        if (after(seq, call->rx_top)) {
                smp_store_release(&call->rx_top, seq);
        } else if (before(seq, call->rx_top)) {
                /* Send an immediate ACK if we fill in a hole */
                if (!ack) {
                        ack = RXRPC_ACK_DELAY;
                        ack_serial = serial;
                }
                immediate_ack = true;
        }
        if (flags & RXRPC_LAST_PACKET) {
                set_bit(RXRPC_CALL_RX_LAST, &call->flags);
                trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
        } else {
                trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
        }
        queued = true;

        if (after_eq(seq, call->rx_expect_next)) {
                if (after(seq, call->rx_expect_next)) {
                        _net("OOS %u > %u", seq, call->rx_expect_next);
                        ack = RXRPC_ACK_OUT_OF_SEQUENCE;
                        ack_serial = serial;
                }
                call->rx_expect_next = seq + 1;
        }

skip:
        offset += len;
        if (flags & RXRPC_JUMBO_PACKET) {
                if (skb_copy_bits(skb, offset, &flags, 1) < 0)
                        return rxrpc_proto_abort("XJF", call, seq);
                offset += sizeof(struct rxrpc_jumbo_header);
                seq++;
                serial++;
                annotation++;
                if (flags & RXRPC_JUMBO_PACKET)
                        annotation |= RXRPC_RX_ANNO_JLAST;
                if (after(seq, hard_ack + call->rx_winsize)) {
                        ack = RXRPC_ACK_EXCEEDS_WINDOW;
                        ack_serial = serial;
                        if (!jumbo_bad) {
                                call->nr_jumbo_bad++;
                                jumbo_bad = true;
                        }
                        goto ack;
                }

                _proto("Rx DATA Jumbo %%%u", serial);
                goto next_subpacket;
        }

        if (queued && flags & RXRPC_LAST_PACKET && !ack) {
                ack = RXRPC_ACK_DELAY;
                ack_serial = serial;
        }

ack:
        if (ack)
                rxrpc_propose_ACK(call, ack, skew, ack_serial,
                                  immediate_ack, true,
                                  rxrpc_propose_ack_input_data);

        rxrpc_notify_socket(call);
        _leave(" [queued]");
}
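
/* The barrier scheme used when queueing into the Rx ring above can be
 * pictured as a publish/subscribe pairing (a sketch only, not the actual
 * reader code; see rxrpc_recvmsg_data() for the real consumer):
 *
 *      producer (rxrpc_input_data)          consumer (recvmsg side)
 *      rxtx_annotations[ix] = annotation;   top = smp_load_acquire(&rx_top);
 *      smp_wmb();                           skb = rxtx_buffer[ix];
 *      rxtx_buffer[ix] = skb;               smp_rmb();
 *      smp_store_release(&rx_top, seq);     anno = rxtx_annotations[ix];
 *
 * A consumer that observes the new rx_top is thus guaranteed to see the
 * buffer pointer, and having seen the buffer pointer it is guaranteed to see
 * the annotation and the skb contents.
 */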

/*
 * Process a requested ACK.
 */
static void rxrpc_input_requested_ack(struct rxrpc_call *call,
                                      ktime_t resp_time,
                                      rxrpc_serial_t orig_serial,
                                      rxrpc_serial_t ack_serial)
{
        struct rxrpc_skb_priv *sp;
        struct sk_buff *skb;
        ktime_t sent_at;
        int ix;

        for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) {
                skb = call->rxtx_buffer[ix];
                if (!skb)
                        continue;

                sp = rxrpc_skb(skb);
                if (sp->hdr.serial != orig_serial)
                        continue;
                smp_rmb();
                sent_at = skb->tstamp;
                goto found;
        }
        return;

found:
        rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack,
                           orig_serial, ack_serial, sent_at, resp_time);
}

/*
 * Process a ping response.
 */
static void rxrpc_input_ping_response(struct rxrpc_call *call,
                                      ktime_t resp_time,
                                      rxrpc_serial_t orig_serial,
                                      rxrpc_serial_t ack_serial)
{
        rxrpc_serial_t ping_serial;
        ktime_t ping_time;

        ping_time = call->ping_time;
        smp_rmb();
        ping_serial = call->ping_serial;

        if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
            before(orig_serial, ping_serial))
                return;
        clear_bit(RXRPC_CALL_PINGING, &call->flags);
        if (after(orig_serial, ping_serial))
                return;

        rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response,
                           orig_serial, ack_serial, ping_time, resp_time);
}
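
/* Note on the ordering above: ping_time is read before ping_serial with
 * smp_rmb() in between, pairing with the sender's write ordering (assumed
 * here: serial stored first, smp_wmb(), then timestamp) so that the
 * timestamp read can never belong to a newer ping than the serial it is
 * compared against.  The before()/after() checks then restrict RTT sampling
 * to the most recent ping: a response to an older ping is ignored outright,
 * and a response carrying a newer serial than the one recorded clears
 * RXRPC_CALL_PINGING without contributing a sample.
 */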

/*
 * Process the extra information that may be appended to an ACK packet
 */
static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
                                struct rxrpc_ackinfo *ackinfo)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        struct rxrpc_peer *peer;
        unsigned int mtu;
        bool wake = false;
        u32 rwind = ntohl(ackinfo->rwind);

        _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
               sp->hdr.serial,
               ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
               rwind, ntohl(ackinfo->jumbo_max));

        if (call->tx_winsize != rwind) {
                if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
                        rwind = RXRPC_RXTX_BUFF_SIZE - 1;
                if (rwind > call->tx_winsize)
                        wake = true;
                call->tx_winsize = rwind;
        }

        if (call->cong_ssthresh > rwind)
                call->cong_ssthresh = rwind;

        mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));

        peer = call->peer;
        if (mtu < peer->maxdata) {
                spin_lock_bh(&peer->lock);
                peer->maxdata = mtu;
                peer->mtu = mtu + peer->hdrsize;
                spin_unlock_bh(&peer->lock);
                _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
        }

        if (wake)
                wake_up(&call->waitq);
}
/*
 * Process individual soft ACKs.
 *
 * Each ACK in the array corresponds to one packet and can be either an ACK or
 * a NAK.  If we find an explicitly NAK'd packet we resend immediately;
 * packets that lie beyond the end of the ACK list are scheduled for resend by
 * the timer on the basis that the peer might just not have processed them at
 * the time the ACK was sent.
 */
static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
                                  rxrpc_seq_t seq, int nr_acks,
                                  struct rxrpc_ack_summary *summary)
{
        int ix;
        u8 annotation, anno_type;

        for (; nr_acks > 0; nr_acks--, seq++) {
                ix = seq & RXRPC_RXTX_BUFF_MASK;
                annotation = call->rxtx_annotations[ix];
                anno_type = annotation & RXRPC_TX_ANNO_MASK;
                annotation &= ~RXRPC_TX_ANNO_MASK;
                switch (*acks++) {
                case RXRPC_ACK_TYPE_ACK:
                        summary->nr_acks++;
                        if (anno_type == RXRPC_TX_ANNO_ACK)
                                continue;
                        summary->nr_new_acks++;
                        call->rxtx_annotations[ix] =
                                RXRPC_TX_ANNO_ACK | annotation;
                        break;
                case RXRPC_ACK_TYPE_NACK:
                        if (!summary->nr_nacks &&
                            call->acks_lowest_nak != seq) {
                                call->acks_lowest_nak = seq;
                                summary->new_low_nack = true;
                        }
                        summary->nr_nacks++;
                        if (anno_type == RXRPC_TX_ANNO_NAK)
                                continue;
                        summary->nr_new_nacks++;
                        if (anno_type == RXRPC_TX_ANNO_RETRANS)
                                continue;
                        call->rxtx_annotations[ix] =
                                RXRPC_TX_ANNO_NAK | annotation;
                        break;
                default:
                        return rxrpc_proto_abort("SFT", call, 0);
                }
        }
}
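
/* Tx annotation encoding relied on above (a sketch; ar-internal.h is
 * authoritative): the bottom bits hold the per-packet state and the higher
 * bits are flags that must survive state changes, e.g.:
 *
 *      anno_type  = annotation & RXRPC_TX_ANNO_MASK;  (ACK/UNACK/NAK/RETRANS)
 *      annotation &= ~RXRPC_TX_ANNO_MASK;             (keep the flag bits)
 *      annotation |= RXRPC_TX_ANNO_ACK;               (install the new state)
 *
 * which is why a soft-ACK can flip a packet between ACK'd and NAK'd without
 * disturbing flags such as RXRPC_TX_ANNO_LAST.
 */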

/*
 * Process an ACK packet.
 *
 * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
 * in the ACK array.  Anything before that is hard-ACK'd and may be discarded.
 *
 * A hard-ACK means that a packet has been processed and may be discarded; a
 * soft-ACK means that the packet has been received, but may yet be discarded
 * by the peer and retransmission requested.  A phase is complete when all
 * packets are hard-ACK'd.
 */
static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
                            u16 skew)
{
        struct rxrpc_ack_summary summary = { 0 };
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        union {
                struct rxrpc_ackpacket ack;
                struct rxrpc_ackinfo info;
                u8 acks[RXRPC_MAXACKS];
        } buf;
        rxrpc_serial_t acked_serial;
        rxrpc_seq_t first_soft_ack, hard_ack;
        int nr_acks, offset, ioffset;

        _enter("");

        offset = sizeof(struct rxrpc_wire_header);
        if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
                _debug("extraction failure");
                return rxrpc_proto_abort("XAK", call, 0);
        }
        offset += sizeof(buf.ack);

        acked_serial = ntohl(buf.ack.serial);
        first_soft_ack = ntohl(buf.ack.firstPacket);
        hard_ack = first_soft_ack - 1;
        nr_acks = buf.ack.nAcks;
        summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
                              buf.ack.reason : RXRPC_ACK__INVALID);

        trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks);

        _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
               sp->hdr.serial,
               ntohs(buf.ack.maxSkew),
               first_soft_ack,
               ntohl(buf.ack.previousPacket),
               acked_serial,
               rxrpc_ack_names[summary.ack_reason],
               buf.ack.nAcks);

        if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
                rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
                                          sp->hdr.serial);
        if (buf.ack.reason == RXRPC_ACK_REQUESTED)
                rxrpc_input_requested_ack(call, skb->tstamp, acked_serial,
                                          sp->hdr.serial);

        if (buf.ack.reason == RXRPC_ACK_PING) {
                _proto("Rx ACK %%%u PING Request", sp->hdr.serial);
                rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
                                  skew, sp->hdr.serial, true, true,
                                  rxrpc_propose_ack_respond_to_ping);
        } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
                rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
                                  skew, sp->hdr.serial, true, true,
                                  rxrpc_propose_ack_respond_to_ack);
        }

        /* Discard any out-of-order or duplicate ACKs. */
        if (before_eq(sp->hdr.serial, call->acks_latest)) {
                _debug("discard ACK %d <= %d",
                       sp->hdr.serial, call->acks_latest);
                return;
        }
        call->acks_latest_ts = skb->tstamp;
        call->acks_latest = sp->hdr.serial;

        /* Parse rwind and mtu sizes if provided. */
        ioffset = offset + nr_acks + 3;
        if (skb->len >= ioffset + sizeof(buf.info)) {
                if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
                        return rxrpc_proto_abort("XAI", call, 0);
                rxrpc_input_ackinfo(call, skb, &buf.info);
        }

        if (first_soft_ack == 0)
                return rxrpc_proto_abort("AK0", call, 0);

        /* Ignore ACKs unless we are or have just been transmitting. */
        switch (call->state) {
        case RXRPC_CALL_CLIENT_SEND_REQUEST:
        case RXRPC_CALL_CLIENT_AWAIT_REPLY:
        case RXRPC_CALL_SERVER_SEND_REPLY:
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                break;
        default:
                return;
        }

        if (before(hard_ack, call->tx_hard_ack) ||
            after(hard_ack, call->tx_top))
                return rxrpc_proto_abort("AKW", call, 0);
        if (nr_acks > call->tx_top - hard_ack)
                return rxrpc_proto_abort("AKN", call, 0);

        if (after(hard_ack, call->tx_hard_ack)) {
                if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
                        rxrpc_end_tx_phase(call, false, "ETA");
                        return;
                }
        }

        if (nr_acks > 0) {
                if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
                        return rxrpc_proto_abort("XSA", call, 0);
                rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
                                      &summary);
        }

        if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
            RXRPC_TX_ANNO_LAST &&
            summary.nr_acks == call->tx_top - hard_ack &&
            rxrpc_is_client_call(call))
                rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
                                  false, true,
                                  rxrpc_propose_ack_ping_for_lost_reply);

        return rxrpc_congestion_management(call, skb, &summary, acked_serial);
}
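
/* Assumed wire layout of the ACK packet parsed above (illustrative; the
 * three padding bytes follow from the ioffset calculation):
 *
 *      +-------------+------------------+------------+-----+---------------+
 *      | wire header | rxrpc_ackpacket  | nAcks soft | pad | rxrpc_ackinfo |
 *      |             | (ends in nAcks)  | ACK bytes  | (3) | (optional)    |
 *      +-------------+------------------+------------+-----+---------------+
 *
 * The trailer may be absent on short packets, hence the skb->len check
 * before rxrpc_input_ackinfo() is called.
 */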

/*
 * Process an ACKALL packet.
 */
static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
{
        struct rxrpc_ack_summary summary = { 0 };
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

        _proto("Rx ACKALL %%%u", sp->hdr.serial);

        if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
                rxrpc_end_tx_phase(call, false, "ETL");
}

/*
 * Process an ABORT packet.
 */
static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
        __be32 wtmp;
        u32 abort_code = RX_CALL_DEAD;

        _enter("");

        if (skb->len >= 4 &&
            skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
                          &wtmp, sizeof(wtmp)) >= 0)
                abort_code = ntohl(wtmp);

        _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);

        if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
                                      abort_code, ECONNABORTED))
                rxrpc_notify_socket(call);
}

/*
 * Process an incoming call packet.
 */
static void rxrpc_input_call_packet(struct rxrpc_call *call,
                                    struct sk_buff *skb, u16 skew)
{
        struct rxrpc_skb_priv *sp = rxrpc_skb(skb);

        _enter("%p,%p", call, skb);

        switch (sp->hdr.type) {
        case RXRPC_PACKET_TYPE_DATA:
                rxrpc_input_data(call, skb, skew);
                break;

        case RXRPC_PACKET_TYPE_ACK:
                rxrpc_input_ack(call, skb, skew);
                break;

        case RXRPC_PACKET_TYPE_BUSY:
                _proto("Rx BUSY %%%u", sp->hdr.serial);

                /* Just ignore BUSY packets from the server; the retry and
                 * lifespan timers will take care of business.  BUSY packets
                 * from the client don't make sense.
                 */
                break;

        case RXRPC_PACKET_TYPE_ABORT:
                rxrpc_input_abort(call, skb);
                break;

        case RXRPC_PACKET_TYPE_ACKALL:
                rxrpc_input_ackall(call, skb);
                break;

        default:
                _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
                break;
        }

        _leave("");
}

/*
 * Handle a new call on a channel implicitly completing the preceding call on
 * that channel.
 *
 * TODO: If callNumber > call_id + 1, renegotiate security.
 */
static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn,
                                          struct rxrpc_call *call)
{
        switch (call->state) {
        case RXRPC_CALL_SERVER_AWAIT_ACK:
                rxrpc_call_completed(call);
                break;
        case RXRPC_CALL_COMPLETE:
                break;
        default:
                if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, ESHUTDOWN)) {
                        set_bit(RXRPC_CALL_EV_ABORT, &call->events);
                        rxrpc_queue_call(call);
                }
                break;
        }

        __rxrpc_disconnect_call(conn, call);
        rxrpc_notify_socket(call);
}

/*
 * post connection-level events to the connection
 * - this includes challenges, responses, some aborts and call terminal packet
 *   retransmission.
 */
static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
                                      struct sk_buff *skb)
{
        _enter("%p,%p", conn, skb);

        skb_queue_tail(&conn->rx_queue, skb);
        rxrpc_queue_conn(conn);
}

/*
 * post endpoint-level events to the local endpoint
 * - this includes debug and version messages
 */
static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
                                       struct sk_buff *skb)
{
        _enter("%p,%p", local, skb);

        skb_queue_tail(&local->event_queue, skb);
        rxrpc_queue_local(local);
}

/*
 * put a packet up for transport-level abort
 */
static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
{
        CHECK_SLAB_OKAY(&local->usage);

        skb_queue_tail(&local->reject_queue, skb);
        rxrpc_queue_local(local);
}

/*
 * Extract the wire header from a packet and translate the byte order.
 */
static noinline
int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
{
        struct rxrpc_wire_header whdr;

        /* dig out the RxRPC connection details */
        if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
                return -EBADMSG;

        memset(sp, 0, sizeof(*sp));
        sp->hdr.epoch           = ntohl(whdr.epoch);
        sp->hdr.cid             = ntohl(whdr.cid);
        sp->hdr.callNumber      = ntohl(whdr.callNumber);
        sp->hdr.seq             = ntohl(whdr.seq);
        sp->hdr.serial          = ntohl(whdr.serial);
        sp->hdr.flags           = whdr.flags;
        sp->hdr.type            = whdr.type;
        sp->hdr.userStatus      = whdr.userStatus;
        sp->hdr.securityIndex   = whdr.securityIndex;
        sp->hdr._rsvd           = ntohs(whdr._rsvd);
        sp->hdr.serviceId       = ntohs(whdr.serviceId);
        return 0;
}
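
/* For reference, the on-the-wire header decoded above looks like this (a
 * sketch of struct rxrpc_wire_header; all multi-byte fields are big-endian):
 *
 *      __be32  epoch;          (client boot timestamp)
 *      __be32  cid;            (connection and channel ID)
 *      __be32  callNumber;     (call ID, 0 for connection-level packets)
 *      __be32  seq;            (DATA sequence number within the call)
 *      __be32  serial;         (serial number of packet on the connection)
 *      u8      type;           (RXRPC_PACKET_TYPE_*)
 *      u8      flags;          (RXRPC_LAST_PACKET etc.)
 *      u8      userStatus;
 *      u8      securityIndex;
 *      __be16  _rsvd;          (reserved, or security checksum)
 *      __be16  serviceId;
 */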

/*
 * handle data received on the local endpoint
 * - may be called in interrupt context
 *
 * The socket is locked by the caller and this prevents the socket from being
 * shut down and the local endpoint from going away, thus sk_user_data will not
 * be cleared until this function returns.
 */
void rxrpc_data_ready(struct sock *udp_sk)
{
        struct rxrpc_connection *conn;
        struct rxrpc_channel *chan;
        struct rxrpc_call *call;
        struct rxrpc_skb_priv *sp;
        struct rxrpc_local *local = udp_sk->sk_user_data;
        struct sk_buff *skb;
        unsigned int channel;
        int ret, skew;

        _enter("%p", udp_sk);

        ASSERT(!irqs_disabled());

        skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
        if (!skb) {
                if (ret == -EAGAIN)
                        return;
                _debug("UDP socket error %d", ret);
                return;
        }

        rxrpc_new_skb(skb, rxrpc_skb_rx_received);

        _net("recv skb %p", skb);

        /* we'll probably need to checksum it (didn't call sock_recvmsg) */
        if (skb_checksum_complete(skb)) {
                rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
                __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
                _leave(" [CSUM failed]");
                return;
        }

        __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);

        /* The socket buffer we have is owned by UDP, with UDP's data all over
         * it, but we really want our own data there.
         */
        skb_orphan(skb);
        sp = rxrpc_skb(skb);

        /* dig out the RxRPC connection details */
        if (rxrpc_extract_header(sp, skb) < 0)
                goto bad_message;

        if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
                static int lose;
                if ((lose++ & 7) == 7) {
                        trace_rxrpc_rx_lose(sp);
                        rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
                        return;
                }
        }

        trace_rxrpc_rx_packet(sp);

        _net("Rx RxRPC %s ep=%x call=%x:%x",
             sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
             sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);

        if (sp->hdr.type >= RXRPC_N_PACKET_TYPES ||
            !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) {
                _proto("Rx Bad Packet Type %u", sp->hdr.type);
                goto bad_message;
        }

        switch (sp->hdr.type) {
        case RXRPC_PACKET_TYPE_VERSION:
                rxrpc_post_packet_to_local(local, skb);
                goto out;

        case RXRPC_PACKET_TYPE_BUSY:
                if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
                        goto discard;
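                /* Fall through */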

        case RXRPC_PACKET_TYPE_DATA:
                if (sp->hdr.callNumber == 0)
                        goto bad_message;
                if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
                    !rxrpc_validate_jumbo(skb))
                        goto bad_message;
                break;
        }

        rcu_read_lock();

        conn = rxrpc_find_connection_rcu(local, skb);
        if (conn) {
                if (sp->hdr.securityIndex != conn->security_ix)
                        goto wrong_security;

                if (sp->hdr.callNumber == 0) {
                        /* Connection-level packet */
                        _debug("CONN %p {%d}", conn, conn->debug_id);
                        rxrpc_post_packet_to_conn(conn, skb);
                        goto out_unlock;
                }

                /* Note the serial number skew here */
                skew = (int)sp->hdr.serial - (int)conn->hi_serial;
                if (skew >= 0) {
                        if (skew > 0)
                                conn->hi_serial = sp->hdr.serial;
                } else {
                        skew = -skew;
                        skew = min(skew, 65535);
                }

                /* Call-bound packets are routed by connection channel. */
                channel = sp->hdr.cid & RXRPC_CHANNELMASK;
                chan = &conn->channels[channel];

                /* Ignore really old calls */
                if (sp->hdr.callNumber < chan->last_call)
                        goto discard_unlock;

                if (sp->hdr.callNumber == chan->last_call) {
                        if (chan->call ||
                            sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
                                goto discard_unlock;

                        /* For the previous service call, if completed
                         * successfully, we discard all further packets.
                         */
                        if (rxrpc_conn_is_service(conn) &&
                            chan->last_type == RXRPC_PACKET_TYPE_ACK)
                                goto discard_unlock;

                        /* But otherwise we need to retransmit the final packet
                         * from data cached in the connection record.
                         */
                        rxrpc_post_packet_to_conn(conn, skb);
                        goto out_unlock;
                }

                call = rcu_dereference(chan->call);

                if (sp->hdr.callNumber > chan->call_id) {
                        if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) {
                                rcu_read_unlock();
                                goto reject_packet;
                        }
                        if (call)
                                rxrpc_input_implicit_end_call(conn, call);
                        call = NULL;
                }
        } else {
                skew = 0;
                call = NULL;
        }

        if (!call || atomic_read(&call->usage) == 0) {
                if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED) ||
                    sp->hdr.callNumber == 0 ||
                    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
                        goto bad_message_unlock;
                if (sp->hdr.seq != 1)
                        goto discard_unlock;
                call = rxrpc_new_incoming_call(local, conn, skb);
                if (!call) {
                        rcu_read_unlock();
                        goto reject_packet;
                }
                rxrpc_send_ping(call, skb, skew);
        }

        rxrpc_input_call_packet(call, skb, skew);
        goto discard_unlock;

discard_unlock:
        rcu_read_unlock();
discard:
        rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
out:
        trace_rxrpc_rx_done(0, 0);
        return;

out_unlock:
        rcu_read_unlock();
        goto out;

wrong_security:
        rcu_read_unlock();
        trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
                          RXKADINCONSISTENCY, EBADMSG);
        skb->priority = RXKADINCONSISTENCY;
        goto post_abort;

bad_message_unlock:
        rcu_read_unlock();
bad_message:
        trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
                          RX_PROTOCOL_ERROR, EBADMSG);
        skb->priority = RX_PROTOCOL_ERROR;
post_abort:
        skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
reject_packet:
        trace_rxrpc_rx_done(skb->mark, skb->priority);
        rxrpc_reject_packet(local, skb);
        _leave(" [badmsg]");
}
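
/* A condensed view of the routing performed by rxrpc_data_ready() (an
 * illustrative summary, not normative):
 *
 *      VERSION packet                  -> local endpoint event queue
 *      callNumber == 0 (conn-level)    -> connection event queue
 *      packet for a superseded call    -> discard, or replay of final packet
 *      client-initiated DATA, seq == 1 -> rxrpc_new_incoming_call()
 *      anything else for a live call   -> rxrpc_input_call_packet()
 *      unroutable or malformed         -> reject queue (transport abort)
 */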