GNU Linux-libre 4.19.242-gnu1
[releases.git] / drivers / net / ipvlan / ipvlan_core.c
1 /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU General Public License as
5  * published by the Free Software Foundation; either version 2 of
6  * the License, or (at your option) any later version.
7  *
8  */
9
10 #include "ipvlan.h"
11
12 static u32 ipvlan_jhash_secret __read_mostly;
13
14 void ipvlan_init_secret(void)
15 {
16         net_get_random_once(&ipvlan_jhash_secret, sizeof(ipvlan_jhash_secret));
17 }
18
19 void ipvlan_count_rx(const struct ipvl_dev *ipvlan,
20                             unsigned int len, bool success, bool mcast)
21 {
22         if (likely(success)) {
23                 struct ipvl_pcpu_stats *pcptr;
24
25                 pcptr = this_cpu_ptr(ipvlan->pcpu_stats);
26                 u64_stats_update_begin(&pcptr->syncp);
27                 pcptr->rx_pkts++;
28                 pcptr->rx_bytes += len;
29                 if (mcast)
30                         pcptr->rx_mcast++;
31                 u64_stats_update_end(&pcptr->syncp);
32         } else {
33                 this_cpu_inc(ipvlan->pcpu_stats->rx_errs);
34         }
35 }
36 EXPORT_SYMBOL_GPL(ipvlan_count_rx);
37
38 #if IS_ENABLED(CONFIG_IPV6)
39 static u8 ipvlan_get_v6_hash(const void *iaddr)
40 {
41         const struct in6_addr *ip6_addr = iaddr;
42
43         return __ipv6_addr_jhash(ip6_addr, ipvlan_jhash_secret) &
44                IPVLAN_HASH_MASK;
45 }
46 #else
47 static u8 ipvlan_get_v6_hash(const void *iaddr)
48 {
49         return 0;
50 }
51 #endif
52
53 static u8 ipvlan_get_v4_hash(const void *iaddr)
54 {
55         const struct in_addr *ip4_addr = iaddr;
56
57         return jhash_1word(ip4_addr->s_addr, ipvlan_jhash_secret) &
58                IPVLAN_HASH_MASK;
59 }
60
61 static bool addr_equal(bool is_v6, struct ipvl_addr *addr, const void *iaddr)
62 {
63         if (!is_v6 && addr->atype == IPVL_IPV4) {
64                 struct in_addr *i4addr = (struct in_addr *)iaddr;
65
66                 return addr->ip4addr.s_addr == i4addr->s_addr;
67 #if IS_ENABLED(CONFIG_IPV6)
68         } else if (is_v6 && addr->atype == IPVL_IPV6) {
69                 struct in6_addr *i6addr = (struct in6_addr *)iaddr;
70
71                 return ipv6_addr_equal(&addr->ip6addr, i6addr);
72 #endif
73         }
74
75         return false;
76 }
77
78 static struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
79                                                const void *iaddr, bool is_v6)
80 {
81         struct ipvl_addr *addr;
82         u8 hash;
83
84         hash = is_v6 ? ipvlan_get_v6_hash(iaddr) :
85                ipvlan_get_v4_hash(iaddr);
86         hlist_for_each_entry_rcu(addr, &port->hlhead[hash], hlnode)
87                 if (addr_equal(is_v6, addr, iaddr))
88                         return addr;
89         return NULL;
90 }
91
92 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
93 {
94         struct ipvl_port *port = ipvlan->port;
95         u8 hash;
96
97         hash = (addr->atype == IPVL_IPV6) ?
98                ipvlan_get_v6_hash(&addr->ip6addr) :
99                ipvlan_get_v4_hash(&addr->ip4addr);
100         if (hlist_unhashed(&addr->hlnode))
101                 hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
102 }
103
104 void ipvlan_ht_addr_del(struct ipvl_addr *addr)
105 {
106         hlist_del_init_rcu(&addr->hlnode);
107 }
108
109 struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
110                                    const void *iaddr, bool is_v6)
111 {
112         struct ipvl_addr *addr, *ret = NULL;
113
114         rcu_read_lock();
115         list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
116                 if (addr_equal(is_v6, addr, iaddr)) {
117                         ret = addr;
118                         break;
119                 }
120         }
121         rcu_read_unlock();
122         return ret;
123 }
124
125 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
126 {
127         struct ipvl_dev *ipvlan;
128         bool ret = false;
129
130         rcu_read_lock();
131         list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
132                 if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
133                         ret = true;
134                         break;
135                 }
136         }
137         rcu_read_unlock();
138         return ret;
139 }
140
141 static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
142 {
143         void *lyr3h = NULL;
144
145         switch (skb->protocol) {
146         case htons(ETH_P_ARP): {
147                 struct arphdr *arph;
148
149                 if (unlikely(!pskb_may_pull(skb, arp_hdr_len(port->dev))))
150                         return NULL;
151
152                 arph = arp_hdr(skb);
153                 *type = IPVL_ARP;
154                 lyr3h = arph;
155                 break;
156         }
157         case htons(ETH_P_IP): {
158                 u32 pktlen;
159                 struct iphdr *ip4h;
160
161                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip4h))))
162                         return NULL;
163
164                 ip4h = ip_hdr(skb);
165                 pktlen = ntohs(ip4h->tot_len);
166                 if (ip4h->ihl < 5 || ip4h->version != 4)
167                         return NULL;
168                 if (skb->len < pktlen || pktlen < (ip4h->ihl * 4))
169                         return NULL;
170
171                 *type = IPVL_IPV4;
172                 lyr3h = ip4h;
173                 break;
174         }
175 #if IS_ENABLED(CONFIG_IPV6)
176         case htons(ETH_P_IPV6): {
177                 struct ipv6hdr *ip6h;
178
179                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h))))
180                         return NULL;
181
182                 ip6h = ipv6_hdr(skb);
183                 if (ip6h->version != 6)
184                         return NULL;
185
186                 *type = IPVL_IPV6;
187                 lyr3h = ip6h;
188                 /* Only Neighbour Solicitation pkts need different treatment */
189                 if (ipv6_addr_any(&ip6h->saddr) &&
190                     ip6h->nexthdr == NEXTHDR_ICMP) {
191                         struct icmp6hdr *icmph;
192
193                         if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph))))
194                                 return NULL;
195
196                         ip6h = ipv6_hdr(skb);
197                         icmph = (struct icmp6hdr *)(ip6h + 1);
198
199                         if (icmph->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
200                                 /* Need to access the ipv6 address in body */
201                                 if (unlikely(!pskb_may_pull(skb, sizeof(*ip6h) + sizeof(*icmph)
202                                                 + sizeof(struct in6_addr))))
203                                         return NULL;
204
205                                 ip6h = ipv6_hdr(skb);
206                                 icmph = (struct icmp6hdr *)(ip6h + 1);
207                         }
208
209                         *type = IPVL_ICMPV6;
210                         lyr3h = icmph;
211                 }
212                 break;
213         }
214 #endif
215         default:
216                 return NULL;
217         }
218
219         return lyr3h;
220 }
221
222 unsigned int ipvlan_mac_hash(const unsigned char *addr)
223 {
224         u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
225                                ipvlan_jhash_secret);
226
227         return hash & IPVLAN_MAC_FILTER_MASK;
228 }
229
230 void ipvlan_process_multicast(struct work_struct *work)
231 {
232         struct ipvl_port *port = container_of(work, struct ipvl_port, wq);
233         struct ethhdr *ethh;
234         struct ipvl_dev *ipvlan;
235         struct sk_buff *skb, *nskb;
236         struct sk_buff_head list;
237         unsigned int len;
238         unsigned int mac_hash;
239         int ret;
240         u8 pkt_type;
241         bool tx_pkt;
242
243         __skb_queue_head_init(&list);
244
245         spin_lock_bh(&port->backlog.lock);
246         skb_queue_splice_tail_init(&port->backlog, &list);
247         spin_unlock_bh(&port->backlog.lock);
248
249         while ((skb = __skb_dequeue(&list)) != NULL) {
250                 struct net_device *dev = skb->dev;
251                 bool consumed = false;
252
253                 ethh = eth_hdr(skb);
254                 tx_pkt = IPVL_SKB_CB(skb)->tx_pkt;
255                 mac_hash = ipvlan_mac_hash(ethh->h_dest);
256
257                 if (ether_addr_equal(ethh->h_dest, port->dev->broadcast))
258                         pkt_type = PACKET_BROADCAST;
259                 else
260                         pkt_type = PACKET_MULTICAST;
261
262                 rcu_read_lock();
263                 list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
264                         if (tx_pkt && (ipvlan->dev == skb->dev))
265                                 continue;
266                         if (!test_bit(mac_hash, ipvlan->mac_filters))
267                                 continue;
268                         if (!(ipvlan->dev->flags & IFF_UP))
269                                 continue;
270                         ret = NET_RX_DROP;
271                         len = skb->len + ETH_HLEN;
272                         nskb = skb_clone(skb, GFP_ATOMIC);
273                         local_bh_disable();
274                         if (nskb) {
275                                 consumed = true;
276                                 nskb->pkt_type = pkt_type;
277                                 nskb->dev = ipvlan->dev;
278                                 if (tx_pkt)
279                                         ret = dev_forward_skb(ipvlan->dev, nskb);
280                                 else
281                                         ret = netif_rx(nskb);
282                         }
283                         ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
284                         local_bh_enable();
285                 }
286                 rcu_read_unlock();
287
288                 if (tx_pkt) {
289                         /* If the packet originated here, send it out. */
290                         skb->dev = port->dev;
291                         skb->pkt_type = pkt_type;
292                         dev_queue_xmit(skb);
293                 } else {
294                         if (consumed)
295                                 consume_skb(skb);
296                         else
297                                 kfree_skb(skb);
298                 }
299                 if (dev)
300                         dev_put(dev);
301                 cond_resched();
302         }
303 }
304
305 static void ipvlan_skb_crossing_ns(struct sk_buff *skb, struct net_device *dev)
306 {
307         bool xnet = true;
308
309         if (dev)
310                 xnet = !net_eq(dev_net(skb->dev), dev_net(dev));
311
312         skb_scrub_packet(skb, xnet);
313         if (dev)
314                 skb->dev = dev;
315 }
316
317 static int ipvlan_rcv_frame(struct ipvl_addr *addr, struct sk_buff **pskb,
318                             bool local)
319 {
320         struct ipvl_dev *ipvlan = addr->master;
321         struct net_device *dev = ipvlan->dev;
322         unsigned int len;
323         rx_handler_result_t ret = RX_HANDLER_CONSUMED;
324         bool success = false;
325         struct sk_buff *skb = *pskb;
326
327         len = skb->len + ETH_HLEN;
328         /* Only packets exchanged between two local slaves need to have
329          * device-up check as well as skb-share check.
330          */
331         if (local) {
332                 if (unlikely(!(dev->flags & IFF_UP))) {
333                         kfree_skb(skb);
334                         goto out;
335                 }
336
337                 skb = skb_share_check(skb, GFP_ATOMIC);
338                 if (!skb)
339                         goto out;
340
341                 *pskb = skb;
342         }
343
344         if (local) {
345                 skb->pkt_type = PACKET_HOST;
346                 if (dev_forward_skb(ipvlan->dev, skb) == NET_RX_SUCCESS)
347                         success = true;
348         } else {
349                 skb->dev = dev;
350                 ret = RX_HANDLER_ANOTHER;
351                 success = true;
352         }
353
354 out:
355         ipvlan_count_rx(ipvlan, len, success, false);
356         return ret;
357 }
358
359 static struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port,
360                                             void *lyr3h, int addr_type,
361                                             bool use_dest)
362 {
363         struct ipvl_addr *addr = NULL;
364
365         switch (addr_type) {
366 #if IS_ENABLED(CONFIG_IPV6)
367         case IPVL_IPV6: {
368                 struct ipv6hdr *ip6h;
369                 struct in6_addr *i6addr;
370
371                 ip6h = (struct ipv6hdr *)lyr3h;
372                 i6addr = use_dest ? &ip6h->daddr : &ip6h->saddr;
373                 addr = ipvlan_ht_addr_lookup(port, i6addr, true);
374                 break;
375         }
376         case IPVL_ICMPV6: {
377                 struct nd_msg *ndmh;
378                 struct in6_addr *i6addr;
379
380                 /* Make sure that the NeighborSolicitation ICMPv6 packets
381                  * are handled to avoid DAD issue.
382                  */
383                 ndmh = (struct nd_msg *)lyr3h;
384                 if (ndmh->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) {
385                         i6addr = &ndmh->target;
386                         addr = ipvlan_ht_addr_lookup(port, i6addr, true);
387                 }
388                 break;
389         }
390 #endif
391         case IPVL_IPV4: {
392                 struct iphdr *ip4h;
393                 __be32 *i4addr;
394
395                 ip4h = (struct iphdr *)lyr3h;
396                 i4addr = use_dest ? &ip4h->daddr : &ip4h->saddr;
397                 addr = ipvlan_ht_addr_lookup(port, i4addr, false);
398                 break;
399         }
400         case IPVL_ARP: {
401                 struct arphdr *arph;
402                 unsigned char *arp_ptr;
403                 __be32 dip;
404
405                 arph = (struct arphdr *)lyr3h;
406                 arp_ptr = (unsigned char *)(arph + 1);
407                 if (use_dest)
408                         arp_ptr += (2 * port->dev->addr_len) + 4;
409                 else
410                         arp_ptr += port->dev->addr_len;
411
412                 memcpy(&dip, arp_ptr, 4);
413                 addr = ipvlan_ht_addr_lookup(port, &dip, false);
414                 break;
415         }
416         }
417
418         return addr;
419 }
420
421 static int ipvlan_process_v4_outbound(struct sk_buff *skb)
422 {
423         const struct iphdr *ip4h = ip_hdr(skb);
424         struct net_device *dev = skb->dev;
425         struct net *net = dev_net(dev);
426         struct rtable *rt;
427         int err, ret = NET_XMIT_DROP;
428         struct flowi4 fl4 = {
429                 .flowi4_oif = dev->ifindex,
430                 .flowi4_tos = RT_TOS(ip4h->tos),
431                 .flowi4_flags = FLOWI_FLAG_ANYSRC,
432                 .flowi4_mark = skb->mark,
433                 .daddr = ip4h->daddr,
434                 .saddr = ip4h->saddr,
435         };
436
437         rt = ip_route_output_flow(net, &fl4, NULL);
438         if (IS_ERR(rt))
439                 goto err;
440
441         if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
442                 ip_rt_put(rt);
443                 goto err;
444         }
445         skb_dst_set(skb, &rt->dst);
446         err = ip_local_out(net, skb->sk, skb);
447         if (unlikely(net_xmit_eval(err)))
448                 dev->stats.tx_errors++;
449         else
450                 ret = NET_XMIT_SUCCESS;
451         goto out;
452 err:
453         dev->stats.tx_errors++;
454         kfree_skb(skb);
455 out:
456         return ret;
457 }
458
459 #if IS_ENABLED(CONFIG_IPV6)
460 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
461 {
462         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
463         struct net_device *dev = skb->dev;
464         struct net *net = dev_net(dev);
465         struct dst_entry *dst;
466         int err, ret = NET_XMIT_DROP;
467         struct flowi6 fl6 = {
468                 .flowi6_oif = dev->ifindex,
469                 .daddr = ip6h->daddr,
470                 .saddr = ip6h->saddr,
471                 .flowi6_flags = FLOWI_FLAG_ANYSRC,
472                 .flowlabel = ip6_flowinfo(ip6h),
473                 .flowi6_mark = skb->mark,
474                 .flowi6_proto = ip6h->nexthdr,
475         };
476
477         dst = ip6_route_output(net, NULL, &fl6);
478         if (dst->error) {
479                 ret = dst->error;
480                 dst_release(dst);
481                 goto err;
482         }
483         skb_dst_set(skb, dst);
484         err = ip6_local_out(net, skb->sk, skb);
485         if (unlikely(net_xmit_eval(err)))
486                 dev->stats.tx_errors++;
487         else
488                 ret = NET_XMIT_SUCCESS;
489         goto out;
490 err:
491         dev->stats.tx_errors++;
492         kfree_skb(skb);
493 out:
494         return ret;
495 }
496 #else
497 static int ipvlan_process_v6_outbound(struct sk_buff *skb)
498 {
499         return NET_XMIT_DROP;
500 }
501 #endif
502
503 static int ipvlan_process_outbound(struct sk_buff *skb)
504 {
505         struct ethhdr *ethh = eth_hdr(skb);
506         int ret = NET_XMIT_DROP;
507
508         /* The ipvlan is a pseudo-L2 device, so the packets that we receive
509          * will have L2; which need to discarded and processed further
510          * in the net-ns of the main-device.
511          */
512         if (skb_mac_header_was_set(skb)) {
513                 /* In this mode we dont care about
514                  * multicast and broadcast traffic */
515                 if (is_multicast_ether_addr(ethh->h_dest)) {
516                         pr_debug_ratelimited(
517                                 "Dropped {multi|broad}cast of type=[%x]\n",
518                                 ntohs(skb->protocol));
519                         kfree_skb(skb);
520                         goto out;
521                 }
522
523                 skb_pull(skb, sizeof(*ethh));
524                 skb->mac_header = (typeof(skb->mac_header))~0U;
525                 skb_reset_network_header(skb);
526         }
527
528         if (skb->protocol == htons(ETH_P_IPV6))
529                 ret = ipvlan_process_v6_outbound(skb);
530         else if (skb->protocol == htons(ETH_P_IP))
531                 ret = ipvlan_process_v4_outbound(skb);
532         else {
533                 pr_warn_ratelimited("Dropped outbound packet type=%x\n",
534                                     ntohs(skb->protocol));
535                 kfree_skb(skb);
536         }
537 out:
538         return ret;
539 }
540
541 static void ipvlan_multicast_enqueue(struct ipvl_port *port,
542                                      struct sk_buff *skb, bool tx_pkt)
543 {
544         if (skb->protocol == htons(ETH_P_PAUSE)) {
545                 kfree_skb(skb);
546                 return;
547         }
548
549         /* Record that the deferred packet is from TX or RX path. By
550          * looking at mac-addresses on packet will lead to erronus decisions.
551          * (This would be true for a loopback-mode on master device or a
552          * hair-pin mode of the switch.)
553          */
554         IPVL_SKB_CB(skb)->tx_pkt = tx_pkt;
555
556         spin_lock(&port->backlog.lock);
557         if (skb_queue_len(&port->backlog) < IPVLAN_QBACKLOG_LIMIT) {
558                 if (skb->dev)
559                         dev_hold(skb->dev);
560                 __skb_queue_tail(&port->backlog, skb);
561                 spin_unlock(&port->backlog.lock);
562                 schedule_work(&port->wq);
563         } else {
564                 spin_unlock(&port->backlog.lock);
565                 atomic_long_inc(&skb->dev->rx_dropped);
566                 kfree_skb(skb);
567         }
568 }
569
570 static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
571 {
572         const struct ipvl_dev *ipvlan = netdev_priv(dev);
573         void *lyr3h;
574         struct ipvl_addr *addr;
575         int addr_type;
576
577         lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
578         if (!lyr3h)
579                 goto out;
580
581         if (!ipvlan_is_vepa(ipvlan->port)) {
582                 addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
583                 if (addr) {
584                         if (ipvlan_is_private(ipvlan->port)) {
585                                 consume_skb(skb);
586                                 return NET_XMIT_DROP;
587                         }
588                         return ipvlan_rcv_frame(addr, &skb, true);
589                 }
590         }
591 out:
592         ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
593         return ipvlan_process_outbound(skb);
594 }
595
596 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
597 {
598         const struct ipvl_dev *ipvlan = netdev_priv(dev);
599         struct ethhdr *eth = eth_hdr(skb);
600         struct ipvl_addr *addr;
601         void *lyr3h;
602         int addr_type;
603
604         if (!ipvlan_is_vepa(ipvlan->port) &&
605             ether_addr_equal(eth->h_dest, eth->h_source)) {
606                 lyr3h = ipvlan_get_L3_hdr(ipvlan->port, skb, &addr_type);
607                 if (lyr3h) {
608                         addr = ipvlan_addr_lookup(ipvlan->port, lyr3h, addr_type, true);
609                         if (addr) {
610                                 if (ipvlan_is_private(ipvlan->port)) {
611                                         consume_skb(skb);
612                                         return NET_XMIT_DROP;
613                                 }
614                                 return ipvlan_rcv_frame(addr, &skb, true);
615                         }
616                 }
617                 skb = skb_share_check(skb, GFP_ATOMIC);
618                 if (!skb)
619                         return NET_XMIT_DROP;
620
621                 /* Packet definitely does not belong to any of the
622                  * virtual devices, but the dest is local. So forward
623                  * the skb for the main-dev. At the RX side we just return
624                  * RX_PASS for it to be processed further on the stack.
625                  */
626                 return dev_forward_skb(ipvlan->phy_dev, skb);
627
628         } else if (is_multicast_ether_addr(eth->h_dest)) {
629                 ipvlan_skb_crossing_ns(skb, NULL);
630                 ipvlan_multicast_enqueue(ipvlan->port, skb, true);
631                 return NET_XMIT_SUCCESS;
632         }
633
634         skb->dev = ipvlan->phy_dev;
635         return dev_queue_xmit(skb);
636 }
637
638 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
639 {
640         struct ipvl_dev *ipvlan = netdev_priv(dev);
641         struct ipvl_port *port = ipvlan_port_get_rcu_bh(ipvlan->phy_dev);
642
643         if (!port)
644                 goto out;
645
646         if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
647                 goto out;
648
649         switch(port->mode) {
650         case IPVLAN_MODE_L2:
651                 return ipvlan_xmit_mode_l2(skb, dev);
652         case IPVLAN_MODE_L3:
653         case IPVLAN_MODE_L3S:
654                 return ipvlan_xmit_mode_l3(skb, dev);
655         }
656
657         /* Should not reach here */
658         WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n",
659                           port->mode);
660 out:
661         kfree_skb(skb);
662         return NET_XMIT_DROP;
663 }
664
665 static bool ipvlan_external_frame(struct sk_buff *skb, struct ipvl_port *port)
666 {
667         struct ethhdr *eth = eth_hdr(skb);
668         struct ipvl_addr *addr;
669         void *lyr3h;
670         int addr_type;
671
672         if (ether_addr_equal(eth->h_source, skb->dev->dev_addr)) {
673                 lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
674                 if (!lyr3h)
675                         return true;
676
677                 addr = ipvlan_addr_lookup(port, lyr3h, addr_type, false);
678                 if (addr)
679                         return false;
680         }
681
682         return true;
683 }
684
685 static rx_handler_result_t ipvlan_handle_mode_l3(struct sk_buff **pskb,
686                                                  struct ipvl_port *port)
687 {
688         void *lyr3h;
689         int addr_type;
690         struct ipvl_addr *addr;
691         struct sk_buff *skb = *pskb;
692         rx_handler_result_t ret = RX_HANDLER_PASS;
693
694         lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
695         if (!lyr3h)
696                 goto out;
697
698         addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
699         if (addr)
700                 ret = ipvlan_rcv_frame(addr, pskb, false);
701
702 out:
703         return ret;
704 }
705
706 static rx_handler_result_t ipvlan_handle_mode_l2(struct sk_buff **pskb,
707                                                  struct ipvl_port *port)
708 {
709         struct sk_buff *skb = *pskb;
710         struct ethhdr *eth = eth_hdr(skb);
711         rx_handler_result_t ret = RX_HANDLER_PASS;
712
713         if (is_multicast_ether_addr(eth->h_dest)) {
714                 if (ipvlan_external_frame(skb, port)) {
715                         struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
716
717                         /* External frames are queued for device local
718                          * distribution, but a copy is given to master
719                          * straight away to avoid sending duplicates later
720                          * when work-queue processes this frame. This is
721                          * achieved by returning RX_HANDLER_PASS.
722                          */
723                         if (nskb) {
724                                 ipvlan_skb_crossing_ns(nskb, NULL);
725                                 ipvlan_multicast_enqueue(port, nskb, false);
726                         }
727                 }
728         } else {
729                 /* Perform like l3 mode for non-multicast packet */
730                 ret = ipvlan_handle_mode_l3(pskb, port);
731         }
732
733         return ret;
734 }
735
736 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
737 {
738         struct sk_buff *skb = *pskb;
739         struct ipvl_port *port = ipvlan_port_get_rcu(skb->dev);
740
741         if (!port)
742                 return RX_HANDLER_PASS;
743
744         switch (port->mode) {
745         case IPVLAN_MODE_L2:
746                 return ipvlan_handle_mode_l2(pskb, port);
747         case IPVLAN_MODE_L3:
748                 return ipvlan_handle_mode_l3(pskb, port);
749         case IPVLAN_MODE_L3S:
750                 return RX_HANDLER_PASS;
751         }
752
753         /* Should not reach here */
754         WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n",
755                           port->mode);
756         kfree_skb(skb);
757         return RX_HANDLER_CONSUMED;
758 }
759
760 static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
761                                             struct net_device *dev)
762 {
763         struct ipvl_addr *addr = NULL;
764         struct ipvl_port *port;
765         void *lyr3h;
766         int addr_type;
767
768         if (!dev || !netif_is_ipvlan_port(dev))
769                 goto out;
770
771         port = ipvlan_port_get_rcu(dev);
772         if (!port || port->mode != IPVLAN_MODE_L3S)
773                 goto out;
774
775         lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type);
776         if (!lyr3h)
777                 goto out;
778
779         addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
780 out:
781         return addr;
782 }
783
784 struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
785                               u16 proto)
786 {
787         struct ipvl_addr *addr;
788         struct net_device *sdev;
789
790         addr = ipvlan_skb_to_addr(skb, dev);
791         if (!addr)
792                 goto out;
793
794         sdev = addr->master->dev;
795         switch (proto) {
796         case AF_INET:
797         {
798                 int err;
799                 struct iphdr *ip4h = ip_hdr(skb);
800
801                 err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
802                                            ip4h->tos, sdev);
803                 if (unlikely(err))
804                         goto out;
805                 break;
806         }
807 #if IS_ENABLED(CONFIG_IPV6)
808         case AF_INET6:
809         {
810                 struct dst_entry *dst;
811                 struct ipv6hdr *ip6h = ipv6_hdr(skb);
812                 int flags = RT6_LOOKUP_F_HAS_SADDR;
813                 struct flowi6 fl6 = {
814                         .flowi6_iif   = sdev->ifindex,
815                         .daddr        = ip6h->daddr,
816                         .saddr        = ip6h->saddr,
817                         .flowlabel    = ip6_flowinfo(ip6h),
818                         .flowi6_mark  = skb->mark,
819                         .flowi6_proto = ip6h->nexthdr,
820                 };
821
822                 skb_dst_drop(skb);
823                 dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
824                                              skb, flags);
825                 skb_dst_set(skb, dst);
826                 break;
827         }
828 #endif
829         default:
830                 break;
831         }
832
833 out:
834         return skb;
835 }
836
837 unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
838                              const struct nf_hook_state *state)
839 {
840         struct ipvl_addr *addr;
841         unsigned int len;
842
843         addr = ipvlan_skb_to_addr(skb, skb->dev);
844         if (!addr)
845                 goto out;
846
847         skb->dev = addr->master->dev;
848         len = skb->len + ETH_HLEN;
849         ipvlan_count_rx(addr->master, len, true, false);
850 out:
851         return NF_ACCEPT;
852 }