// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/sctp/sctp.h>
#include <net/tcp.h>
30 static unsigned int optlen(const u8 *opt, unsigned int offset)
32 /* Beware zero-length options: make finite progress */
33 if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
36 return opt[offset + 1];
39 static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
41 if (len % NFT_REG32_SIZE)
42 dest[len / NFT_REG32_SIZE] = 0;
44 return skb_copy_bits(skb, offset, dest, len);
47 static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
48 struct nft_regs *regs,
49 const struct nft_pktinfo *pkt)
51 struct nft_exthdr *priv = nft_expr_priv(expr);
52 u32 *dest = ®s->data[priv->dreg];
53 unsigned int offset = 0;
56 if (pkt->skb->protocol != htons(ETH_P_IPV6))
59 err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
60 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
61 nft_reg_store8(dest, err >= 0);
66 offset += priv->offset;
68 if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
72 regs->verdict.code = NFT_BREAK;
75 /* find the offset to specified option.
77 * If target header is found, its offset is set in *offset and return option
78 * number. Otherwise, return negative error.
80 * If the first fragment doesn't contain the End of Options it is considered
83 static int ipv4_find_option(struct net *net, struct sk_buff *skb,
84 unsigned int *offset, int target)
86 unsigned char optbuf[sizeof(struct ip_options) + 40];
87 struct ip_options *opt = (struct ip_options *)optbuf;
88 struct iphdr *iph, _iph;
94 iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
97 start = sizeof(struct iphdr);
99 optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
103 memset(opt, 0, sizeof(struct ip_options));
104 /* Copy the options since __ip_options_compile() modifies
107 if (skb_copy_bits(skb, start, opt->__data, optlen))
109 opt->optlen = optlen;
111 if (__ip_options_compile(net, opt, NULL, &info))
119 found = target == IPOPT_SSRR ? opt->is_strictroute :
120 !opt->is_strictroute;
122 *offset = opt->srr + start;
127 *offset = opt->rr + start;
131 if (!opt->router_alert)
133 *offset = opt->router_alert + start;
139 return found ? target : -ENOENT;
142 static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
143 struct nft_regs *regs,
144 const struct nft_pktinfo *pkt)
146 struct nft_exthdr *priv = nft_expr_priv(expr);
147 u32 *dest = ®s->data[priv->dreg];
148 struct sk_buff *skb = pkt->skb;
152 if (skb->protocol != htons(ETH_P_IP))
155 err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
156 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
157 nft_reg_store8(dest, err >= 0);
159 } else if (err < 0) {
162 offset += priv->offset;
164 if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
168 regs->verdict.code = NFT_BREAK;
172 nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
173 unsigned int len, void *buffer, unsigned int *tcphdr_len)
177 if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
180 tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
184 *tcphdr_len = __tcp_hdrlen(tcph);
185 if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
188 return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
191 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
192 struct nft_regs *regs,
193 const struct nft_pktinfo *pkt)
195 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
196 struct nft_exthdr *priv = nft_expr_priv(expr);
197 unsigned int i, optl, tcphdr_len, offset;
198 u32 *dest = ®s->data[priv->dreg];
202 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
207 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
208 optl = optlen(opt, i);
210 if (priv->type != opt[i])
213 if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
216 offset = i + priv->offset;
217 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
218 nft_reg_store8(dest, 1);
220 if (priv->len % NFT_REG32_SIZE)
221 dest[priv->len / NFT_REG32_SIZE] = 0;
222 memcpy(dest, opt + offset, priv->len);
229 if (priv->flags & NFT_EXTHDR_F_PRESENT)
232 regs->verdict.code = NFT_BREAK;
235 static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
236 struct nft_regs *regs,
237 const struct nft_pktinfo *pkt)
239 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
240 struct nft_exthdr *priv = nft_expr_priv(expr);
241 unsigned int i, optl, tcphdr_len, offset;
245 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
249 if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
252 tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
255 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
261 optl = optlen(opt, i);
263 if (priv->type != opt[i])
266 if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
269 offset = i + priv->offset;
273 old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset));
274 new.v16 = (__force __be16)nft_reg_load16(
275 ®s->data[priv->sreg]);
277 switch (priv->type) {
279 /* increase can cause connection to stall */
280 if (ntohs(old.v16) <= ntohs(new.v16))
285 if (old.v16 == new.v16)
288 put_unaligned(new.v16, (__be16*)(opt + offset));
289 inet_proto_csum_replace2(&tcph->check, pkt->skb,
290 old.v16, new.v16, false);
293 new.v32 = nft_reg_load_be32(®s->data[priv->sreg]);
294 old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset));
296 if (old.v32 == new.v32)
299 put_unaligned(new.v32, (__be32*)(opt + offset));
300 inet_proto_csum_replace4(&tcph->check, pkt->skb,
301 old.v32, new.v32, false);
312 regs->verdict.code = NFT_BREAK;
315 static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
316 struct nft_regs *regs,
317 const struct nft_pktinfo *pkt)
319 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
320 struct nft_exthdr *priv = nft_expr_priv(expr);
321 unsigned int i, tcphdr_len, optl;
325 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
329 if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
332 tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
335 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
338 optl = optlen(opt, i);
339 if (priv->type != opt[i])
342 if (i + optl > tcphdr_len)
345 for (j = 0; j < optl; ++j) {
349 if ((i + j) % 2 == 0) {
353 inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
356 memset(opt + i, TCPOPT_NOP, optl);
360 /* option not found, continue. This allows to do multiple
361 * option removals per rule.
365 regs->verdict.code = NFT_BREAK;
368 /* can't remove, no choice but to drop */
369 regs->verdict.code = NF_DROP;
372 static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
373 struct nft_regs *regs,
374 const struct nft_pktinfo *pkt)
376 unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
377 struct nft_exthdr *priv = nft_expr_priv(expr);
378 u32 *dest = ®s->data[priv->dreg];
379 const struct sctp_chunkhdr *sch;
380 struct sctp_chunkhdr _sch;
382 if (pkt->tprot != IPPROTO_SCTP)
386 sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
387 if (!sch || !sch->length)
390 if (sch->type == priv->type) {
391 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
392 nft_reg_store8(dest, true);
395 if (priv->offset + priv->len > ntohs(sch->length) ||
396 offset + ntohs(sch->length) > pkt->skb->len)
399 if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
400 dest, priv->len) < 0)
404 offset += SCTP_PAD4(ntohs(sch->length));
405 } while (offset < pkt->skb->len);
407 if (priv->flags & NFT_EXTHDR_F_PRESENT)
408 nft_reg_store8(dest, false);
410 regs->verdict.code = NFT_BREAK;
413 static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
414 struct nft_regs *regs,
415 const struct nft_pktinfo *pkt)
417 struct nft_exthdr *priv = nft_expr_priv(expr);
418 unsigned int thoff, dataoff, optoff, optlen, i;
419 u32 *dest = ®s->data[priv->dreg];
420 const struct dccp_hdr *dh;
423 if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
426 thoff = nft_thoff(pkt);
428 dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
432 dataoff = dh->dccph_doff * sizeof(u32);
433 optoff = __dccp_hdr_len(dh);
434 if (dataoff <= optoff)
437 optlen = dataoff - optoff;
439 for (i = 0; i < optlen; ) {
440 /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
441 * the length; the remaining options are at least 2B long. In
442 * all cases, the first byte contains the option type. In
443 * multi-byte options, the second byte contains the option
444 * length, which must be at least two: 1 for the type plus 1 for
445 * the length plus 0-253 for any following option data. We
446 * aren't interested in the option data, only the type and the
447 * length, so we don't need to read more than two bytes at a
450 unsigned int buflen = optlen - i;
454 if (buflen > sizeof(buf))
455 buflen = sizeof(buf);
457 bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
464 if (type == priv->type) {
465 nft_reg_store8(dest, 1);
469 if (type <= DCCPO_MAX_RESERVED) {
489 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
490 [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
491 [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
492 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
493 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
494 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
495 [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
496 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
499 static int nft_exthdr_init(const struct nft_ctx *ctx,
500 const struct nft_expr *expr,
501 const struct nlattr * const tb[])
503 struct nft_exthdr *priv = nft_expr_priv(expr);
504 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
507 if (!tb[NFTA_EXTHDR_DREG] ||
508 !tb[NFTA_EXTHDR_TYPE] ||
509 !tb[NFTA_EXTHDR_OFFSET] ||
510 !tb[NFTA_EXTHDR_LEN])
513 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
517 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
521 if (tb[NFTA_EXTHDR_FLAGS]) {
522 err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
526 if (flags & ~NFT_EXTHDR_F_PRESENT)
530 if (tb[NFTA_EXTHDR_OP]) {
531 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
536 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
537 priv->offset = offset;
542 return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
543 &priv->dreg, NULL, NFT_DATA_VALUE,
547 static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
548 const struct nft_expr *expr,
549 const struct nlattr * const tb[])
551 struct nft_exthdr *priv = nft_expr_priv(expr);
552 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
555 if (!tb[NFTA_EXTHDR_SREG] ||
556 !tb[NFTA_EXTHDR_TYPE] ||
557 !tb[NFTA_EXTHDR_OFFSET] ||
558 !tb[NFTA_EXTHDR_LEN])
561 if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
564 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
568 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
582 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
586 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
587 priv->offset = offset;
592 return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
596 static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
597 const struct nft_expr *expr,
598 const struct nlattr * const tb[])
600 struct nft_exthdr *priv = nft_expr_priv(expr);
602 if (tb[NFTA_EXTHDR_SREG] ||
603 tb[NFTA_EXTHDR_DREG] ||
604 tb[NFTA_EXTHDR_FLAGS] ||
605 tb[NFTA_EXTHDR_OFFSET] ||
609 if (!tb[NFTA_EXTHDR_TYPE])
612 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
613 priv->op = NFT_EXTHDR_OP_TCPOPT;
618 static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
619 const struct nft_expr *expr,
620 const struct nlattr * const tb[])
622 struct nft_exthdr *priv = nft_expr_priv(expr);
623 int err = nft_exthdr_init(ctx, expr, tb);
628 switch (priv->type) {
640 static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
641 const struct nft_expr *expr,
642 const struct nlattr * const tb[])
644 struct nft_exthdr *priv = nft_expr_priv(expr);
645 int err = nft_exthdr_init(ctx, expr, tb);
650 if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
656 static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
658 if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
659 goto nla_put_failure;
660 if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
661 goto nla_put_failure;
662 if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
663 goto nla_put_failure;
664 if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
665 goto nla_put_failure;
666 if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
667 goto nla_put_failure;
674 static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
676 const struct nft_exthdr *priv = nft_expr_priv(expr);
678 if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
681 return nft_exthdr_dump_common(skb, priv);
684 static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr)
686 const struct nft_exthdr *priv = nft_expr_priv(expr);
688 if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
691 return nft_exthdr_dump_common(skb, priv);
/* Dump a strip-style exthdr expression (common attributes only). */
static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr)
{
	const struct nft_exthdr *priv = nft_expr_priv(expr);

	return nft_exthdr_dump_common(skb, priv);
}
701 static bool nft_exthdr_reduce(struct nft_regs_track *track,
702 const struct nft_expr *expr)
704 const struct nft_exthdr *priv = nft_expr_priv(expr);
705 const struct nft_exthdr *exthdr;
707 if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
708 nft_reg_track_update(track, expr, priv->dreg, priv->len);
712 exthdr = nft_expr_priv(track->regs[priv->dreg].selector);
713 if (priv->type != exthdr->type ||
714 priv->op != exthdr->op ||
715 priv->flags != exthdr->flags ||
716 priv->offset != exthdr->offset ||
717 priv->len != exthdr->len) {
718 nft_reg_track_update(track, expr, priv->dreg, priv->len);
722 if (!track->regs[priv->dreg].bitwise)
725 return nft_expr_reduce_bitwise(track, expr);
728 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
729 .type = &nft_exthdr_type,
730 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
731 .eval = nft_exthdr_ipv6_eval,
732 .init = nft_exthdr_init,
733 .dump = nft_exthdr_dump,
734 .reduce = nft_exthdr_reduce,
737 static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
738 .type = &nft_exthdr_type,
739 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
740 .eval = nft_exthdr_ipv4_eval,
741 .init = nft_exthdr_ipv4_init,
742 .dump = nft_exthdr_dump,
743 .reduce = nft_exthdr_reduce,
746 static const struct nft_expr_ops nft_exthdr_tcp_ops = {
747 .type = &nft_exthdr_type,
748 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
749 .eval = nft_exthdr_tcp_eval,
750 .init = nft_exthdr_init,
751 .dump = nft_exthdr_dump,
752 .reduce = nft_exthdr_reduce,
755 static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
756 .type = &nft_exthdr_type,
757 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
758 .eval = nft_exthdr_tcp_set_eval,
759 .init = nft_exthdr_tcp_set_init,
760 .dump = nft_exthdr_dump_set,
761 .reduce = NFT_REDUCE_READONLY,
764 static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
765 .type = &nft_exthdr_type,
766 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
767 .eval = nft_exthdr_tcp_strip_eval,
768 .init = nft_exthdr_tcp_strip_init,
769 .dump = nft_exthdr_dump_strip,
770 .reduce = NFT_REDUCE_READONLY,
773 static const struct nft_expr_ops nft_exthdr_sctp_ops = {
774 .type = &nft_exthdr_type,
775 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
776 .eval = nft_exthdr_sctp_eval,
777 .init = nft_exthdr_init,
778 .dump = nft_exthdr_dump,
779 .reduce = nft_exthdr_reduce,
782 static const struct nft_expr_ops nft_exthdr_dccp_ops = {
783 .type = &nft_exthdr_type,
784 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
785 .eval = nft_exthdr_dccp_eval,
786 .init = nft_exthdr_dccp_init,
787 .dump = nft_exthdr_dump,
788 .reduce = nft_exthdr_reduce,
791 static const struct nft_expr_ops *
792 nft_exthdr_select_ops(const struct nft_ctx *ctx,
793 const struct nlattr * const tb[])
797 if (!tb[NFTA_EXTHDR_OP])
798 return &nft_exthdr_ipv6_ops;
800 if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
801 return ERR_PTR(-EOPNOTSUPP);
803 op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
805 case NFT_EXTHDR_OP_TCPOPT:
806 if (tb[NFTA_EXTHDR_SREG])
807 return &nft_exthdr_tcp_set_ops;
808 if (tb[NFTA_EXTHDR_DREG])
809 return &nft_exthdr_tcp_ops;
810 return &nft_exthdr_tcp_strip_ops;
811 case NFT_EXTHDR_OP_IPV6:
812 if (tb[NFTA_EXTHDR_DREG])
813 return &nft_exthdr_ipv6_ops;
815 case NFT_EXTHDR_OP_IPV4:
816 if (ctx->family != NFPROTO_IPV6) {
817 if (tb[NFTA_EXTHDR_DREG])
818 return &nft_exthdr_ipv4_ops;
821 case NFT_EXTHDR_OP_SCTP:
822 if (tb[NFTA_EXTHDR_DREG])
823 return &nft_exthdr_sctp_ops;
825 case NFT_EXTHDR_OP_DCCP:
826 if (tb[NFTA_EXTHDR_DREG])
827 return &nft_exthdr_dccp_ops;
831 return ERR_PTR(-EOPNOTSUPP);
834 struct nft_expr_type nft_exthdr_type __read_mostly = {
836 .select_ops = nft_exthdr_select_ops,
837 .policy = nft_exthdr_policy,
838 .maxattr = NFTA_EXTHDR_MAX,
839 .owner = THIS_MODULE,