Skip to content

Commit bd569dd

Browse files
committed
Florian Westphal says:

====================
netfilter: updates for net-next

1) Don't respond to ICMP_UNREACH errors with another ICMP_UNREACH error.

2) Support fetching the current bridge ethernet address. This allows a more flexible approach to packet redirection on bridges without need to use hardcoded addresses. From Fernando Fernandez Mancera.

3) Zap a few no-longer needed conditionals from ipvs packet path and convert to READ/WRITE_ONCE to avoid KCSAN warnings. From Zhang Tengfei.

4) Remove a no-longer-used macro argument in ipset, from Zhen Ni.

* tag 'nf-next-25-09-11' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: nf_reject: don't reply to icmp error messages
  ipvs: Use READ_ONCE/WRITE_ONCE for ipvs->enable
  netfilter: nft_meta_bridge: introduce NFT_META_BRI_IIFHWADDR support
  netfilter: ipset: Remove unused htable_bits in macro ahash_region
  selftest:net: fixed spelling mistakes
====================

Link: https://patch.msgid.link/20250911143819.14753-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 3456820 + db99b2f commit bd569dd

File tree

10 files changed

+91
-26
lines changed

10 files changed

+91
-26
lines changed

include/uapi/linux/netfilter/nf_tables.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,7 @@ enum nft_exthdr_attributes {
959959
* @NFT_META_SDIF: slave device interface index
960960
* @NFT_META_SDIFNAME: slave device interface name
961961
* @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit
962+
* @NFT_META_BRI_IIFHWADDR: packet input bridge interface ethernet address
962963
*/
963964
enum nft_meta_keys {
964965
NFT_META_LEN,
@@ -999,6 +1000,7 @@ enum nft_meta_keys {
9991000
NFT_META_SDIFNAME,
10001001
NFT_META_BRI_BROUTE,
10011002
__NFT_META_IIFTYPE,
1003+
NFT_META_BRI_IIFHWADDR,
10021004
};
10031005

10041006
/**

net/bridge/netfilter/nft_meta_bridge.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,13 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
5959
nft_reg_store_be16(dest, htons(p_proto));
6060
return;
6161
}
62+
case NFT_META_BRI_IIFHWADDR:
63+
br_dev = nft_meta_get_bridge(in);
64+
if (!br_dev)
65+
goto err;
66+
67+
memcpy(dest, br_dev->dev_addr, ETH_ALEN);
68+
return;
6269
default:
6370
return nft_meta_get_eval(expr, regs, pkt);
6471
}
@@ -86,6 +93,9 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
8693
case NFT_META_BRI_IIFVPROTO:
8794
len = sizeof(u16);
8895
break;
96+
case NFT_META_BRI_IIFHWADDR:
97+
len = ETH_ALEN;
98+
break;
8999
default:
90100
return nft_meta_get_init(ctx, expr, tb);
91101
}
@@ -175,6 +185,7 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
175185

176186
switch (priv->key) {
177187
case NFT_META_BRI_BROUTE:
188+
case NFT_META_BRI_IIFHWADDR:
178189
hooks = 1 << NF_BR_PRE_ROUTING;
179190
break;
180191
default:

net/ipv4/netfilter/nf_reject_ipv4.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,27 @@ struct sk_buff *nf_reject_skb_v4_tcp_reset(struct net *net,
8080
}
8181
EXPORT_SYMBOL_GPL(nf_reject_skb_v4_tcp_reset);
8282

83+
static bool nf_skb_is_icmp_unreach(const struct sk_buff *skb)
84+
{
85+
const struct iphdr *iph = ip_hdr(skb);
86+
u8 *tp, _type;
87+
int thoff;
88+
89+
if (iph->protocol != IPPROTO_ICMP)
90+
return false;
91+
92+
thoff = skb_network_offset(skb) + sizeof(*iph);
93+
94+
tp = skb_header_pointer(skb,
95+
thoff + offsetof(struct icmphdr, type),
96+
sizeof(_type), &_type);
97+
98+
if (!tp)
99+
return false;
100+
101+
return *tp == ICMP_DEST_UNREACH;
102+
}
103+
83104
struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
84105
struct sk_buff *oldskb,
85106
const struct net_device *dev,
@@ -100,6 +121,10 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
100121
if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
101122
return NULL;
102123

124+
/* don't reply to ICMP_DEST_UNREACH with ICMP_DEST_UNREACH. */
125+
if (nf_skb_is_icmp_unreach(oldskb))
126+
return NULL;
127+
103128
/* RFC says return as much as we can without exceeding 576 bytes. */
104129
len = min_t(unsigned int, 536, oldskb->len);
105130

net/ipv6/netfilter/nf_reject_ipv6.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,32 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
104104
}
105105
EXPORT_SYMBOL_GPL(nf_reject_skb_v6_tcp_reset);
106106

107+
static bool nf_skb_is_icmp6_unreach(const struct sk_buff *skb)
108+
{
109+
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
110+
u8 proto = ip6h->nexthdr;
111+
u8 _type, *tp;
112+
int thoff;
113+
__be16 fo;
114+
115+
thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo);
116+
117+
if (thoff < 0 || thoff >= skb->len || fo != 0)
118+
return false;
119+
120+
if (proto != IPPROTO_ICMPV6)
121+
return false;
122+
123+
tp = skb_header_pointer(skb,
124+
thoff + offsetof(struct icmp6hdr, icmp6_type),
125+
sizeof(_type), &_type);
126+
127+
if (!tp)
128+
return false;
129+
130+
return *tp == ICMPV6_DEST_UNREACH;
131+
}
132+
107133
struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
108134
struct sk_buff *oldskb,
109135
const struct net_device *dev,
@@ -117,6 +143,10 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
117143
if (!nf_reject_ip6hdr_validate(oldskb))
118144
return NULL;
119145

146+
/* Don't reply to ICMPV6_DEST_UNREACH with ICMPV6_DEST_UNREACH */
147+
if (nf_skb_is_icmp6_unreach(oldskb))
148+
return NULL;
149+
120150
/* Include "As much of invoking packet as possible without the ICMPv6
121151
* packet exceeding the minimum IPv6 MTU" in the ICMP payload.
122152
*/

net/netfilter/ipset/ip_set_hash_gen.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ struct hbucket {
6363
: jhash_size((htable_bits) - HTABLE_REGION_BITS))
6464
#define ahash_sizeof_regions(htable_bits) \
6565
(ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region))
66-
#define ahash_region(n, htable_bits) \
66+
#define ahash_region(n) \
6767
((n) / jhash_size(HTABLE_REGION_BITS))
6868
#define ahash_bucket_start(h, htable_bits) \
6969
((htable_bits) < HTABLE_REGION_BITS ? 0 \
@@ -702,7 +702,7 @@ mtype_resize(struct ip_set *set, bool retried)
702702
#endif
703703
key = HKEY(data, h->initval, htable_bits);
704704
m = __ipset_dereference(hbucket(t, key));
705-
nr = ahash_region(key, htable_bits);
705+
nr = ahash_region(key);
706706
if (!m) {
707707
m = kzalloc(sizeof(*m) +
708708
AHASH_INIT_SIZE * dsize,
@@ -852,7 +852,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
852852
rcu_read_lock_bh();
853853
t = rcu_dereference_bh(h->table);
854854
key = HKEY(value, h->initval, t->htable_bits);
855-
r = ahash_region(key, t->htable_bits);
855+
r = ahash_region(key);
856856
atomic_inc(&t->uref);
857857
elements = t->hregion[r].elements;
858858
maxelem = t->maxelem;
@@ -1050,7 +1050,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
10501050
rcu_read_lock_bh();
10511051
t = rcu_dereference_bh(h->table);
10521052
key = HKEY(value, h->initval, t->htable_bits);
1053-
r = ahash_region(key, t->htable_bits);
1053+
r = ahash_region(key);
10541054
atomic_inc(&t->uref);
10551055
rcu_read_unlock_bh();
10561056

net/netfilter/ipvs/ip_vs_conn.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -885,7 +885,7 @@ static void ip_vs_conn_expire(struct timer_list *t)
885885
* conntrack cleanup for the net.
886886
*/
887887
smp_rmb();
888-
if (ipvs->enable)
888+
if (READ_ONCE(ipvs->enable))
889889
ip_vs_conn_drop_conntrack(cp);
890890
}
891891

@@ -1439,7 +1439,7 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs)
14391439
cond_resched_rcu();
14401440

14411441
/* netns clean up started, abort delayed work */
1442-
if (!ipvs->enable)
1442+
if (!READ_ONCE(ipvs->enable))
14431443
break;
14441444
}
14451445
rcu_read_unlock();

net/netfilter/ipvs/ip_vs_core.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,9 +1353,6 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat
13531353
if (unlikely(!skb_dst(skb)))
13541354
return NF_ACCEPT;
13551355

1356-
if (!ipvs->enable)
1357-
return NF_ACCEPT;
1358-
13591356
ip_vs_fill_iph_skb(af, skb, false, &iph);
13601357
#ifdef CONFIG_IP_VS_IPV6
13611358
if (af == AF_INET6) {
@@ -1940,7 +1937,7 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
19401937
return NF_ACCEPT;
19411938
}
19421939
/* ipvs enabled in this netns ? */
1943-
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
1940+
if (unlikely(sysctl_backup_only(ipvs)))
19441941
return NF_ACCEPT;
19451942

19461943
ip_vs_fill_iph_skb(af, skb, false, &iph);
@@ -2108,7 +2105,7 @@ ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
21082105
int r;
21092106

21102107
/* ipvs enabled in this netns ? */
2111-
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
2108+
if (unlikely(sysctl_backup_only(ipvs)))
21122109
return NF_ACCEPT;
21132110

21142111
if (state->pf == NFPROTO_IPV4) {
@@ -2295,7 +2292,7 @@ static int __net_init __ip_vs_init(struct net *net)
22952292
return -ENOMEM;
22962293

22972294
/* Hold the beast until a service is registered */
2298-
ipvs->enable = 0;
2295+
WRITE_ONCE(ipvs->enable, 0);
22992296
ipvs->net = net;
23002297
/* Counters used for creating unique names */
23012298
ipvs->gen = atomic_read(&ipvs_netns_cnt);
@@ -2367,7 +2364,7 @@ static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
23672364
ipvs = net_ipvs(net);
23682365
ip_vs_unregister_hooks(ipvs, AF_INET);
23692366
ip_vs_unregister_hooks(ipvs, AF_INET6);
2370-
ipvs->enable = 0; /* Disable packet reception */
2367+
WRITE_ONCE(ipvs->enable, 0); /* Disable packet reception */
23712368
smp_wmb();
23722369
ip_vs_sync_net_cleanup(ipvs);
23732370
}

net/netfilter/ipvs/ip_vs_ctl.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ static void est_reload_work_handler(struct work_struct *work)
256256
struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
257257

258258
/* netns clean up started, abort delayed work */
259-
if (!ipvs->enable)
259+
if (!READ_ONCE(ipvs->enable))
260260
goto unlock;
261261
if (!kd)
262262
continue;
@@ -1483,9 +1483,9 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
14831483

14841484
*svc_p = svc;
14851485

1486-
if (!ipvs->enable) {
1486+
if (!READ_ONCE(ipvs->enable)) {
14871487
/* Now there is a service - full throttle */
1488-
ipvs->enable = 1;
1488+
WRITE_ONCE(ipvs->enable, 1);
14891489

14901490
/* Start estimation for first time */
14911491
ip_vs_est_reload_start(ipvs);

net/netfilter/ipvs/ip_vs_est.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ static int ip_vs_estimation_kthread(void *data)
231231
void ip_vs_est_reload_start(struct netns_ipvs *ipvs)
232232
{
233233
/* Ignore reloads before first service is added */
234-
if (!ipvs->enable)
234+
if (!READ_ONCE(ipvs->enable))
235235
return;
236236
ip_vs_est_stopped_recalc(ipvs);
237237
/* Bump the kthread configuration genid */
@@ -306,7 +306,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
306306
int i;
307307

308308
if ((unsigned long)ipvs->est_kt_count >= ipvs->est_max_threads &&
309-
ipvs->enable && ipvs->est_max_threads)
309+
READ_ONCE(ipvs->enable) && ipvs->est_max_threads)
310310
return -EINVAL;
311311

312312
mutex_lock(&ipvs->est_mutex);
@@ -343,7 +343,7 @@ static int ip_vs_est_add_kthread(struct netns_ipvs *ipvs)
343343
}
344344

345345
/* Start kthread tasks only when services are present */
346-
if (ipvs->enable && !ip_vs_est_stopped(ipvs)) {
346+
if (READ_ONCE(ipvs->enable) && !ip_vs_est_stopped(ipvs)) {
347347
ret = ip_vs_est_kthread_start(ipvs, kd);
348348
if (ret < 0)
349349
goto out;
@@ -486,7 +486,7 @@ int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
486486
struct ip_vs_estimator *est = &stats->est;
487487
int ret;
488488

489-
if (!ipvs->est_max_threads && ipvs->enable)
489+
if (!ipvs->est_max_threads && READ_ONCE(ipvs->enable))
490490
ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
491491

492492
est->ktid = -1;
@@ -663,7 +663,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
663663
/* Wait for cpufreq frequency transition */
664664
wait_event_idle_timeout(wq, kthread_should_stop(),
665665
HZ / 50);
666-
if (!ipvs->enable || kthread_should_stop())
666+
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
667667
goto stop;
668668
}
669669

@@ -681,7 +681,7 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
681681
rcu_read_unlock();
682682
local_bh_enable();
683683

684-
if (!ipvs->enable || kthread_should_stop())
684+
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
685685
goto stop;
686686
cond_resched();
687687

@@ -757,7 +757,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
757757
mutex_lock(&ipvs->est_mutex);
758758
for (id = 1; id < ipvs->est_kt_count; id++) {
759759
/* netns clean up started, abort */
760-
if (!ipvs->enable)
760+
if (!READ_ONCE(ipvs->enable))
761761
goto unlock2;
762762
kd = ipvs->est_kt_arr[id];
763763
if (!kd)
@@ -787,7 +787,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
787787
id = ipvs->est_kt_count;
788788

789789
next_kt:
790-
if (!ipvs->enable || kthread_should_stop())
790+
if (!READ_ONCE(ipvs->enable) || kthread_should_stop())
791791
goto unlock;
792792
id--;
793793
if (id < 0)

tools/testing/selftests/net/netfilter/nft_nat.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ test_redirect6()
569569
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
570570

571571
if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
572-
echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
572+
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
573573
lret=1
574574
fi
575575

@@ -859,7 +859,7 @@ EOF
859859
# from router:service bypass connection tracking.
860860
test_port_shadow_notrack "$family"
861861

862-
# test nat based mitigation: fowarded packets coming from service port
862+
# test nat based mitigation: forwarded packets coming from service port
863863
# are masqueraded with random highport.
864864
test_port_shadow_pat "$family"
865865

0 commit comments

Comments
 (0)