Skip to content

Commit 93b1754

Browse files
committed
mptcp: make fallback action and fallback decision atomic
JIRA: https://issues.redhat.com/browse/RHEL-115624 Upstream Status: net.git commit f8a1d9b commit f8a1d9b Author: Paolo Abeni <pabeni@redhat.com> Date: Mon Jul 14 18:41:44 2025 +0200 mptcp: make fallback action and fallback decision atomic Syzkaller reported the following splat: WARNING: CPU: 1 PID: 7704 at net/mptcp/protocol.h:1223 __mptcp_do_fallback net/mptcp/protocol.h:1223 [inline] WARNING: CPU: 1 PID: 7704 at net/mptcp/protocol.h:1223 mptcp_do_fallback net/mptcp/protocol.h:1244 [inline] WARNING: CPU: 1 PID: 7704 at net/mptcp/protocol.h:1223 check_fully_established net/mptcp/options.c:982 [inline] WARNING: CPU: 1 PID: 7704 at net/mptcp/protocol.h:1223 mptcp_incoming_options+0x21a8/0x2510 net/mptcp/options.c:1153 Modules linked in: CPU: 1 UID: 0 PID: 7704 Comm: syz.3.1419 Not tainted 6.16.0-rc3-gbd5ce2324dba #20 PREEMPT(voluntary) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:__mptcp_do_fallback net/mptcp/protocol.h:1223 [inline] RIP: 0010:mptcp_do_fallback net/mptcp/protocol.h:1244 [inline] RIP: 0010:check_fully_established net/mptcp/options.c:982 [inline] RIP: 0010:mptcp_incoming_options+0x21a8/0x2510 net/mptcp/options.c:1153 Code: 24 18 e8 bb 2a 00 fd e9 1b df ff ff e8 b1 21 0f 00 e8 ec 5f c4 fc 44 0f b7 ac 24 b0 00 00 00 e9 54 f1 ff ff e8 d9 5f c4 fc 90 <0f> 0b 90 e9 b8 f4 ff ff e8 8b 2a 00 fd e9 8d e6 ff ff e8 81 2a 00 RSP: 0018:ffff8880a3f08448 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8880180a8000 RCX: ffffffff84afcf45 RDX: ffff888090223700 RSI: ffffffff84afdaa7 RDI: 0000000000000001 RBP: ffff888017955780 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff8880180a8910 R14: ffff8880a3e9d058 R15: 0000000000000000 FS: 00005555791b8500(0000) GS:ffff88811c495000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000110c2800b7 CR3: 0000000058e44000 CR4: 0000000000350ef0 Call Trace: <IRQ> 
tcp_reset+0x26f/0x2b0 net/ipv4/tcp_input.c:4432 tcp_validate_incoming+0x1057/0x1b60 net/ipv4/tcp_input.c:5975 tcp_rcv_established+0x5b5/0x21f0 net/ipv4/tcp_input.c:6166 tcp_v4_do_rcv+0x5dc/0xa70 net/ipv4/tcp_ipv4.c:1925 tcp_v4_rcv+0x3473/0x44a0 net/ipv4/tcp_ipv4.c:2363 ip_protocol_deliver_rcu+0xba/0x480 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2f1/0x500 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:317 [inline] NF_HOOK include/linux/netfilter.h:311 [inline] ip_local_deliver+0x1be/0x560 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:469 [inline] ip_rcv_finish net/ipv4/ip_input.c:447 [inline] NF_HOOK include/linux/netfilter.h:317 [inline] NF_HOOK include/linux/netfilter.h:311 [inline] ip_rcv+0x514/0x810 net/ipv4/ip_input.c:567 __netif_receive_skb_one_core+0x197/0x1e0 net/core/dev.c:5975 __netif_receive_skb+0x1f/0x120 net/core/dev.c:6088 process_backlog+0x301/0x1360 net/core/dev.c:6440 __napi_poll.constprop.0+0xba/0x550 net/core/dev.c:7453 napi_poll net/core/dev.c:7517 [inline] net_rx_action+0xb44/0x1010 net/core/dev.c:7644 handle_softirqs+0x1d0/0x770 kernel/softirq.c:579 do_softirq+0x3f/0x90 kernel/softirq.c:480 </IRQ> <TASK> __local_bh_enable_ip+0xed/0x110 kernel/softirq.c:407 local_bh_enable include/linux/bottom_half.h:33 [inline] inet_csk_listen_stop+0x2c5/0x1070 net/ipv4/inet_connection_sock.c:1524 mptcp_check_listen_stop.part.0+0x1cc/0x220 net/mptcp/protocol.c:2985 mptcp_check_listen_stop net/mptcp/mib.h:118 [inline] __mptcp_close+0x9b9/0xbd0 net/mptcp/protocol.c:3000 mptcp_close+0x2f/0x140 net/mptcp/protocol.c:3066 inet_release+0xed/0x200 net/ipv4/af_inet.c:435 inet6_release+0x4f/0x70 net/ipv6/af_inet6.c:487 __sock_release+0xb3/0x270 net/socket.c:649 sock_close+0x1c/0x30 net/socket.c:1439 __fput+0x402/0xb70 fs/file_table.c:465 task_work_run+0x150/0x240 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xd4/0xe0 kernel/entry/common.c:114 exit_to_user_mode_prepare 
include/linux/entry-common.h:330 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:414 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:449 [inline] do_syscall_64+0x245/0x360 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fc92f8a36ad Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffcf52802d8 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00007ffcf52803a8 RCX: 00007fc92f8a36ad RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007fc92fae7ba0 R08: 0000000000000001 R09: 0000002800000000 R10: 00007fc92f700000 R11: 0000000000000246 R12: 00007fc92fae5fac R13: 00007fc92fae5fa0 R14: 0000000000026d00 R15: 0000000000026c51 </TASK> irq event stamp: 4068 hardirqs last enabled at (4076): [<ffffffff81544816>] __up_console_sem+0x76/0x80 kernel/printk/printk.c:344 hardirqs last disabled at (4085): [<ffffffff815447fb>] __up_console_sem+0x5b/0x80 kernel/printk/printk.c:342 softirqs last enabled at (3096): [<ffffffff840e1be0>] local_bh_enable include/linux/bottom_half.h:33 [inline] softirqs last enabled at (3096): [<ffffffff840e1be0>] inet_csk_listen_stop+0x2c0/0x1070 net/ipv4/inet_connection_sock.c:1524 softirqs last disabled at (3097): [<ffffffff813b6b9f>] do_softirq+0x3f/0x90 kernel/softirq.c:480 Since we need to track the 'fallback is possible' condition and the fallback status separately, there are a few possible races open between the check and the actual fallback action. Add a spinlock to protect the fallback-related information and use it to close all the possible related races. While at it, also remove the too-early clearing of allow_infinite_fallback in __mptcp_subflow_connect(): the field will be correctly cleared by subflow_finish_connect() if/when the connection completes successfully. 
If fallback is not possible, as per RFC, reset the current subflow. Since the fallback operation can now fail and its return value should be checked, rename the helper accordingly. Fixes: 0530020 ("mptcp: track and update contiguous data status") Cc: stable@vger.kernel.org Reported-by: Matthieu Baerts <matttbe@kernel.org> Closes: multipath-tcp/mptcp_net-next#570 Reported-by: syzbot+5cf807c20386d699b524@syzkaller.appspotmail.com Closes: multipath-tcp/mptcp_net-next#555 Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> Link: https://patch.msgid.link/20250714-net-mptcp-fallback-races-v1-1-391aff963322@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Davide Caratti <dcaratti@redhat.com>
1 parent c618fde commit 93b1754

File tree

4 files changed

+61
-19
lines changed

4 files changed

+61
-19
lines changed

net/mptcp/options.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,8 +979,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
979979
if (subflow->mp_join)
980980
goto reset;
981981
subflow->mp_capable = 0;
982+
if (!mptcp_try_fallback(ssk))
983+
goto reset;
982984
pr_fallback(msk);
983-
mptcp_do_fallback(ssk);
984985
return false;
985986
}
986987

net/mptcp/protocol.c

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -622,10 +622,9 @@ static bool mptcp_check_data_fin(struct sock *sk)
622622

623623
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
624624
{
625-
if (READ_ONCE(msk->allow_infinite_fallback)) {
625+
if (mptcp_try_fallback(ssk)) {
626626
MPTCP_INC_STATS(sock_net(ssk),
627627
MPTCP_MIB_DSSCORRUPTIONFALLBACK);
628-
mptcp_do_fallback(ssk);
629628
} else {
630629
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
631630
mptcp_subflow_reset(ssk);
@@ -886,6 +885,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
886885
if (sk->sk_state != TCP_ESTABLISHED)
887886
return false;
888887

888+
spin_lock_bh(&msk->fallback_lock);
889+
if (__mptcp_check_fallback(msk)) {
890+
spin_unlock_bh(&msk->fallback_lock);
891+
return false;
892+
}
893+
mptcp_subflow_joined(msk, ssk);
894+
spin_unlock_bh(&msk->fallback_lock);
895+
889896
/* attach to msk socket only after we are sure we will deal with it
890897
* at close time
891898
*/
@@ -894,7 +901,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
894901

895902
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
896903
mptcp_sockopt_sync_locked(msk, ssk);
897-
mptcp_subflow_joined(msk, ssk);
898904
mptcp_stop_tout_timer(sk);
899905
__mptcp_propagate_sndbuf(sk, ssk);
900906
return true;
@@ -1233,10 +1239,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
12331239
mpext->infinite_map = 1;
12341240
mpext->data_len = 0;
12351241

1242+
if (!mptcp_try_fallback(ssk)) {
1243+
mptcp_subflow_reset(ssk);
1244+
return;
1245+
}
1246+
12361247
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
12371248
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
12381249
pr_fallback(msk);
1239-
mptcp_do_fallback(ssk);
12401250
}
12411251

12421252
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -2638,9 +2648,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
26382648

26392649
static void __mptcp_retrans(struct sock *sk)
26402650
{
2651+
struct mptcp_sendmsg_info info = { .data_lock_held = true, };
26412652
struct mptcp_sock *msk = mptcp_sk(sk);
26422653
struct mptcp_subflow_context *subflow;
2643-
struct mptcp_sendmsg_info info = {};
26442654
struct mptcp_data_frag *dfrag;
26452655
struct sock *ssk;
26462656
int ret, err;
@@ -2685,6 +2695,18 @@ static void __mptcp_retrans(struct sock *sk)
26852695
info.sent = 0;
26862696
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
26872697
dfrag->already_sent;
2698+
2699+
/*
2700+
* make the whole retrans decision, xmit, disallow
2701+
* fallback atomic
2702+
*/
2703+
spin_lock_bh(&msk->fallback_lock);
2704+
if (__mptcp_check_fallback(msk)) {
2705+
spin_unlock_bh(&msk->fallback_lock);
2706+
release_sock(ssk);
2707+
return;
2708+
}
2709+
26882710
while (info.sent < info.limit) {
26892711
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
26902712
if (ret <= 0)
@@ -2700,6 +2722,7 @@ static void __mptcp_retrans(struct sock *sk)
27002722
info.size_goal);
27012723
WRITE_ONCE(msk->allow_infinite_fallback, false);
27022724
}
2725+
spin_unlock_bh(&msk->fallback_lock);
27032726

27042727
release_sock(ssk);
27052728
}
@@ -2833,6 +2856,7 @@ static void __mptcp_init_sock(struct sock *sk)
28332856
msk->subflow_id = 1;
28342857

28352858
mptcp_pm_data_init(msk);
2859+
spin_lock_init(&msk->fallback_lock);
28362860

28372861
/* re-use the csk retrans timer for MPTCP-level retrans */
28382862
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3625,7 +3649,13 @@ bool mptcp_finish_join(struct sock *ssk)
36253649

36263650
/* active subflow, already present inside the conn_list */
36273651
if (!list_empty(&subflow->node)) {
3652+
spin_lock_bh(&msk->fallback_lock);
3653+
if (__mptcp_check_fallback(msk)) {
3654+
spin_unlock_bh(&msk->fallback_lock);
3655+
return false;
3656+
}
36283657
mptcp_subflow_joined(msk, ssk);
3658+
spin_unlock_bh(&msk->fallback_lock);
36293659
mptcp_propagate_sndbuf(parent, ssk);
36303660
return true;
36313661
}

net/mptcp/protocol.h

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ struct mptcp_sock {
337337
u32 subflow_id;
338338
u32 setsockopt_seq;
339339
char ca_name[TCP_CA_NAME_MAX];
340+
341+
spinlock_t fallback_lock; /* protects fallback and
342+
* allow_infinite_fallback
343+
*/
340344
};
341345

342346
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -1169,15 +1173,21 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
11691173
return __mptcp_check_fallback(msk);
11701174
}
11711175

1172-
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
1176+
static inline bool __mptcp_try_fallback(struct mptcp_sock *msk)
11731177
{
11741178
if (__mptcp_check_fallback(msk)) {
11751179
pr_debug("TCP fallback already done (msk=%p)\n", msk);
1176-
return;
1180+
return true;
11771181
}
1178-
if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback)))
1179-
return;
1182+
spin_lock_bh(&msk->fallback_lock);
1183+
if (!msk->allow_infinite_fallback) {
1184+
spin_unlock_bh(&msk->fallback_lock);
1185+
return false;
1186+
}
1187+
11801188
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
1189+
spin_unlock_bh(&msk->fallback_lock);
1190+
return true;
11811191
}
11821192

11831193
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
@@ -1189,14 +1199,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
11891199
TCPF_SYN_RECV | TCPF_LISTEN));
11901200
}
11911201

1192-
static inline void mptcp_do_fallback(struct sock *ssk)
1202+
static inline bool mptcp_try_fallback(struct sock *ssk)
11931203
{
11941204
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
11951205
struct sock *sk = subflow->conn;
11961206
struct mptcp_sock *msk;
11971207

11981208
msk = mptcp_sk(sk);
1199-
__mptcp_do_fallback(msk);
1209+
if (!__mptcp_try_fallback(msk))
1210+
return false;
12001211
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
12011212
gfp_t saved_allocation = ssk->sk_allocation;
12021213

@@ -1208,6 +1219,7 @@ static inline void mptcp_do_fallback(struct sock *ssk)
12081219
tcp_shutdown(ssk, SEND_SHUTDOWN);
12091220
ssk->sk_allocation = saved_allocation;
12101221
}
1222+
return true;
12111223
}
12121224

12131225
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
@@ -1217,7 +1229,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
12171229
{
12181230
pr_fallback(msk);
12191231
subflow->request_mptcp = 0;
1220-
__mptcp_do_fallback(msk);
1232+
WARN_ON_ONCE(!__mptcp_try_fallback(msk));
12211233
}
12221234

12231235
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)

net/mptcp/subflow.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
540540
mptcp_get_options(skb, &mp_opt);
541541
if (subflow->request_mptcp) {
542542
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
543+
if (!mptcp_try_fallback(sk))
544+
goto do_reset;
545+
543546
MPTCP_INC_STATS(sock_net(sk),
544547
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
545-
mptcp_do_fallback(sk);
546548
pr_fallback(msk);
547549
goto fallback;
548550
}
@@ -1367,7 +1369,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
13671369
return true;
13681370
}
13691371

1370-
if (!READ_ONCE(msk->allow_infinite_fallback)) {
1372+
if (!mptcp_try_fallback(ssk)) {
13711373
/* fatal protocol error, close the socket.
13721374
* subflow_error_report() will introduce the appropriate barriers
13731375
*/
@@ -1383,8 +1385,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
13831385
WRITE_ONCE(subflow->data_avail, false);
13841386
return false;
13851387
}
1386-
1387-
mptcp_do_fallback(ssk);
13881388
}
13891389

13901390
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1627,7 +1627,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
16271627
/* discard the subflow socket */
16281628
mptcp_sock_graft(ssk, sk->sk_socket);
16291629
iput(SOCK_INODE(sf));
1630-
WRITE_ONCE(msk->allow_infinite_fallback, false);
16311630
mptcp_stop_tout_timer(sk);
16321631
return 0;
16331632

@@ -1807,7 +1806,7 @@ static void subflow_state_change(struct sock *sk)
18071806

18081807
msk = mptcp_sk(parent);
18091808
if (subflow_simultaneous_connect(sk)) {
1810-
mptcp_do_fallback(sk);
1809+
WARN_ON_ONCE(!mptcp_try_fallback(sk));
18111810
pr_fallback(msk);
18121811
subflow->conn_finished = 1;
18131812
mptcp_propagate_state(parent, sk, subflow, NULL);

0 commit comments

Comments
 (0)