Skip to content

Commit 8ccd116

Browse files
committed
Merge branch 'tcp-provide-better-locality-for-retransmit-timer'
Eric Dumazet says: ==================== tcp: provide better locality for retransmit timer TCP stack uses three timers per flow, currently spread this way: - sk->sk_timer : keepalive timer - icsk->icsk_retransmit_timer : retransmit timer - icsk->icsk_delack_timer : delayed ack timer This series moves the retransmit timer to sk->sk_timer location, to increase data locality in TX paths. keepalive timers are not often used, this change should be neutral for them. After the series we have the following fields: - sk->tcp_retransmit_timer : retransmit timer, in sock_write_tx group - icsk->icsk_delack_timer : delayed ack timer - icsk->icsk_keepalive_timer : keepalive timer Moving icsk_delack_timer to a better location would also be welcomed. ==================== Link: https://patch.msgid.link/20251124175013.1473655-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 436fa8e + 9a5e533 commit 8ccd116

File tree

13 files changed

+74
-69
lines changed

13 files changed

+74
-69
lines changed

Documentation/networking/net_cachelines/inet_connection_sock.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ struct inet_sock icsk_inet read_mostly r
1212
struct request_sock_queue icsk_accept_queue
1313
struct inet_bind_bucket icsk_bind_hash read_mostly tcp_set_state
1414
struct inet_bind2_bucket icsk_bind2_hash read_mostly tcp_set_state,inet_put_port
15-
struct timer_list icsk_retransmit_timer read_write inet_csk_reset_xmit_timer,tcp_connect
1615
struct timer_list icsk_delack_timer read_mostly inet_csk_reset_xmit_timer,tcp_connect
16+
struct timer_list icsk_keepalive_timer
1717
u32 icsk_rto read_write tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
1818
u32 icsk_rto_min
1919
u32 icsk_rto_max read_mostly tcp_reset_xmit_timer

include/net/inet_connection_sock.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ struct inet_connection_sock_af_ops {
5656
* @icsk_accept_queue: FIFO of established children
5757
* @icsk_bind_hash: Bind node
5858
* @icsk_bind2_hash: Bind node in the bhash2 table
59-
* @icsk_retransmit_timer: Resend (no ack)
59+
* @icsk_delack_timer: Delayed ACK timer
60+
* @icsk_keepalive_timer: Keepalive timer
61+
* @mptcp_tout_timer: mptcp timer
6062
* @icsk_rto: Retransmit timeout
6163
* @icsk_pmtu_cookie Last pmtu seen by socket
6264
* @icsk_ca_ops Pluggable congestion control hook
@@ -81,8 +83,11 @@ struct inet_connection_sock {
8183
struct request_sock_queue icsk_accept_queue;
8284
struct inet_bind_bucket *icsk_bind_hash;
8385
struct inet_bind2_bucket *icsk_bind2_hash;
84-
struct timer_list icsk_retransmit_timer;
85-
struct timer_list icsk_delack_timer;
86+
struct timer_list icsk_delack_timer;
87+
union {
88+
struct timer_list icsk_keepalive_timer;
89+
struct timer_list mptcp_tout_timer;
90+
};
8691
__u32 icsk_rto;
8792
__u32 icsk_rto_min;
8893
u32 icsk_rto_max;
@@ -184,10 +189,9 @@ static inline void inet_csk_delack_init(struct sock *sk)
184189
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
185190
}
186191

187-
static inline unsigned long
188-
icsk_timeout(const struct inet_connection_sock *icsk)
192+
static inline unsigned long tcp_timeout_expires(const struct sock *sk)
189193
{
190-
return READ_ONCE(icsk->icsk_retransmit_timer.expires);
194+
return READ_ONCE(sk->tcp_retransmit_timer.expires);
191195
}
192196

193197
static inline unsigned long
@@ -203,7 +207,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
203207
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
204208
smp_store_release(&icsk->icsk_pending, 0);
205209
#ifdef INET_CSK_CLEAR_TIMERS
206-
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
210+
sk_stop_timer(sk, &sk->tcp_retransmit_timer);
207211
#endif
208212
} else if (what == ICSK_TIME_DACK) {
209213
smp_store_release(&icsk->icsk_ack.pending, 0);
@@ -235,7 +239,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
235239
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
236240
what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
237241
smp_store_release(&icsk->icsk_pending, what);
238-
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, when);
242+
sk_reset_timer(sk, &sk->tcp_retransmit_timer, when);
239243
} else if (what == ICSK_TIME_DACK) {
240244
smp_store_release(&icsk->icsk_ack.pending,
241245
icsk->icsk_ack.pending | ICSK_ACK_TIMER);

include/net/sock.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ struct sk_filter;
305305
* @sk_txrehash: enable TX hash rethink
306306
* @sk_filter: socket filtering instructions
307307
* @sk_timer: sock cleanup timer
308+
* @tcp_retransmit_timer: tcp retransmit timer
309+
* @mptcp_retransmit_timer: mptcp retransmit timer
308310
* @sk_stamp: time stamp of last packet received
309311
* @sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
310312
* @sk_tsflags: SO_TIMESTAMPING flags
@@ -481,18 +483,21 @@ struct sock {
481483
struct rb_root tcp_rtx_queue;
482484
};
483485
struct sk_buff_head sk_write_queue;
484-
u32 sk_dst_pending_confirm;
485-
u32 sk_pacing_status; /* see enum sk_pacing */
486486
struct page_frag sk_frag;
487-
struct timer_list sk_timer;
488-
487+
union {
488+
struct timer_list sk_timer;
489+
struct timer_list tcp_retransmit_timer;
490+
struct timer_list mptcp_retransmit_timer;
491+
};
489492
unsigned long sk_pacing_rate; /* bytes per second */
490493
atomic_t sk_zckey;
491494
atomic_t sk_tskey;
492495
unsigned long sk_tx_queue_mapping_jiffies;
493496
__cacheline_group_end(sock_write_tx);
494497

495498
__cacheline_group_begin(sock_read_tx);
499+
u32 sk_dst_pending_confirm;
500+
u32 sk_pacing_status; /* see enum sk_pacing */
496501
unsigned long sk_max_pacing_rate;
497502
long sk_sndtimeo;
498503
u32 sk_priority;

net/core/sock.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4519,14 +4519,14 @@ static int __init sock_struct_check(void)
45194519
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
45204520
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
45214521
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
4522-
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
4523-
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
45244522
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
45254523
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
45264524
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
45274525
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
45284526
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);
45294527

4528+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_pending_confirm);
4529+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_status);
45304530
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
45314531
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
45324532
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);

net/ipv4/inet_connection_sock.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -737,9 +737,9 @@ void inet_csk_init_xmit_timers(struct sock *sk,
737737
{
738738
struct inet_connection_sock *icsk = inet_csk(sk);
739739

740-
timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
740+
timer_setup(&sk->tcp_retransmit_timer, retransmit_handler, 0);
741741
timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
742-
timer_setup(&sk->sk_timer, keepalive_handler, 0);
742+
timer_setup(&icsk->icsk_keepalive_timer, keepalive_handler, 0);
743743
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
744744
}
745745

@@ -750,9 +750,9 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
750750
smp_store_release(&icsk->icsk_pending, 0);
751751
smp_store_release(&icsk->icsk_ack.pending, 0);
752752

753-
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
753+
sk_stop_timer(sk, &sk->tcp_retransmit_timer);
754754
sk_stop_timer(sk, &icsk->icsk_delack_timer);
755-
sk_stop_timer(sk, &sk->sk_timer);
755+
sk_stop_timer(sk, &icsk->icsk_keepalive_timer);
756756
}
757757

758758
void inet_csk_clear_xmit_timers_sync(struct sock *sk)
@@ -765,9 +765,9 @@ void inet_csk_clear_xmit_timers_sync(struct sock *sk)
765765
smp_store_release(&icsk->icsk_pending, 0);
766766
smp_store_release(&icsk->icsk_ack.pending, 0);
767767

768-
sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
768+
sk_stop_timer_sync(sk, &sk->tcp_retransmit_timer);
769769
sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
770-
sk_stop_timer_sync(sk, &sk->sk_timer);
770+
sk_stop_timer_sync(sk, &icsk->icsk_keepalive_timer);
771771
}
772772

773773
struct dst_entry *inet_csk_route_req(const struct sock *sk,

net/ipv4/inet_diag.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -287,17 +287,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
287287
r->idiag_timer = 1;
288288
r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits);
289289
r->idiag_expires =
290-
jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
290+
jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies);
291291
} else if (icsk_pending == ICSK_TIME_PROBE0) {
292292
r->idiag_timer = 4;
293293
r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
294294
r->idiag_expires =
295-
jiffies_delta_to_msecs(icsk_timeout(icsk) - jiffies);
296-
} else if (timer_pending(&sk->sk_timer)) {
295+
jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies);
296+
} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
297297
r->idiag_timer = 2;
298298
r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out);
299299
r->idiag_expires =
300-
jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
300+
jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies);
301301
}
302302

303303
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {

net/ipv4/tcp_ipv4.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2869,13 +2869,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
28692869
icsk_pending == ICSK_TIME_REO_TIMEOUT ||
28702870
icsk_pending == ICSK_TIME_LOSS_PROBE) {
28712871
timer_active = 1;
2872-
timer_expires = icsk_timeout(icsk);
2872+
timer_expires = tcp_timeout_expires(sk);
28732873
} else if (icsk_pending == ICSK_TIME_PROBE0) {
28742874
timer_active = 4;
2875-
timer_expires = icsk_timeout(icsk);
2876-
} else if (timer_pending(&sk->sk_timer)) {
2875+
timer_expires = tcp_timeout_expires(sk);
2876+
} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
28772877
timer_active = 2;
2878-
timer_expires = sk->sk_timer.expires;
2878+
timer_expires = icsk->icsk_keepalive_timer.expires;
28792879
} else {
28802880
timer_active = 0;
28812881
timer_expires = jiffies;

net/ipv4/tcp_timer.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ static bool tcp_rtx_probe0_timed_out(const struct sock *sk,
510510
* and tp->rcv_tstamp might very well have been written recently.
511511
* rcv_delta can thus be negative.
512512
*/
513-
rcv_delta = icsk_timeout(icsk) - tp->rcv_tstamp;
513+
rcv_delta = tcp_timeout_expires(sk) - tp->rcv_tstamp;
514514
if (rcv_delta <= timeout)
515515
return false;
516516

@@ -697,9 +697,9 @@ void tcp_write_timer_handler(struct sock *sk)
697697
!icsk->icsk_pending)
698698
return;
699699

700-
if (time_after(icsk_timeout(icsk), jiffies)) {
701-
sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
702-
icsk_timeout(icsk));
700+
if (time_after(tcp_timeout_expires(sk), jiffies)) {
701+
sk_reset_timer(sk, &sk->tcp_retransmit_timer,
702+
tcp_timeout_expires(sk));
703703
return;
704704
}
705705
tcp_mstamp_refresh(tcp_sk(sk));
@@ -725,12 +725,10 @@ void tcp_write_timer_handler(struct sock *sk)
725725

726726
static void tcp_write_timer(struct timer_list *t)
727727
{
728-
struct inet_connection_sock *icsk =
729-
timer_container_of(icsk, t, icsk_retransmit_timer);
730-
struct sock *sk = &icsk->icsk_inet.sk;
728+
struct sock *sk = timer_container_of(sk, t, tcp_retransmit_timer);
731729

732730
/* Avoid locking the socket when there is no pending event. */
733-
if (!smp_load_acquire(&icsk->icsk_pending))
731+
if (!smp_load_acquire(&inet_csk(sk)->icsk_pending))
734732
goto out;
735733

736734
bh_lock_sock(sk);
@@ -755,12 +753,12 @@ void tcp_syn_ack_timeout(const struct request_sock *req)
755753

756754
void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
757755
{
758-
sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
756+
sk_reset_timer(sk, &inet_csk(sk)->icsk_keepalive_timer, jiffies + len);
759757
}
760758

761759
static void tcp_delete_keepalive_timer(struct sock *sk)
762760
{
763-
sk_stop_timer(sk, &sk->sk_timer);
761+
sk_stop_timer(sk, &inet_csk(sk)->icsk_keepalive_timer);
764762
}
765763

766764
void tcp_set_keepalive(struct sock *sk, int val)
@@ -777,8 +775,9 @@ EXPORT_IPV6_MOD_GPL(tcp_set_keepalive);
777775

778776
static void tcp_keepalive_timer(struct timer_list *t)
779777
{
780-
struct sock *sk = timer_container_of(sk, t, sk_timer);
781-
struct inet_connection_sock *icsk = inet_csk(sk);
778+
struct inet_connection_sock *icsk =
779+
timer_container_of(icsk, t, icsk_keepalive_timer);
780+
struct sock *sk = &icsk->icsk_inet.sk;
782781
struct tcp_sock *tp = tcp_sk(sk);
783782
u32 elapsed;
784783

net/ipv6/tcp_ipv6.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,13 +2163,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
21632163
icsk_pending == ICSK_TIME_REO_TIMEOUT ||
21642164
icsk_pending == ICSK_TIME_LOSS_PROBE) {
21652165
timer_active = 1;
2166-
timer_expires = icsk_timeout(icsk);
2166+
timer_expires = tcp_timeout_expires(sp);
21672167
} else if (icsk_pending == ICSK_TIME_PROBE0) {
21682168
timer_active = 4;
2169-
timer_expires = icsk_timeout(icsk);
2170-
} else if (timer_pending(&sp->sk_timer)) {
2169+
timer_expires = tcp_timeout_expires(sp);
2170+
} else if (timer_pending(&icsk->icsk_keepalive_timer)) {
21712171
timer_active = 2;
2172-
timer_expires = sp->sk_timer.expires;
2172+
timer_expires = icsk->icsk_keepalive_timer.expires;
21732173
} else {
21742174
timer_active = 0;
21752175
timer_expires = jiffies;

net/mptcp/protocol.c

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -411,9 +411,7 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
411411

412412
static void mptcp_stop_rtx_timer(struct sock *sk)
413413
{
414-
struct inet_connection_sock *icsk = inet_csk(sk);
415-
416-
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
414+
sk_stop_timer(sk, &sk->mptcp_retransmit_timer);
417415
mptcp_sk(sk)->timer_ival = 0;
418416
}
419417

@@ -519,7 +517,7 @@ static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subfl
519517
const struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
520518

521519
return inet_csk(ssk)->icsk_pending && !subflow->stale_count ?
522-
icsk_timeout(inet_csk(ssk)) - jiffies : 0;
520+
tcp_timeout_expires(ssk) - jiffies : 0;
523521
}
524522

525523
static void mptcp_set_timeout(struct sock *sk)
@@ -954,20 +952,19 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
954952

955953
static bool mptcp_rtx_timer_pending(struct sock *sk)
956954
{
957-
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
955+
return timer_pending(&sk->mptcp_retransmit_timer);
958956
}
959957

960958
static void mptcp_reset_rtx_timer(struct sock *sk)
961959
{
962-
struct inet_connection_sock *icsk = inet_csk(sk);
963960
unsigned long tout;
964961

965962
/* prevent rescheduling on close */
966963
if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
967964
return;
968965

969966
tout = mptcp_sk(sk)->timer_ival;
970-
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
967+
sk_reset_timer(sk, &sk->mptcp_retransmit_timer, jiffies + tout);
971968
}
972969

973970
bool mptcp_schedule_work(struct sock *sk)
@@ -2354,9 +2351,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
23542351

23552352
static void mptcp_retransmit_timer(struct timer_list *t)
23562353
{
2357-
struct inet_connection_sock *icsk = timer_container_of(icsk, t,
2358-
icsk_retransmit_timer);
2359-
struct sock *sk = &icsk->icsk_inet.sk;
2354+
struct sock *sk = timer_container_of(sk, t, mptcp_retransmit_timer);
23602355
struct mptcp_sock *msk = mptcp_sk(sk);
23612356

23622357
bh_lock_sock(sk);
@@ -2374,7 +2369,9 @@ static void mptcp_retransmit_timer(struct timer_list *t)
23742369

23752370
static void mptcp_tout_timer(struct timer_list *t)
23762371
{
2377-
struct sock *sk = timer_container_of(sk, t, sk_timer);
2372+
struct inet_connection_sock *icsk =
2373+
timer_container_of(icsk, t, mptcp_tout_timer);
2374+
struct sock *sk = &icsk->icsk_inet.sk;
23782375

23792376
mptcp_schedule_work(sk);
23802377
sock_put(sk);
@@ -2828,7 +2825,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
28282825
*/
28292826
timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
28302827

2831-
sk_reset_timer(sk, &sk->sk_timer, timeout);
2828+
sk_reset_timer(sk, &inet_csk(sk)->mptcp_tout_timer, timeout);
28322829
}
28332830

28342831
static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
@@ -2973,8 +2970,8 @@ static void __mptcp_init_sock(struct sock *sk)
29732970
spin_lock_init(&msk->fallback_lock);
29742971

29752972
/* re-use the csk retrans timer for MPTCP-level retrans */
2976-
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
2977-
timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
2973+
timer_setup(&sk->mptcp_retransmit_timer, mptcp_retransmit_timer, 0);
2974+
timer_setup(&msk->sk.mptcp_tout_timer, mptcp_tout_timer, 0);
29782975
}
29792976

29802977
static void mptcp_ca_reset(struct sock *sk)
@@ -3176,7 +3173,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
31763173
might_sleep();
31773174

31783175
mptcp_stop_rtx_timer(sk);
3179-
sk_stop_timer(sk, &sk->sk_timer);
3176+
sk_stop_timer(sk, &inet_csk(sk)->mptcp_tout_timer);
31803177
msk->pm.status = 0;
31813178
mptcp_release_sched(msk);
31823179

0 commit comments

Comments
 (0)