Skip to content

Commit 1c2d0bd

Browse files
committed
dialog: atomic state transition to close BYE/timer races (GH-3835)
The previous fix (196b51f) narrowed the race window between BYE processing and dlg_ontimeout() but could not fully close it: a BYE arriving between the state check and dlg_end_dlg() creates a TM transaction ref (via dlg_set_tm_dialog_ctx) that outlives the dialog. When the timeout-BYE responses destroy the dialog first, the real BYE's transaction callback dereferences freed memory. The same race pattern exists in dlg_options_routine() and dlg_reinvite_routine(), where expired ping dialogs call dlg_end_dlg() without any state transition. Fix: replace racy state checks with atomic next_state_dlg(DLG_EVENT_REQBYE) calls. The hash entry lock guarantees only one code path -- timer, ping handler, or BYE handler -- wins the CONFIRMED -> DELETED transition. Since dual_bye_event() now only sees DELETED -> DELETED for the timeout-BYE responses, the cleanup that it would normally perform on the first CONFIRMED -> DELETED transition (rt_on_hangup, profile linkers, DLGCB_TERMINATED callback, DB removal) is moved into the winning timeout/ping path. Three paths fixed: - dlg_ontimeout() bye_on_timeout (dlg_handlers.c) - dlg_options_routine() expired pings (dlg_timer.c) - dlg_reinvite_routine() expired pings (dlg_timer.c) Ref: #3835
1 parent 8f959e7 commit 1c2d0bd

File tree

2 files changed

+124
-26
lines changed

2 files changed

+124
-26
lines changed

modules/dialog/dlg_handlers.c

Lines changed: 63 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2670,22 +2670,71 @@ void dlg_ontimeout(struct dlg_tl *tl)
26702670
if ((dlg->flags&DLG_FLAG_BYEONTIMEOUT) &&
26712671
(dlg->state==DLG_STATE_CONFIRMED_NA || dlg->state==DLG_STATE_CONFIRMED)) {
26722672

2673-
if (do_expire_actions) {
2674-
if (dlg->flags & DLG_FLAG_RACE_CONDITION_OCCURRED)
2675-
init_dlg_term_reason(dlg,"SIP Race Condition",
2676-
sizeof("SIP Race Condition")-1);
2677-
else
2678-
init_dlg_term_reason(dlg,"Lifetime Timeout",
2679-
sizeof("Lifetime Timeout")-1);
2673+
/* Atomically claim the CONFIRMED -> DELETED transition (GH-3835).
2674+
* Without this, a BYE arriving between the state check above and
2675+
* dlg_end_dlg() below creates a TM transaction ref that outlives
2676+
* the dialog, causing use-after-free ("bogus ref -1").
2677+
* next_state_dlg holds the hash lock during the transition, so
2678+
* only one code path (timer or BYE handler) can win. */
2679+
next_state_dlg(dlg, DLG_EVENT_REQBYE, DLG_DIR_DOWNSTREAM,
2680+
&old_state, &new_state, &unref,
2681+
dlg->legs_no[DLG_LEG_200OK], do_expire_actions);
2682+
2683+
if (new_state == DLG_STATE_DELETED &&
2684+
old_state != DLG_STATE_DELETED) {
2685+
/* We won the transition -- handle cleanup and send BYEs.
2686+
* dual_bye_event() will see DELETED->DELETED for both BYE
2687+
* responses, so cleanup must happen here. */
2688+
2689+
if (do_expire_actions) {
2690+
if (dlg->flags & DLG_FLAG_RACE_CONDITION_OCCURRED)
2691+
init_dlg_term_reason(dlg,"SIP Race Condition",
2692+
sizeof("SIP Race Condition")-1);
2693+
else
2694+
init_dlg_term_reason(dlg,"Lifetime Timeout",
2695+
sizeof("Lifetime Timeout")-1);
2696+
}
2697+
2698+
if (ref_script_route_check_and_update(dlg->rt_on_hangup))
2699+
run_dlg_script_route(dlg, dlg->rt_on_hangup->idx);
2700+
2701+
destroy_linkers(dlg);
2702+
remove_dlg_prof_table(dlg, do_expire_actions);
2703+
2704+
/* fire DLGCB_TERMINATED -- same callback dual_bye_event
2705+
* would normally fire on the first BYE response */
2706+
if (push_new_processing_context(dlg, &old_ctx,
2707+
&new_ctx, &fake_msg) == 0) {
2708+
if (do_expire_actions)
2709+
run_dlg_callbacks(DLGCB_TERMINATED, dlg,
2710+
fake_msg, DLG_DIR_NONE, -1,
2711+
NULL, 0, do_expire_actions);
2712+
if (current_processing_ctx == NULL)
2713+
*new_ctx = NULL;
2714+
else
2715+
context_destroy(CONTEXT_GLOBAL, *new_ctx);
2716+
set_global_context(old_ctx);
2717+
release_dummy_sip_msg(fake_msg);
2718+
}
2719+
2720+
if (should_remove_dlg_db())
2721+
remove_dialog_from_db(dlg);
2722+
2723+
/* send BYEs in both directions */
2724+
dlg_end_dlg(dlg, NULL, do_expire_actions);
2725+
2726+
/* release timer ref (1) + hash ref from
2727+
* next_state_dlg (unref) */
2728+
unref_dlg(dlg, unref + 1);
2729+
2730+
if_update_stat(dlg_enable_stats, expired_dlgs, 1);
2731+
if_update_stat(dlg_enable_stats, active_dlgs, -1);
2732+
return;
26802733
}
2681-
/* we just send the BYEs in both directions */
2682-
dlg_end_dlg(dlg, NULL, do_expire_actions);
2683-
/* dialog is no longer refed by timer; from now on it is refed
2684-
by the send_bye functions */
2685-
unref_dlg(dlg, 1);
2686-
/* is not 100% sure, but do it */
2687-
if_update_stat(dlg_enable_stats, expired_dlgs, 1);
26882734

2735+
/* Lost the race -- a BYE handler already transitioned this
2736+
* dialog to DELETED and owns cleanup. Release timer ref. */
2737+
unref_dlg(dlg, 1);
26892738
return;
26902739
}
26912740

modules/dialog/dlg_timer.c

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include "dlg_hash.h"
2727
#include "dlg_req_within.h"
2828
#include "dlg_replication.h"
29+
#include "dlg_handlers.h"
30+
#include "dlg_profile.h"
31+
#include "dlg_cb.h"
32+
#include "dlg_db_handler.h"
2933

3034
struct dlg_timer *d_timer = 0;
3135
dlg_timer_handler timer_hdl = 0;
@@ -888,6 +892,57 @@ void unref_dlg_cb(void *dlg)
888892
unref_dlg_destroy_safe((struct dlg_cell*)dlg,1);
889893
}
890894

895+
/* Atomically terminate a dialog from a ping/timer context (GH-3835).
896+
* Claims the CONFIRMED->DELETED transition via next_state_dlg so that
897+
* a concurrent BYE cannot race. Returns 1 if the dialog was terminated,
898+
* 0 if another code path already owns it. */
899+
static int dlg_ping_terminate(struct dlg_cell *dlg)
900+
{
901+
int old_state, new_state, unref;
902+
struct sip_msg *fake_msg = NULL;
903+
context_p old_ctx;
904+
context_p *new_ctx;
905+
906+
next_state_dlg(dlg, DLG_EVENT_REQBYE, DLG_DIR_DOWNSTREAM,
907+
&old_state, &new_state, &unref,
908+
dlg->legs_no[DLG_LEG_200OK], 1);
909+
910+
if (new_state != DLG_STATE_DELETED || old_state == DLG_STATE_DELETED) {
911+
/* Lost the race -- a BYE handler already owns this dialog */
912+
unref_dlg(dlg, 1);
913+
return 0;
914+
}
915+
916+
/* Won the transition -- run cleanup before sending BYEs */
917+
if (ref_script_route_check_and_update(dlg->rt_on_hangup))
918+
run_dlg_script_route(dlg, dlg->rt_on_hangup->idx);
919+
920+
destroy_linkers(dlg);
921+
remove_dlg_prof_table(dlg, 1);
922+
923+
if (push_new_processing_context(dlg, &old_ctx, &new_ctx, &fake_msg) == 0) {
924+
run_dlg_callbacks(DLGCB_TERMINATED, dlg, fake_msg,
925+
DLG_DIR_NONE, -1, NULL, 0, 1);
926+
if (current_processing_ctx == NULL)
927+
*new_ctx = NULL;
928+
else
929+
context_destroy(CONTEXT_GLOBAL, *new_ctx);
930+
set_global_context(old_ctx);
931+
release_dummy_sip_msg(fake_msg);
932+
}
933+
934+
if (should_remove_dlg_db())
935+
remove_dialog_from_db(dlg);
936+
937+
dlg_end_dlg(dlg, NULL, 1);
938+
939+
/* ping list ref (1) + hash ref from next_state_dlg (unref) */
940+
unref_dlg(dlg, unref + 1);
941+
942+
if_update_stat(dlg_enable_stats, active_dlgs, -1);
943+
return 1;
944+
}
945+
891946
void dlg_options_routine(unsigned int ticks , void * attr)
892947
{
893948
struct dlg_ping_list *expired,*to_be_deleted,*it,*curr,*next;
@@ -915,12 +970,9 @@ void dlg_options_routine(unsigned int ticks , void * attr)
915970
dlg->legs[callee_idx(dlg)].reply_received);
916971
init_dlg_term_reason(dlg, MI_SSTR("Ping Timeout"));
917972
}
918-
/* FIXME - maybe better not to send BYE both ways as we know for
919-
* sure one end in down . */
920-
dlg_end_dlg(dlg,0,1);
921-
922-
/* no longer reffed in list */
923-
unref_dlg(dlg,1);
973+
/* Atomically claim CONFIRMED->DELETED before sending BYEs
974+
* to prevent race with concurrent BYE (GH-3835) */
975+
dlg_ping_terminate(dlg);
924976
}
925977

926978
it = to_be_deleted;
@@ -1022,12 +1074,9 @@ void dlg_reinvite_routine(unsigned int ticks , void * attr)
10221074
dlg->legs[callee_idx(dlg)].reinvite_confirmed);
10231075
init_dlg_term_reason(dlg, MI_SSTR("ReINVITE Ping Timeout"));
10241076
}
1025-
/* FIXME - maybe better not to send BYE both ways as we know for
1026-
* sure one end in down . */
1027-
dlg_end_dlg(dlg,0,1);
1028-
1029-
/* no longer reffed in list */
1030-
unref_dlg(dlg,1);
1077+
/* Atomically claim CONFIRMED->DELETED before sending BYEs
1078+
* to prevent race with concurrent BYE (GH-3835) */
1079+
dlg_ping_terminate(dlg);
10311080
}
10321081

10331082
it = to_be_deleted;

0 commit comments

Comments
 (0)