[Asterisk-code-review] chan sip.c: Fix autokillid deadlock potential. (asterisk[master])

Anonymous Coward asteriskteam at digium.com
Thu Mar 17 08:19:32 CDT 2016


Anonymous Coward #1000019 has submitted this change and it was merged.

Change subject: chan_sip.c: Fix autokillid deadlock potential.
......................................................................


chan_sip.c: Fix autokillid deadlock potential.

This patch is part of a series to resolve deadlocks in chan_sip.c.

Stopping a scheduled event can result in a deadlock if the scheduled event
is running when you try to stop the event.  If you hold a lock needed by
the scheduled event while trying to stop the scheduled event then a
deadlock can happen.  The general strategy for resolving the deadlock
potential is to push the actual starting and stopping of the scheduled
events off onto the scheduler/do_monitor() thread by scheduling an
immediate one shot scheduled event.  Some restructuring may be needed
because the code may assume that the start/stop of the scheduled events is
immediate.

* Fix clearing autokillid in __sip_autodestruct() even though we could
reschedule.

ASTERISK-25023

Change-Id: I450580dbf26e2e3952ee6628c735b001565c368f
---
M channels/chan_sip.c
M channels/sip/include/dialog.h
2 files changed, 161 insertions(+), 84 deletions(-)

Approvals:
  Anonymous Coward #1000019: Verified
  Joshua Colp: Looks good to me, approved



diff --git a/channels/chan_sip.c b/channels/chan_sip.c
index d17f651..edcd66e 100644
--- a/channels/chan_sip.c
+++ b/channels/chan_sip.c
@@ -4259,6 +4259,7 @@
 	/* If this is a subscription, tell the phone that we got a timeout */
 	if (p->subscribed && p->subscribed != MWI_NOTIFICATION && p->subscribed != CALL_COMPLETION) {
 		struct state_notify_data data = { 0, };
+
 		data.state = AST_EXTENSION_DEACTIVATED;
 
 		transmit_state_notify(p, &data, 1, TRUE);	/* Send last notification */
@@ -4272,6 +4273,7 @@
 	if (p->packets) {
 		if (!p->needdestroy) {
 			char method_str[31];
+
 			ast_debug(3, "Re-scheduled destruction of SIP call %s\n", p->callid ? p->callid : "<unknown>");
 			append_history(p, "ReliableXmit", "timeout");
 			if (sscanf(p->lastmsg, "Tx: %30s", method_str) == 1 || sscanf(p->lastmsg, "Rx: %30s", method_str) == 1) {
@@ -4286,65 +4288,117 @@
 		}
 	}
 
-	/* Reset schedule ID */
-	p->autokillid = -1;
-
 	/*
 	 * Lock both the pvt and the channel safely so that we can queue up a frame.
 	 */
 	owner = sip_pvt_lock_full(p);
 	if (owner) {
-		ast_log(LOG_WARNING, "Autodestruct on dialog '%s' with owner %s in place (Method: %s). Rescheduling destruction for 10000 ms\n", p->callid, ast_channel_name(owner), sip_methods[p->method].text);
+		ast_log(LOG_WARNING,
+			"Autodestruct on dialog '%s' with owner %s in place (Method: %s). Rescheduling destruction for 10000 ms\n",
+			p->callid, ast_channel_name(owner), sip_methods[p->method].text);
 		ast_queue_hangup_with_cause(owner, AST_CAUSE_PROTOCOL_ERROR);
 		ast_channel_unlock(owner);
 		ast_channel_unref(owner);
 		sip_pvt_unlock(p);
 		return 10000;
-	} else if (p->refer && !p->alreadygone) {
+	}
+
+	/* Reset schedule ID */
+	p->autokillid = -1;
+
+	if (p->refer && !p->alreadygone) {
 		ast_debug(3, "Finally hanging up channel after transfer: %s\n", p->callid);
 		stop_media_flows(p);
 		transmit_request_with_auth(p, SIP_BYE, 0, XMIT_RELIABLE, 1);
 		append_history(p, "ReferBYE", "Sending BYE on transferer call leg %s", p->callid);
 		sip_scheddestroy(p, DEFAULT_TRANS_TIMEOUT);
+		sip_pvt_unlock(p);
 	} else {
 		append_history(p, "AutoDestroy", "%s", p->callid);
 		ast_debug(3, "Auto destroying SIP dialog '%s'\n", p->callid);
 		sip_pvt_unlock(p);
 		dialog_unlink_all(p); /* once it's unlinked and unrefd everywhere, it'll be freed automagically */
-		sip_pvt_lock(p);
-		/* dialog_unref(p, "unref dialog-- no other matching conditions"); -- unlink all now should finish off the dialog's references and free it. */
-		/* sip_destroy(p); */		/* Go ahead and destroy dialog. All attempts to recover is done */
-		/* sip_destroy also absorbs the reference */
 	}
 
-	sip_pvt_unlock(p);
-
-	dialog_unref(p, "The ref to a dialog passed to this sched callback is going out of scope; unref it.");
+	dialog_unref(p, "autokillid complete");
 
 	return 0;
 }
 
-/*! \brief Schedule final destruction of SIP dialog.  This can not be canceled.
- *  This function is used to keep a dialog around for a period of time in order
- *  to properly respond to any retransmits. */
-void sip_scheddestroy_final(struct sip_pvt *p, int ms)
+static void do_cancel_destroy(struct sip_pvt *pvt)
 {
-	if (p->final_destruction_scheduled) {
-		return; /* already set final destruction */
-	}
-
-	sip_scheddestroy(p, ms);
-	if (p->autokillid != -1) {
-		p->final_destruction_scheduled = 1;
+	if (-1 < pvt->autokillid) {
+		append_history(pvt, "CancelDestroy", "");
+		AST_SCHED_DEL_UNREF(sched, pvt->autokillid,
+			dialog_unref(pvt, "Stop scheduled autokillid"));
 	}
 }
 
-/*! \brief Schedule destruction of SIP dialog */
-void sip_scheddestroy(struct sip_pvt *p, int ms)
+/* Run by the sched thread. */
+static int __sip_cancel_destroy(const void *data)
 {
-	if (p->final_destruction_scheduled) {
-		return; /* already set final destruction */
+	struct sip_pvt *pvt = (void *) data;
+
+	sip_pvt_lock(pvt);
+	do_cancel_destroy(pvt);
+	sip_pvt_unlock(pvt);
+	dialog_unref(pvt, "Cancel destroy action");
+	return 0;
+}
+
+void sip_cancel_destroy(struct sip_pvt *pvt)
+{
+	if (pvt->final_destruction_scheduled) {
+		return;
 	}
+
+	dialog_ref(pvt, "Cancel destroy action");
+	if (ast_sched_add(sched, 0, __sip_cancel_destroy, pvt) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		dialog_unref(pvt, "Failed to schedule cancel destroy action");
+		ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+	}
+}
+
+struct sip_scheddestroy_data {
+	struct sip_pvt *pvt;
+	int ms;
+};
+
+/* Run by the sched thread. */
+static int __sip_scheddestroy(const void *data)
+{
+	struct sip_scheddestroy_data *sched_data = (void *) data;
+	struct sip_pvt *pvt = sched_data->pvt;
+	int ms = sched_data->ms;
+
+	ast_free(sched_data);
+
+	sip_pvt_lock(pvt);
+	do_cancel_destroy(pvt);
+
+	if (pvt->do_history) {
+		append_history(pvt, "SchedDestroy", "%d ms", ms);
+	}
+
+	dialog_ref(pvt, "Schedule autokillid");
+	pvt->autokillid = ast_sched_add(sched, ms, __sip_autodestruct, pvt);
+	if (pvt->autokillid < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		dialog_unref(pvt, "Failed to schedule autokillid");
+	}
+
+	if (pvt->stimer) {
+		stop_session_timer(pvt);
+	}
+	sip_pvt_unlock(pvt);
+	dialog_unref(pvt, "Destroy action");
+	return 0;
+}
+
+static int sip_scheddestroy_full(struct sip_pvt *p, int ms)
+{
+	struct sip_scheddestroy_data *sched_data;
 
 	if (ms < 0) {
 		if (p->timer_t1 == 0) {
@@ -4356,37 +4410,44 @@
 		ms = p->timer_t1 * 64;
 	}
 	if (sip_debug_test_pvt(p)) {
-		ast_verbose("Scheduling destruction of SIP dialog '%s' in %d ms (Method: %s)\n", p->callid, ms, sip_methods[p->method].text);
-	}
-	if (sip_cancel_destroy(p)) {
-		ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		ast_verbose("Scheduling destruction of SIP dialog '%s' in %d ms (Method: %s)\n",
+			p->callid, ms, sip_methods[p->method].text);
 	}
 
-	if (p->do_history) {
-		append_history(p, "SchedDestroy", "%d ms", ms);
+	sched_data = ast_malloc(sizeof(*sched_data));
+	if (!sched_data) {
+		/* Uh Oh.  Expect bad behavior. */
+		return -1;
 	}
-	p->autokillid = ast_sched_add(sched, ms, __sip_autodestruct, dialog_ref(p, "setting ref as passing into ast_sched_add for __sip_autodestruct"));
-
-	if (p->stimer && p->stimer->st_active == TRUE && p->stimer->st_schedid > -1) {
-		stop_session_timer(p);
-	}
-}
-
-/*! \brief Cancel destruction of SIP dialog.
- * Be careful as this also absorbs the reference - if you call it
- * from within the scheduler, this might be the last reference.
- */
-int sip_cancel_destroy(struct sip_pvt *p)
-{
-	if (p->final_destruction_scheduled) {
-		return 0;
-	}
-
-	if (p->autokillid > -1) {
-		append_history(p, "CancelDestroy", "");
-		AST_SCHED_DEL_UNREF(sched, p->autokillid, dialog_unref(p, "remove ref for autokillid"));
+	sched_data->pvt = p;
+	sched_data->ms = ms;
+	dialog_ref(p, "Destroy action");
+	if (ast_sched_add(sched, 0, __sip_scheddestroy, sched_data) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		dialog_unref(p, "Failed to schedule destroy action");
+		ast_free(sched_data);
+		return -1;
 	}
 	return 0;
+}
+
+void sip_scheddestroy(struct sip_pvt *p, int ms)
+{
+	if (p->final_destruction_scheduled) {
+		return; /* already set final destruction */
+	}
+	sip_scheddestroy_full(p, ms);
+}
+
+void sip_scheddestroy_final(struct sip_pvt *p, int ms)
+{
+	if (p->final_destruction_scheduled) {
+		return; /* already set final destruction */
+	}
+
+	if (!sip_scheddestroy_full(p, ms)) {
+		p->final_destruction_scheduled = 1;
+	}
 }
 
 /*! \brief Acknowledges receipt of a packet and stops retransmission
@@ -7166,9 +7227,8 @@
 				ast_set_flag(&p->flags[0], SIP_PENDINGBYE);
 				ast_clear_flag(&p->flags[0], SIP_NEEDREINVITE);
 				AST_SCHED_DEL_UNREF(sched, p->waitid, dialog_unref(p, "when you delete the waitid sched, you should dec the refcount for the stored dialog ptr"));
-				if (sip_cancel_destroy(p)) {
-					ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
-				}
+				sip_cancel_destroy(p);
+
 				/* If we have an ongoing reinvite, there is a chance that we have gotten a provisional
 				 * response, but something weird has happened and we will never receive a final response.
 				 * So, just in case, check for pending actions after a bit of time to trigger the pending
@@ -17373,8 +17433,7 @@
 			ast_copy_flags(&p->flags[0], &peer->flags[0], SIP_NAT_FORCE_RPORT);
 
 			if (!(res = check_auth(p, req, peer->name, peer->secret, peer->md5secret, SIP_REGISTER, uri2, XMIT_UNRELIABLE))) {
-				if (sip_cancel_destroy(p))
-					ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+				sip_cancel_destroy(p);
 
 				if (check_request_transport(peer, req)) {
 					ast_set_flag(&p->flags[0], SIP_PENDINGBYE);
@@ -17428,8 +17487,7 @@
 				ao2_t_link(peers_by_ip, peer, "link peer into peers-by-ip table");
 			}
 			ao2_lock(peer);
-			if (sip_cancel_destroy(p))
-				ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+			sip_cancel_destroy(p);
 			switch (parse_register_contact(p, peer, req)) {
 			case PARSE_REGISTER_DENIED:
 				ast_log(LOG_WARNING, "Registration denied because of contact ACL\n");
@@ -23189,16 +23247,16 @@
 	switch (resp) {
 	case 100:	/* Trying */
 	case 101:	/* Dialog establishment */
-		if (!req->ignore && p->invitestate != INV_CANCELLED && sip_cancel_destroy(p)) {
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		if (!req->ignore && p->invitestate != INV_CANCELLED) {
+			sip_cancel_destroy(p);
 		}
 		check_pendings(p);
 		break;
 
 	case 180:	/* 180 Ringing */
 	case 182:       /* 182 Queued */
-		if (!req->ignore && p->invitestate != INV_CANCELLED && sip_cancel_destroy(p)) {
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		if (!req->ignore && p->invitestate != INV_CANCELLED) {
+			sip_cancel_destroy(p);
 		}
 		/* Store Route-set from provisional SIP responses so
 		 * early-dialog request can be routed properly
@@ -23254,8 +23312,9 @@
 		break;
 
 	case 181:	/* Call Is Being Forwarded */
-		if (!req->ignore && (p->invitestate != INV_CANCELLED) && sip_cancel_destroy(p))
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		if (!req->ignore && p->invitestate != INV_CANCELLED) {
+			sip_cancel_destroy(p);
+		}
 		/* Store Route-set from provisional SIP responses so
 		 * early-dialog request can be routed properly
 		 * */
@@ -23286,8 +23345,8 @@
 		break;
 
 	case 183:	/* Session progress */
-		if (!req->ignore && (p->invitestate != INV_CANCELLED) && sip_cancel_destroy(p)) {
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		if (!req->ignore && p->invitestate != INV_CANCELLED) {
+			sip_cancel_destroy(p);
 		}
 		/* Store Route-set from provisional SIP responses so
 		 * early-dialog request can be routed properly
@@ -23347,8 +23406,8 @@
 		break;
 
 	case 200:	/* 200 OK on invite - someone's answering our call */
-		if (!req->ignore && (p->invitestate != INV_CANCELLED) && sip_cancel_destroy(p)) {
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		if (!req->ignore && p->invitestate != INV_CANCELLED) {
+			sip_cancel_destroy(p);
 		}
 		p->authtries = 0;
 		if (find_sdp(req)) {
@@ -24624,8 +24683,9 @@
 				}
 			} else if ((resp >= 100) && (resp < 200)) {
 				if (sipmethod == SIP_INVITE) {
-					if (!req->ignore && sip_cancel_destroy(p))
-						ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+					if (!req->ignore) {
+						sip_cancel_destroy(p);
+					}
 					if (find_sdp(req))
 						process_sdp(p, req, SDP_T38_NONE, FALSE);
 					if (p->owner) {
@@ -24691,8 +24751,9 @@
 		default:	/* Errors without handlers */
 			if ((resp >= 100) && (resp < 200)) {
 				if (sipmethod == SIP_INVITE) {	/* re-invite */
-					if (!req->ignore && sip_cancel_destroy(p))
-						ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+					if (!req->ignore) {
+						sip_cancel_destroy(p);
+					}
 				}
 			} else if ((resp >= 200) && (resp < 300)) { /* on any unrecognized 2XX response do the following */
 				if (sipmethod == SIP_INVITE) {
@@ -24709,10 +24770,10 @@
 				case 502: /* Bad gateway */
 				case 503: /* Service Unavailable */
 				case 504: /* Server timeout */
-
 					/* re-invite failed */
-					if (sipmethod == SIP_INVITE && sip_cancel_destroy(p))
-						ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+					if (sipmethod == SIP_INVITE) {
+						sip_cancel_destroy(p);
+					}
 					break;
 				}
 			}
@@ -25660,8 +25721,8 @@
 	if (!req->ignore) {
 		int newcall = (p->initreq.headers ? TRUE : FALSE);
 
-		if (sip_cancel_destroy(p))
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
+		sip_cancel_destroy(p);
+
 		/* This also counts as a pending invite */
 		p->pendinginvite = seqno;
 		check_via(p, req);
@@ -27935,10 +27996,13 @@
 						action, p->exten, p->context, p->username);
 			}
 		}
-		if (p->autokillid > -1 && sip_cancel_destroy(p))	/* Remove subscription expiry for renewals */
-			ast_log(LOG_WARNING, "Unable to cancel SIP destruction.  Expect bad things.\n");
-		if (p->expiry > 0)
-			sip_scheddestroy(p, (p->expiry + 10) * 1000);	/* Set timer for destruction of call at expiration */
+
+		/* Remove subscription expiry for renewals */
+		sip_cancel_destroy(p);
+		if (p->expiry > 0) {
+			/* Set timer for destruction of call at expiration */
+			sip_scheddestroy(p, (p->expiry + 10) * 1000);
+		}
 
 		if (p->subscribed == MWI_NOTIFICATION) {
 			ast_set_flag(&p->flags[1], SIP_PAGE2_DIALOG_ESTABLISHED);
diff --git a/channels/sip/include/dialog.h b/channels/sip/include/dialog.h
index f6adea5..0694170 100644
--- a/channels/sip/include/dialog.h
+++ b/channels/sip/include/dialog.h
@@ -39,9 +39,22 @@
 #define sip_alloc(callid, addr, useglobal_nat, intended_method, req, logger_callid) \
 	__sip_alloc(callid, addr, useglobal_nat, intended_method, req, logger_callid, __FILE__, __LINE__, __PRETTY_FUNCTION__)
 
+/*!
+ * \brief Schedule final destruction of SIP dialog.
+ *
+ * \note This cannot be canceled.
+ *
+ * \details
+ * This function is used to keep a dialog around for a period of time in order
+ * to properly respond to any retransmits.
+ */
 void sip_scheddestroy_final(struct sip_pvt *p, int ms);
+
+/*! \brief Schedule destruction of SIP dialog */
 void sip_scheddestroy(struct sip_pvt *p, int ms);
-int sip_cancel_destroy(struct sip_pvt *p);
+
+/*! \brief Cancel destruction of SIP dialog. */
+void sip_cancel_destroy(struct sip_pvt *pvt);
 
 /*!
  * \brief Unlink a dialog from the dialogs container, as well as any other places

-- 
To view, visit https://gerrit.asterisk.org/2409
To unsubscribe, visit https://gerrit.asterisk.org/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I450580dbf26e2e3952ee6628c735b001565c368f
Gerrit-PatchSet: 2
Gerrit-Project: asterisk
Gerrit-Branch: master
Gerrit-Owner: Richard Mudgett <rmudgett at digium.com>
Gerrit-Reviewer: Anonymous Coward #1000019
Gerrit-Reviewer: Joshua Colp <jcolp at digium.com>



More information about the asterisk-code-review mailing list