[Asterisk-code-review] stasis/control: Fix possible deadlock with swap channel (asterisk[15])

Jenkins2 asteriskteam at digium.com
Thu Sep 7 07:18:42 CDT 2017


Jenkins2 has submitted this change and it was merged. ( https://gerrit.asterisk.org/6368 )

Change subject: stasis/control:  Fix possible deadlock with swap channel
......................................................................

stasis/control:  Fix possible deadlock with swap channel

If an error occurs during a bridge impart it's possible that
the "bridge_after" callback might try to run before
control_swap_channel_in_bridge has been signalled to continue.
Since control_swap_channel_in_bridge is holding the control lock
and the callback needs it, a deadlock will occur.

* control_swap_channel_in_bridge now only holds the control
  lock while it's actually modifying the control structure and
  releases it while the bridge impart is running.
* bridge_after_cb is now tolerant of impart failures.

Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3
---
M include/asterisk/bridge_after.h
M main/bridge.c
M main/bridge_after.c
M res/stasis/control.c
4 files changed, 96 insertions(+), 64 deletions(-)

Approvals:
  Richard Mudgett: Looks good to me, but someone else must approve
  Joshua Colp: Looks good to me, but someone else must approve
  George Joseph: Looks good to me, approved
  Jenkins2: Approved for Submit



diff --git a/include/asterisk/bridge_after.h b/include/asterisk/bridge_after.h
index 53f30b9..0451685 100644
--- a/include/asterisk/bridge_after.h
+++ b/include/asterisk/bridge_after.h
@@ -45,6 +45,8 @@
 	AST_BRIDGE_AFTER_CB_REASON_DEPART,
 	/*! Was explicitly removed by external code. */
 	AST_BRIDGE_AFTER_CB_REASON_REMOVED,
+	/*! The channel failed to enter the bridge. */
+	AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED,
 };
 
 /*!
diff --git a/main/bridge.c b/main/bridge.c
index b732d5f..ab12ecf 100644
--- a/main/bridge.c
+++ b/main/bridge.c
@@ -1758,12 +1758,13 @@
 static void *bridge_channel_depart_thread(void *data)
 {
 	struct ast_bridge_channel *bridge_channel = data;
+	int res = 0;
 
 	if (bridge_channel->callid) {
 		ast_callid_threadassoc_add(bridge_channel->callid);
 	}
 
-	bridge_channel_internal_join(bridge_channel);
+	res = bridge_channel_internal_join(bridge_channel);
 
 	/*
 	 * cleanup
@@ -1775,7 +1776,8 @@
 	ast_bridge_features_destroy(bridge_channel->features);
 	bridge_channel->features = NULL;
 
-	ast_bridge_discard_after_callback(bridge_channel->chan, AST_BRIDGE_AFTER_CB_REASON_DEPART);
+	ast_bridge_discard_after_callback(bridge_channel->chan,
+		res ? AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED : AST_BRIDGE_AFTER_CB_REASON_DEPART);
 	/* If join failed there will be impart threads waiting. */
 	bridge_channel_impart_signal(bridge_channel->chan);
 	ast_bridge_discard_after_goto(bridge_channel->chan);
diff --git a/main/bridge_after.c b/main/bridge_after.c
index d4aec75..69c8f98 100644
--- a/main/bridge_after.c
+++ b/main/bridge_after.c
@@ -293,23 +293,23 @@
 	return 0;
 }
 
-const char *reason_strings[] = {
-	[AST_BRIDGE_AFTER_CB_REASON_DESTROY] = "Channel destroyed (hungup)",
-	[AST_BRIDGE_AFTER_CB_REASON_REPLACED] = "Callback was replaced",
-	[AST_BRIDGE_AFTER_CB_REASON_MASQUERADE] = "Channel masqueraded",
-	[AST_BRIDGE_AFTER_CB_REASON_DEPART] = "Channel was departed from bridge",
-	[AST_BRIDGE_AFTER_CB_REASON_REMOVED] = "Callback was removed",
-};
-
 const char *ast_bridge_after_cb_reason_string(enum ast_bridge_after_cb_reason reason)
 {
-	if (reason < AST_BRIDGE_AFTER_CB_REASON_DESTROY
-		|| AST_BRIDGE_AFTER_CB_REASON_REMOVED < reason
-		|| !reason_strings[reason]) {
-		return "Unknown";
+	switch (reason) {
+	case AST_BRIDGE_AFTER_CB_REASON_DESTROY:
+		return "Channel destroyed (hungup)";
+	case AST_BRIDGE_AFTER_CB_REASON_REPLACED:
+		return "Callback was replaced";
+	case AST_BRIDGE_AFTER_CB_REASON_MASQUERADE:
+		return "Channel masqueraded";
+	case AST_BRIDGE_AFTER_CB_REASON_DEPART:
+		return "Channel was departed from bridge";
+	case AST_BRIDGE_AFTER_CB_REASON_REMOVED:
+		return "Callback was removed";
+	case AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED:
+		return "Channel failed joining the bridge";
 	}
-
-	return reason_strings[reason];
+	return "Unknown";
 }
 
 struct after_bridge_goto_ds {
diff --git a/res/stasis/control.c b/res/stasis/control.c
index 503f111..085ca7e 100644
--- a/res/stasis/control.c
+++ b/res/stasis/control.c
@@ -983,14 +983,21 @@
 	return 0;
 }
 
-static void bridge_after_cb(struct ast_channel *chan, void *data)
+static void internal_bridge_after_cb(struct ast_channel *chan, void *data,
+	enum ast_bridge_after_cb_reason reason)
 {
 	struct stasis_app_control *control = data;
 	SCOPED_AO2LOCK(lock, control);
 	struct ast_bridge_channel *bridge_channel;
 
-	ast_debug(3, "%s, %s: Channel leaving bridge\n",
-		ast_channel_uniqueid(chan), control->bridge->uniqueid);
+	ast_debug(3, "%s, %s: %s\n",
+		ast_channel_uniqueid(chan), control->bridge ? control->bridge->uniqueid : "unknown",
+			ast_bridge_after_cb_reason_string(reason));
+
+	if (reason == AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED) {
+		/* The impart actually failed so control->bridge isn't valid. */
+		control->bridge = NULL;
+	}
 
 	ast_assert(chan == control->channel);
 
@@ -998,18 +1005,21 @@
 	ast_channel_pbx_set(control->channel, control->pbx);
 	control->pbx = NULL;
 
-	app_unsubscribe_bridge(control->app, control->bridge);
+	if (control->bridge) {
+		app_unsubscribe_bridge(control->app, control->bridge);
 
-	/* No longer in the bridge */
-	control->bridge = NULL;
+		/* No longer in the bridge */
+		control->bridge = NULL;
 
-	/* Get the bridge channel so we don't depart from the wrong bridge */
-	ast_channel_lock(chan);
-	bridge_channel = ast_channel_get_bridge_channel(chan);
-	ast_channel_unlock(chan);
+		/* Get the bridge channel so we don't depart from the wrong bridge */
+		ast_channel_lock(chan);
+		bridge_channel = ast_channel_get_bridge_channel(chan);
+		ast_channel_unlock(chan);
 
-	/* Depart this channel from the bridge using the command queue if possible */
-	stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);
+		/* Depart this channel from the bridge using the command queue if possible */
+		stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);
+	}
+
 	if (stasis_app_channel_is_stasis_end_published(chan)) {
 		/* The channel has had a StasisEnd published on it, but until now had remained in
 		 * the bridging system. This means that the channel moved from a Stasis bridge to a
@@ -1027,12 +1037,19 @@
 	}
 }
 
+static void bridge_after_cb(struct ast_channel *chan, void *data)
+{
+	struct stasis_app_control *control = data;
+
+	internal_bridge_after_cb(control->channel, data, AST_BRIDGE_AFTER_CB_REASON_DEPART);
+}
+
 static void bridge_after_cb_failed(enum ast_bridge_after_cb_reason reason,
 	void *data)
 {
 	struct stasis_app_control *control = data;
 
-	bridge_after_cb(control->channel, data);
+	internal_bridge_after_cb(control->channel, data, reason);
 
 	ast_debug(3, "  reason: %s\n",
 		ast_bridge_after_cb_reason_string(reason));
@@ -1171,46 +1188,57 @@
 		return -1;
 	}
 
-	{
-		/* pbx and bridge are modified by the bridging impart thread.
-		 * It shouldn't happen concurrently, but we still need to lock
-		 * for the memory fence.
+	ao2_lock(control);
+
+	/* Ensure the controlling application is subscribed early enough
+	 * to receive the ChannelEnteredBridge message. This works in concert
+	 * with the subscription handled in the Stasis application execution
+	 * loop */
+	app_subscribe_bridge(control->app, bridge);
+
+	/* Save off the channel's PBX */
+	ast_assert(control->pbx == NULL);
+	if (!control->pbx) {
+		control->pbx = ast_channel_pbx(chan);
+		ast_channel_pbx_set(chan, NULL);
+	}
+
+	ast_assert(stasis_app_get_bridge(control) == NULL);
+	/* We need to set control->bridge here since bridge_after_cb may be run
+	 * before ast_bridge_impart returns.  bridge_after_cb gets a reason
+	 * code so it can tell if the bridge is actually valid or not.
+	 */
+	control->bridge = bridge;
+
+	/* We can't be holding the control lock while impart is running
+	 * or we could create a deadlock with bridge_after_cb which also
+	 * tries to lock control.
+	 */
+	ao2_unlock(control);
+	res = ast_bridge_impart(bridge,
+		chan,
+		swap,
+		NULL, /* features */
+		AST_BRIDGE_IMPART_CHAN_DEPARTABLE);
+	if (res != 0) {
+		/* ast_bridge_impart failed before it could spawn the depart
+		 * thread.  The callbacks aren't called in this case.
+		 * The impart could still fail even if ast_bridge_impart returned
+		 * ok but that's handled by bridge_after_cb.
 		 */
-		SCOPED_AO2LOCK(lock, control);
-
-		/* Ensure the controlling application is subscribed early enough
-		 * to receive the ChannelEnteredBridge message. This works in concert
-		 * with the subscription handled in the Stasis application execution
-		 * loop */
-		app_subscribe_bridge(control->app, bridge);
-
-		/* Save off the channel's PBX */
-		ast_assert(control->pbx == NULL);
-		if (!control->pbx) {
-			control->pbx = ast_channel_pbx(chan);
-			ast_channel_pbx_set(chan, NULL);
-		}
-
-		res = ast_bridge_impart(bridge,
-			chan,
-			swap,
-			NULL, /* features */
-			AST_BRIDGE_IMPART_CHAN_DEPARTABLE);
-		if (res != 0) {
-			ast_log(LOG_ERROR, "Error adding channel to bridge\n");
-			ast_channel_pbx_set(chan, control->pbx);
-			control->pbx = NULL;
-			return -1;
-		}
-
-		ast_assert(stasis_app_get_bridge(control) == NULL);
-		control->bridge = bridge;
-
+		ast_log(LOG_ERROR, "Error adding channel to bridge\n");
+		ao2_lock(control);
+		ast_channel_pbx_set(chan, control->pbx);
+		control->pbx = NULL;
+		control->bridge = NULL;
+		ao2_unlock(control);
+	} else {
 		ast_channel_lock(chan);
 		set_interval_hook(chan);
 		ast_channel_unlock(chan);
 	}
-	return 0;
+
+	return res;
 }
 
 int control_add_channel_to_bridge(struct stasis_app_control *control, struct ast_channel *chan, void *data)

-- 
To view, visit https://gerrit.asterisk.org/6368
To unsubscribe, visit https://gerrit.asterisk.org/settings

Gerrit-Project: asterisk
Gerrit-Branch: 15
Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3
Gerrit-Change-Number: 6368
Gerrit-PatchSet: 5
Gerrit-Owner: George Joseph <gjoseph at digium.com>
Gerrit-Reviewer: George Joseph <gjoseph at digium.com>
Gerrit-Reviewer: Jenkins2
Gerrit-Reviewer: Joshua Colp <jcolp at digium.com>
Gerrit-Reviewer: Richard Mudgett <rmudgett at digium.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20170907/09e78a1a/attachment-0001.html>


More information about the asterisk-code-review mailing list