<p>Jenkins2 <strong>merged</strong> this change.</p><p><a href="https://gerrit.asterisk.org/6369">View Change</a></p><div style="white-space:pre-wrap">Approvals:
Richard Mudgett: Looks good to me, but someone else must approve
Joshua Colp: Looks good to me, but someone else must approve
George Joseph: Looks good to me, approved
Jenkins2: Approved for Submit
</div><pre style="font-family: monospace,monospace; white-space: pre-wrap;">stasis/control: Fix possible deadlock with swap channel<br><br>If an error occurs during a bridge impart it's possible that<br>the "bridge_after" callback might try to run before<br>control_swap_channel_in_bridge has been signalled to continue.<br>Since control_swap_channel_in_bridge is holding the control lock<br>and the callback needs it, a deadlock will occur.<br><br>* control_swap_channel_in_bridge now only holds the control<br> lock while it's actually modifying the control structure and<br> releases it while the bridge impart is running.<br>* bridge_after_cb is now tolerant of impart failures.<br><br>Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3<br>---<br>M include/asterisk/bridge_after.h<br>M main/bridge.c<br>M main/bridge_after.c<br>M res/stasis/control.c<br>4 files changed, 96 insertions(+), 64 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/include/asterisk/bridge_after.h b/include/asterisk/bridge_after.h<br>index 53f30b9..0451685 100644<br>--- a/include/asterisk/bridge_after.h<br>+++ b/include/asterisk/bridge_after.h<br>@@ -45,6 +45,8 @@<br> AST_BRIDGE_AFTER_CB_REASON_DEPART,<br> /*! Was explicitly removed by external code. */<br> AST_BRIDGE_AFTER_CB_REASON_REMOVED,<br>+ /*! The channel failed to enter the bridge. */<br>+ AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED,<br> };<br> <br> /*!<br>diff --git a/main/bridge.c b/main/bridge.c<br>index b732d5f..ab12ecf 100644<br>--- a/main/bridge.c<br>+++ b/main/bridge.c<br>@@ -1758,12 +1758,13 @@<br> static void *bridge_channel_depart_thread(void *data)<br> {<br> struct ast_bridge_channel *bridge_channel = data;<br>+ int res = 0;<br> <br> if (bridge_channel->callid) {<br> ast_callid_threadassoc_add(bridge_channel->callid);<br> }<br> <br>- bridge_channel_internal_join(bridge_channel);<br>+ res = bridge_channel_internal_join(bridge_channel);<br> <br> /*<br> * cleanup<br>@@ -1775,7 +1776,8 @@<br> ast_bridge_features_destroy(bridge_channel->features);<br> bridge_channel->features = NULL;<br> <br>- ast_bridge_discard_after_callback(bridge_channel->chan, AST_BRIDGE_AFTER_CB_REASON_DEPART);<br>+ ast_bridge_discard_after_callback(bridge_channel->chan,<br>+ res ? AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED : AST_BRIDGE_AFTER_CB_REASON_DEPART);<br> /* If join failed there will be impart threads waiting. */<br> bridge_channel_impart_signal(bridge_channel->chan);<br> ast_bridge_discard_after_goto(bridge_channel->chan);<br>diff --git a/main/bridge_after.c b/main/bridge_after.c<br>index d4aec75..69c8f98 100644<br>--- a/main/bridge_after.c<br>+++ b/main/bridge_after.c<br>@@ -293,23 +293,23 @@<br> return 0;<br> }<br> <br>-const char *reason_strings[] = {<br>- [AST_BRIDGE_AFTER_CB_REASON_DESTROY] = "Channel destroyed (hungup)",<br>- [AST_BRIDGE_AFTER_CB_REASON_REPLACED] = "Callback was replaced",<br>- [AST_BRIDGE_AFTER_CB_REASON_MASQUERADE] = "Channel masqueraded",<br>- [AST_BRIDGE_AFTER_CB_REASON_DEPART] = "Channel was departed from bridge",<br>- [AST_BRIDGE_AFTER_CB_REASON_REMOVED] = "Callback was removed",<br>-};<br>-<br> const char *ast_bridge_after_cb_reason_string(enum ast_bridge_after_cb_reason reason)<br> {<br>- if (reason < AST_BRIDGE_AFTER_CB_REASON_DESTROY<br>- || AST_BRIDGE_AFTER_CB_REASON_REMOVED < reason<br>- || !reason_strings[reason]) {<br>- return "Unknown";<br>+ switch (reason) {<br>+ case AST_BRIDGE_AFTER_CB_REASON_DESTROY:<br>+ return "Channel destroyed (hungup)";<br>+ case AST_BRIDGE_AFTER_CB_REASON_REPLACED:<br>+ return "Callback was replaced";<br>+ case AST_BRIDGE_AFTER_CB_REASON_MASQUERADE:<br>+ return "Channel masqueraded";<br>+ case AST_BRIDGE_AFTER_CB_REASON_DEPART:<br>+ return "Channel was departed from bridge";<br>+ case AST_BRIDGE_AFTER_CB_REASON_REMOVED:<br>+ return "Callback was removed";<br>+ case AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED:<br>+ return "Channel failed joining the bridge";<br> }<br>-<br>- return reason_strings[reason];<br>+ return "Unknown";<br> }<br> <br> struct after_bridge_goto_ds {<br>diff --git a/res/stasis/control.c b/res/stasis/control.c<br>index 503f111..085ca7e 100644<br>--- a/res/stasis/control.c<br>+++ b/res/stasis/control.c<br>@@ -983,14 +983,21 @@<br> return 0;<br> }<br> <br>-static void bridge_after_cb(struct ast_channel *chan, void *data)<br>+static void internal_bridge_after_cb(struct ast_channel *chan, void *data,<br>+ enum ast_bridge_after_cb_reason reason)<br> {<br> struct stasis_app_control *control = data;<br> SCOPED_AO2LOCK(lock, control);<br> struct ast_bridge_channel *bridge_channel;<br> <br>- ast_debug(3, "%s, %s: Channel leaving bridge\n",<br>- ast_channel_uniqueid(chan), control->bridge->uniqueid);<br>+ ast_debug(3, "%s, %s: %s\n",<br>+ ast_channel_uniqueid(chan), control->bridge ? control->bridge->uniqueid : "unknown",<br>+ ast_bridge_after_cb_reason_string(reason));<br>+<br>+ if (reason == AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED) {<br>+ /* The impart actually failed so control->bridge isn't valid. */<br>+ control->bridge = NULL;<br>+ }<br> <br> ast_assert(chan == control->channel);<br> <br>@@ -998,18 +1005,21 @@<br> ast_channel_pbx_set(control->channel, control->pbx);<br> control->pbx = NULL;<br> <br>- app_unsubscribe_bridge(control->app, control->bridge);<br>+ if (control->bridge) {<br>+ app_unsubscribe_bridge(control->app, control->bridge);<br> <br>- /* No longer in the bridge */<br>- control->bridge = NULL;<br>+ /* No longer in the bridge */<br>+ control->bridge = NULL;<br> <br>- /* Get the bridge channel so we don't depart from the wrong bridge */<br>- ast_channel_lock(chan);<br>- bridge_channel = ast_channel_get_bridge_channel(chan);<br>- ast_channel_unlock(chan);<br>+ /* Get the bridge channel so we don't depart from the wrong bridge */<br>+ ast_channel_lock(chan);<br>+ bridge_channel = ast_channel_get_bridge_channel(chan);<br>+ ast_channel_unlock(chan);<br> <br>- /* Depart this channel from the bridge using the command queue if possible */<br>- stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);<br>+ /* Depart this channel from the bridge using the command queue if possible */<br>+ stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);<br>+ }<br>+<br> if (stasis_app_channel_is_stasis_end_published(chan)) {<br> /* The channel has had a StasisEnd published on it, but until now had remained in<br> * the bridging system. This means that the channel moved from a Stasis bridge to a<br>@@ -1027,12 +1037,19 @@<br> }<br> }<br> <br>+static void bridge_after_cb(struct ast_channel *chan, void *data)<br>+{<br>+ struct stasis_app_control *control = data;<br>+<br>+ internal_bridge_after_cb(control->channel, data, AST_BRIDGE_AFTER_CB_REASON_DEPART);<br>+}<br>+<br> static void bridge_after_cb_failed(enum ast_bridge_after_cb_reason reason,<br> void *data)<br> {<br> struct stasis_app_control *control = data;<br> <br>- bridge_after_cb(control->channel, data);<br>+ internal_bridge_after_cb(control->channel, data, reason);<br> <br> ast_debug(3, " reason: %s\n",<br> ast_bridge_after_cb_reason_string(reason));<br>@@ -1171,46 +1188,57 @@<br> return -1;<br> }<br> <br>- {<br>- /* pbx and bridge are modified by the bridging impart thread.<br>- * It shouldn't happen concurrently, but we still need to lock<br>- * for the memory fence.<br>+ ao2_lock(control);<br>+<br>+ /* Ensure the controlling application is subscribed early enough<br>+ * to receive the ChannelEnteredBridge message. This works in concert<br>+ * with the subscription handled in the Stasis application execution<br>+ * loop */<br>+ app_subscribe_bridge(control->app, bridge);<br>+<br>+ /* Save off the channel's PBX */<br>+ ast_assert(control->pbx == NULL);<br>+ if (!control->pbx) {<br>+ control->pbx = ast_channel_pbx(chan);<br>+ ast_channel_pbx_set(chan, NULL);<br>+ }<br>+<br>+ ast_assert(stasis_app_get_bridge(control) == NULL);<br>+ /* We need to set control->bridge here since bridge_after_cb may be run<br>+ * before ast_bridge_impart returns. bridge_after_cb gets a reason<br>+ * code so it can tell if the bridge is actually valid or not.<br>+ */<br>+ control->bridge = bridge;<br>+<br>+ /* We can't be holding the control lock while impart is running<br>+ * or we could create a deadlock with bridge_after_cb which also<br>+ * tries to lock control.<br>+ */<br>+ ao2_unlock(control);<br>+ res = ast_bridge_impart(bridge,<br>+ chan,<br>+ swap,<br>+ NULL, /* features */<br>+ AST_BRIDGE_IMPART_CHAN_DEPARTABLE);<br>+ if (res != 0) {<br>+ /* ast_bridge_impart failed before it could spawn the depart<br>+ * thread. The callbacks aren't called in this case.<br>+ * The impart could still fail even if ast_bridge_impart returned<br>+ * ok but that's handled by bridge_after_cb.<br> */<br>- SCOPED_AO2LOCK(lock, control);<br>-<br>- /* Ensure the controlling application is subscribed early enough<br>- * to receive the ChannelEnteredBridge message. This works in concert<br>- * with the subscription handled in the Stasis application execution<br>- * loop */<br>- app_subscribe_bridge(control->app, bridge);<br>-<br>- /* Save off the channel's PBX */<br>- ast_assert(control->pbx == NULL);<br>- if (!control->pbx) {<br>- control->pbx = ast_channel_pbx(chan);<br>- ast_channel_pbx_set(chan, NULL);<br>- }<br>-<br>- res = ast_bridge_impart(bridge,<br>- chan,<br>- swap,<br>- NULL, /* features */<br>- AST_BRIDGE_IMPART_CHAN_DEPARTABLE);<br>- if (res != 0) {<br>- ast_log(LOG_ERROR, "Error adding channel to bridge\n");<br>- ast_channel_pbx_set(chan, control->pbx);<br>- control->pbx = NULL;<br>- return -1;<br>- }<br>-<br>- ast_assert(stasis_app_get_bridge(control) == NULL);<br>- control->bridge = bridge;<br>-<br>+ ast_log(LOG_ERROR, "Error adding channel to bridge\n");<br>+ ao2_lock(control);<br>+ ast_channel_pbx_set(chan, control->pbx);<br>+ control->pbx = NULL;<br>+ control->bridge = NULL;<br>+ ao2_unlock(control);<br>+ } else {<br> ast_channel_lock(chan);<br> set_interval_hook(chan);<br> ast_channel_unlock(chan);<br> }<br>- return 0;<br>+<br>+ return res;<br> }<br> <br> int control_add_channel_to_bridge(struct stasis_app_control *control, struct ast_channel *chan, void *data)<br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/6369">change 6369</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/6369"/><meta itemprop="name" content="View Change"/></div></div>
<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: master </div>
<div style="display:none"> Gerrit-MessageType: merged </div>
<div style="display:none"> Gerrit-Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3 </div>
<div style="display:none"> Gerrit-Change-Number: 6369 </div>
<div style="display:none"> Gerrit-PatchSet: 5 </div>
<div style="display:none"> Gerrit-Owner: George Joseph <gjoseph@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: George Joseph <gjoseph@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Jenkins2 </div>
<div style="display:none"> Gerrit-Reviewer: Joshua Colp <jcolp@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Richard Mudgett <rmudgett@digium.com> </div>