<p>Jenkins2 <strong>merged</strong> this change.</p><p><a href="https://gerrit.asterisk.org/6368">View Change</a></p><div style="white-space:pre-wrap">Approvals:
  Richard Mudgett: Looks good to me, but someone else must approve
  Joshua Colp: Looks good to me, but someone else must approve
  George Joseph: Looks good to me, approved
  Jenkins2: Approved for Submit

</div><pre style="font-family: monospace,monospace; white-space: pre-wrap;">stasis/control:  Fix possible deadlock with swap channel<br><br>If an error occurs during a bridge impart it's possible that<br>the "bridge_after" callback might try to run before<br>control_swap_channel_in_bridge has been signalled to continue.<br>Since control_swap_channel_in_bridge is holding the control lock<br>and the callback needs it, a deadlock will occur.<br><br>* control_swap_channel_in_bridge now only holds the control<br>  lock while it's actually modifying the control structure and<br>  releases it while the bridge impart is running.<br>* bridge_after_cb is now tolerant of impart failures.<br><br>Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3<br>---<br>M include/asterisk/bridge_after.h<br>M main/bridge.c<br>M main/bridge_after.c<br>M res/stasis/control.c<br>4 files changed, 96 insertions(+), 64 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/include/asterisk/bridge_after.h b/include/asterisk/bridge_after.h<br>index 53f30b9..0451685 100644<br>--- a/include/asterisk/bridge_after.h<br>+++ b/include/asterisk/bridge_after.h<br>@@ -45,6 +45,8 @@<br>       AST_BRIDGE_AFTER_CB_REASON_DEPART,<br>    /*! Was explicitly removed by external code. */<br>       AST_BRIDGE_AFTER_CB_REASON_REMOVED,<br>+  /*! The channel failed to enter the bridge. */<br>+       AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED,<br> };<br> <br> /*!<br>diff --git a/main/bridge.c b/main/bridge.c<br>index b732d5f..ab12ecf 100644<br>--- a/main/bridge.c<br>+++ b/main/bridge.c<br>@@ -1758,12 +1758,13 @@<br> static void *bridge_channel_depart_thread(void *data)<br> {<br>     struct ast_bridge_channel *bridge_channel = data;<br>+    int res = 0;<br> <br>       if (bridge_channel->callid) {<br>              ast_callid_threadassoc_add(bridge_channel->callid);<br>        }<br> <br>- bridge_channel_internal_join(bridge_channel);<br>+        res = bridge_channel_internal_join(bridge_channel);<br> <br>        /*<br>     * cleanup<br>@@ -1775,7 +1776,8 @@<br>     ast_bridge_features_destroy(bridge_channel->features);<br>     bridge_channel->features = NULL;<br> <br>-       ast_bridge_discard_after_callback(bridge_channel->chan, AST_BRIDGE_AFTER_CB_REASON_DEPART);<br>+       ast_bridge_discard_after_callback(bridge_channel->chan,<br>+           res ? AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED : AST_BRIDGE_AFTER_CB_REASON_DEPART);<br>  /* If join failed there will be impart threads waiting. */<br>    bridge_channel_impart_signal(bridge_channel->chan);<br>        ast_bridge_discard_after_goto(bridge_channel->chan);<br>diff --git a/main/bridge_after.c b/main/bridge_after.c<br>index d4aec75..69c8f98 100644<br>--- a/main/bridge_after.c<br>+++ b/main/bridge_after.c<br>@@ -293,23 +293,23 @@<br>   return 0;<br> }<br> <br>-const char *reason_strings[] = {<br>-  [AST_BRIDGE_AFTER_CB_REASON_DESTROY] = "Channel destroyed (hungup)",<br>-       [AST_BRIDGE_AFTER_CB_REASON_REPLACED] = "Callback was replaced",<br>-   [AST_BRIDGE_AFTER_CB_REASON_MASQUERADE] = "Channel masqueraded",<br>-   [AST_BRIDGE_AFTER_CB_REASON_DEPART] = "Channel was departed from bridge",<br>-  [AST_BRIDGE_AFTER_CB_REASON_REMOVED] = "Callback was removed",<br>-};<br>-<br> const char *ast_bridge_after_cb_reason_string(enum ast_bridge_after_cb_reason reason)<br> {<br>- if (reason < AST_BRIDGE_AFTER_CB_REASON_DESTROY<br>-           || AST_BRIDGE_AFTER_CB_REASON_REMOVED < reason<br>-            || !reason_strings[reason]) {<br>-                return "Unknown";<br>+  switch (reason) {<br>+    case AST_BRIDGE_AFTER_CB_REASON_DESTROY:<br>+             return "Channel destroyed (hungup)";<br>+       case AST_BRIDGE_AFTER_CB_REASON_REPLACED:<br>+            return "Callback was replaced";<br>+    case AST_BRIDGE_AFTER_CB_REASON_MASQUERADE:<br>+          return "Channel masqueraded";<br>+      case AST_BRIDGE_AFTER_CB_REASON_DEPART:<br>+              return "Channel was departed from bridge";<br>+ case AST_BRIDGE_AFTER_CB_REASON_REMOVED:<br>+             return "Callback was removed";<br>+     case AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED:<br>+               return "Channel failed joining the bridge";<br>         }<br>-<br>- return reason_strings[reason];<br>+       return "Unknown";<br> }<br> <br> struct after_bridge_goto_ds {<br>diff --git a/res/stasis/control.c b/res/stasis/control.c<br>index 503f111..085ca7e 100644<br>--- a/res/stasis/control.c<br>+++ b/res/stasis/control.c<br>@@ -983,14 +983,21 @@<br>    return 0;<br> }<br> <br>-static void bridge_after_cb(struct ast_channel *chan, void *data)<br>+static void internal_bridge_after_cb(struct ast_channel *chan, void *data,<br>+    enum ast_bridge_after_cb_reason reason)<br> {<br>   struct stasis_app_control *control = data;<br>    SCOPED_AO2LOCK(lock, control);<br>        struct ast_bridge_channel *bridge_channel;<br> <br>-        ast_debug(3, "%s, %s: Channel leaving bridge\n",<br>-           ast_channel_uniqueid(chan), control->bridge->uniqueid);<br>+        ast_debug(3, "%s, %s: %s\n",<br>+               ast_channel_uniqueid(chan), control->bridge ? control->bridge->uniqueid : "unknown",<br>+                      ast_bridge_after_cb_reason_string(reason));<br>+<br>+       if (reason == AST_BRIDGE_AFTER_CB_REASON_IMPART_FAILED) {<br>+            /* The impart actually failed so control->bridge isn't valid. */<br>+              control->bridge = NULL;<br>+   }<br> <br>  ast_assert(chan == control->channel);<br> <br>@@ -998,18 +1005,21 @@<br>   ast_channel_pbx_set(control->channel, control->pbx);<br>    control->pbx = NULL;<br> <br>-   app_unsubscribe_bridge(control->app, control->bridge);<br>+ if (control->bridge) {<br>+            app_unsubscribe_bridge(control->app, control->bridge);<br> <br>-      /* No longer in the bridge */<br>-        control->bridge = NULL;<br>+           /* No longer in the bridge */<br>+                control->bridge = NULL;<br> <br>-        /* Get the bridge channel so we don't depart from the wrong bridge */<br>-    ast_channel_lock(chan);<br>-      bridge_channel = ast_channel_get_bridge_channel(chan);<br>-       ast_channel_unlock(chan);<br>+            /* Get the bridge channel so we don't depart from the wrong bridge */<br>+            ast_channel_lock(chan);<br>+              bridge_channel = ast_channel_get_bridge_channel(chan);<br>+               ast_channel_unlock(chan);<br> <br>- /* Depart this channel from the bridge using the command queue if possible */<br>-        stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);<br>+                /* Depart this channel from the bridge using the command queue if possible */<br>+                stasis_app_send_command_async(control, bridge_channel_depart, bridge_channel, __ao2_cleanup);<br>+        }<br>+<br>  if (stasis_app_channel_is_stasis_end_published(chan)) {<br>               /* The channel has had a StasisEnd published on it, but until now had remained in<br>              * the bridging system. This means that the channel moved from a Stasis bridge to a<br>@@ -1027,12 +1037,19 @@<br>  }<br> }<br> <br>+static void bridge_after_cb(struct ast_channel *chan, void *data)<br>+{<br>+     struct stasis_app_control *control = data;<br>+<br>+        internal_bridge_after_cb(control->channel, data, AST_BRIDGE_AFTER_CB_REASON_DEPART);<br>+}<br>+<br> static void bridge_after_cb_failed(enum ast_bridge_after_cb_reason reason,<br>   void *data)<br> {<br>       struct stasis_app_control *control = data;<br> <br>-        bridge_after_cb(control->channel, data);<br>+  internal_bridge_after_cb(control->channel, data, reason);<br> <br>       ast_debug(3, "  reason: %s\n",<br>              ast_bridge_after_cb_reason_string(reason));<br>@@ -1171,46 +1188,57 @@<br>          return -1;<br>    }<br> <br>- {<br>-            /* pbx and bridge are modified by the bridging impart thread.<br>-                 * It shouldn't happen concurrently, but we still need to lock<br>-            * for the memory fence.<br>+     ao2_lock(control);<br>+<br>+        /* Ensure the controlling application is subscribed early enough<br>+      * to receive the ChannelEnteredBridge message. This works in concert<br>+         * with the subscription handled in the Stasis application execution<br>+  * loop */<br>+   app_subscribe_bridge(control->app, bridge);<br>+<br>+    /* Save off the channel's PBX */<br>+ ast_assert(control->pbx == NULL);<br>+ if (!control->pbx) {<br>+              control->pbx = ast_channel_pbx(chan);<br>+             ast_channel_pbx_set(chan, NULL);<br>+     }<br>+<br>+ ast_assert(stasis_app_get_bridge(control) == NULL);<br>+  /* We need to set control->bridge here since bridge_after_cb may be run<br>+    * before ast_bridge_impart returns.  bridge_after_cb gets a reason<br>+   * code so it can tell if the bridge is actually valid or not.<br>+        */<br>+  control->bridge = bridge;<br>+<br>+      /* We can't be holding the control lock while impart is running<br>+   * or we could create a deadlock with bridge_after_cb which also<br>+      * tries to lock control.<br>+     */<br>+  ao2_unlock(control);<br>+ res = ast_bridge_impart(bridge,<br>+              chan,<br>+                swap,<br>+                NULL, /* features */<br>+         AST_BRIDGE_IMPART_CHAN_DEPARTABLE);<br>+  if (res != 0) {<br>+              /* ast_bridge_impart failed before it could spawn the depart<br>+          * thread.  The callbacks aren't called in this case.<br>+             * The impart could still fail even if ast_bridge_impart returned<br>+             * ok but that's handled by bridge_after_cb.<br>               */<br>-          SCOPED_AO2LOCK(lock, control);<br>-<br>-            /* Ensure the controlling application is subscribed early enough<br>-              * to receive the ChannelEnteredBridge message. This works in concert<br>-                 * with the subscription handled in the Stasis application execution<br>-          * loop */<br>-           app_subscribe_bridge(control->app, bridge);<br>-<br>-            /* Save off the channel's PBX */<br>-         ast_assert(control->pbx == NULL);<br>-         if (!control->pbx) {<br>-                      control->pbx = ast_channel_pbx(chan);<br>-                     ast_channel_pbx_set(chan, NULL);<br>-             }<br>-<br>-         res = ast_bridge_impart(bridge,<br>-                      chan,<br>-                        swap,<br>-                        NULL, /* features */<br>-                 AST_BRIDGE_IMPART_CHAN_DEPARTABLE);<br>-          if (res != 0) {<br>-                      ast_log(LOG_ERROR, "Error adding channel to bridge\n");<br>-                    ast_channel_pbx_set(chan, control->pbx);<br>-                  control->pbx = NULL;<br>-                      return -1;<br>-           }<br>-<br>-         ast_assert(stasis_app_get_bridge(control) == NULL);<br>-          control->bridge = bridge;<br>-<br>+              ast_log(LOG_ERROR, "Error adding channel to bridge\n");<br>+            ao2_lock(control);<br>+           ast_channel_pbx_set(chan, control->pbx);<br>+          control->pbx = NULL;<br>+              control->bridge = NULL;<br>+           ao2_unlock(control);<br>+ } else {<br>              ast_channel_lock(chan);<br>               set_interval_hook(chan);<br>              ast_channel_unlock(chan);<br>     }<br>-    return 0;<br>+<br>+ return res;<br> }<br> <br> int control_add_channel_to_bridge(struct stasis_app_control *control, struct ast_channel *chan, void *data)<br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/6368">change 6368</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/6368"/><meta itemprop="name" content="View Change"/></div></div>

<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: 15 </div>
<div style="display:none"> Gerrit-MessageType: merged </div>
<div style="display:none"> Gerrit-Change-Id: Ifd239aa93955b3eb475521f61e284fcb0da2c3b3 </div>
<div style="display:none"> Gerrit-Change-Number: 6368 </div>
<div style="display:none"> Gerrit-PatchSet: 5 </div>
<div style="display:none"> Gerrit-Owner: George Joseph <gjoseph@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: George Joseph <gjoseph@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Jenkins2 </div>
<div style="display:none"> Gerrit-Reviewer: Joshua Colp <jcolp@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Richard Mudgett <rmudgett@digium.com> </div>