<p>Richard Mudgett has uploaded this change for <strong>review</strong>.</p><p><a href="https://gerrit.asterisk.org/8703">View Change</a></p><pre style="font-family: monospace,monospace; white-space: pre-wrap;">res_pjsip: Fix deadlock on reliable transport shutdown.<br><br>A deadlock can happen when the PJSIP monitor thread is shutting down a<br>connection oriented transport (TCP/TLS) used by a subscription at the same<br>time as another thread tries to send something for that subscription.  The<br>deadlock is between the pjsip monitor thread attempting to get the dialog<br>lock and another thread sending something for that dialog when it tries to<br>get the transport manager lock.<br><br>* res_pjsip_pubsub.c: Avoid the deadlock by pushing the subscription<br>removal to the subscription serializer.<br><br>* res_pjsip_registrar.c: Pushed off incoming registration contact removals<br>to a default serializer as a precaution.  Removing the contacts involves<br>sorcery access which in this case will involve database access.  Depending<br>upon the setup, the database may not be on the same machine and could take<br>awhile.  We don't want to hold up the pjsip monitor thread with<br>potentially long access times.<br><br>ASTERISK-27706<br><br>Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4<br>---<br>M res/res_pjsip_outbound_registration.c<br>M res/res_pjsip_pubsub.c<br>M res/res_pjsip_registrar.c<br>3 files changed, 75 insertions(+), 6 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/03/8703/1</pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/res/res_pjsip_outbound_registration.c b/res/res_pjsip_outbound_registration.c<br>index e3799fb..448dcd6 100644<br>--- a/res/res_pjsip_outbound_registration.c<br>+++ b/res/res_pjsip_outbound_registration.c<br>@@ -834,6 +834,8 @@<br>  *<br>  * \param obj What is needed to initiate a reregister attempt.<br>  *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br>  * \return Nothing<br>  */<br> static void registration_transport_shutdown_cb(void *obj)<br>diff --git a/res/res_pjsip_pubsub.c b/res/res_pjsip_pubsub.c<br>index 6795e68..b4aa023 100644<br>--- a/res/res_pjsip_pubsub.c<br>+++ b/res/res_pjsip_pubsub.c<br>@@ -560,15 +560,52 @@<br>       return ast_sorcery_generic_alloc(sizeof(struct ast_sip_publication_resource), publication_resource_destroy);<br> }<br> <br>-static void sub_tree_transport_cb(void *data) {<br>+static int sub_tree_subscription_terminate_cb(void *data)<br>+{<br>         struct sip_subscription_tree *sub_tree = data;<br> <br>-    ast_debug(3, "Transport destroyed.  Removing subscription '%s->%s'  prune on restart: %d\n",<br>+    if (!sub_tree->evsub) {<br>+           /* Something else already terminated the subscription. */<br>+            ao2_ref(sub_tree, -1);<br>+               return 0;<br>+    }<br>+<br>+ ast_debug(3, "Transport destroyed.  Removing subscription '%s->%s'  prune on boot: %d\n",<br>                sub_tree->persistence->endpoint, sub_tree->root->resource,<br>                sub_tree->persistence->prune_on_boot);<br> <br>       sub_tree->state = SIP_SUB_TREE_TERMINATE_IN_PROGRESS;<br>      pjsip_evsub_terminate(sub_tree->evsub, PJ_TRUE);<br>+<br>+       ao2_ref(sub_tree, -1);<br>+       return 0;<br>+}<br>+<br>+/*!<br>+ * \internal<br>+ * \brief The reliable transport we used as a subscription contact has shutdown.<br>+ *<br>+ * \param data What subscription needs to be terminated.<br>+ *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br>+ * \return Nothing<br>+ */<br>+static void sub_tree_transport_cb(void *data)<br>+{<br>+  struct sip_subscription_tree *sub_tree = data;<br>+<br>+    /*<br>+    * Push off the subscription termination to the serializer to<br>+         * avoid deadlock.  Another thread could be trying to send a<br>+  * message on the subscription that can deadlock with this<br>+    * thread.<br>+    */<br>+  ao2_ref(sub_tree, +1);<br>+       if (ast_sip_push_task(sub_tree->serializer, sub_tree_subscription_terminate_cb,<br>+           sub_tree)) {<br>+         ao2_ref(sub_tree, -1);<br>+       }<br> }<br> <br> /*! \brief Destructor for subscription persistence */<br>@@ -621,7 +658,7 @@<br>                 return;<br>       }<br> <br>- ast_debug(3, "Updating persistence for '%s->%s'  prune on restart: %s\n",<br>+       ast_debug(3, "Updating persistence for '%s->%s'  prune on boot: %s\n",<br>           sub_tree->persistence->endpoint, sub_tree->root->resource,<br>                sub_tree->persistence->prune_on_boot ? "yes" : "no");<br> <br>@@ -645,7 +682,7 @@<br>                                                       sub_tree->endpoint, rdata);<br> <br>                                     if (sub_tree->persistence->prune_on_boot) {<br>-                                            ast_debug(3, "adding transport monitor on %s for '%s->%s'  prune on restart: %d\n",<br>+                                             ast_debug(3, "adding transport monitor on %s for '%s->%s'  prune on boot: %d\n",<br>                                                         rdata->tp_info.transport->obj_name,<br>                                                     sub_tree->persistence->endpoint, sub_tree->root->resource,<br>                                                        sub_tree->persistence->prune_on_boot);<br>diff --git a/res/res_pjsip_registrar.c b/res/res_pjsip_registrar.c<br>index bddea27..bf51733 100644<br>--- a/res/res_pjsip_registrar.c<br>+++ b/res/res_pjsip_registrar.c<br>@@ -337,7 +337,7 @@<br>                && strcmp(ma->contact_name, mb->contact_name) == 0;<br> }<br> <br>-static void register_contact_transport_shutdown_cb(void *data)<br>+static int register_contact_transport_remove_cb(void *data)<br> {<br>   struct contact_transport_monitor *monitor = data;<br>     struct ast_sip_contact *contact;<br>@@ -345,7 +345,8 @@<br> <br>      lock = ast_named_lock_get(AST_NAMED_LOCK_TYPE_MUTEX, "aor", monitor->aor_name);<br>  if (!lock) {<br>-         return;<br>+              ao2_ref(monitor, -1);<br>+                return 0;<br>     }<br> <br>  ao2_lock(lock);<br>@@ -365,6 +366,35 @@<br>         }<br>     ao2_unlock(lock);<br>     ast_named_lock_put(lock);<br>+<br>+ ao2_ref(monitor, -1);<br>+        return 0;<br>+}<br>+<br>+/*!<br>+ * \internal<br>+ * \brief The reliable transport we registered as a contact has shutdown.<br>+ *<br>+ * \param data What contact needs to be removed.<br>+ *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br>+ * \return Nothing<br>+ */<br>+static void register_contact_transport_shutdown_cb(void *data)<br>+{<br>+        struct contact_transport_monitor *monitor = data;<br>+<br>+ /*<br>+    * Push off to a default serializer.  This is in case sorcery<br>+         * does database accesses for contacts.  Database accesses may<br>+        * not be on this machine.  We don't want to tie up the pjsip<br>+     * monitor thread with potentially long access times.<br>+         */<br>+  ao2_ref(monitor, +1);<br>+        if (ast_sip_push_task(NULL, register_contact_transport_remove_cb, monitor)) {<br>+                ao2_ref(monitor, -1);<br>+        }<br> }<br> <br> AST_VECTOR(excess_contact_vector, struct ast_sip_contact *);<br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/8703">change 8703</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/8703"/><meta itemprop="name" content="View Change"/></div></div>

<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: 13 </div>
<div style="display:none"> Gerrit-MessageType: newchange </div>
<div style="display:none"> Gerrit-Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4 </div>
<div style="display:none"> Gerrit-Change-Number: 8703 </div>
<div style="display:none"> Gerrit-PatchSet: 1 </div>
<div style="display:none"> Gerrit-Owner: Richard Mudgett <rmudgett@digium.com> </div>