<p>Richard Mudgett has uploaded this change for <strong>review</strong>.</p><p><a href="https://gerrit.asterisk.org/8704">View Change</a></p><pre style="font-family: monospace,monospace; white-space: pre-wrap;">res_pjsip: Fix deadlock on reliable transport shutdown.<br><br>A deadlock can happen when the PJSIP monitor thread is shutting down a<br>connection oriented transport (TCP/TLS) used by a subscription at the same<br>time as another thread tries to send something for that subscription. The<br>deadlock is between the pjsip monitor thread attempting to get the dialog<br>lock and another thread sending something for that dialog when it tries to<br>get the transport manager lock.<br><br>* res_pjsip_pubsub.c: Avoid the deadlock by pushing the subscription<br>removal to the subscription serializer.<br><br>* res_pjsip_registrar.c: Pushed off incoming registration contact removals<br>to a default serializer as a precaution. Removing the contacts involves<br>sorcery access which in this case will involve database access. Depending<br>upon the setup, the database may not be on the same machine and could take<br>awhile. We don't want to hold up the pjsip monitor thread with<br>potentially long access times.<br><br>ASTERISK-27706<br><br>Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4<br>---<br>M res/res_pjsip_outbound_registration.c<br>M res/res_pjsip_pubsub.c<br>M res/res_pjsip_registrar.c<br>3 files changed, 75 insertions(+), 6 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/04/8704/1</pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/res/res_pjsip_outbound_registration.c b/res/res_pjsip_outbound_registration.c<br>index c1077f2..8935bd1 100644<br>--- a/res/res_pjsip_outbound_registration.c<br>+++ b/res/res_pjsip_outbound_registration.c<br>@@ -833,6 +833,8 @@<br> *<br> * \param obj What is needed to initiate a reregister attempt.<br> *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br> * \return Nothing<br> */<br> static void registration_transport_shutdown_cb(void *obj)<br>diff --git a/res/res_pjsip_pubsub.c b/res/res_pjsip_pubsub.c<br>index 9b45df5..1289822 100644<br>--- a/res/res_pjsip_pubsub.c<br>+++ b/res/res_pjsip_pubsub.c<br>@@ -560,15 +560,52 @@<br> return ast_sorcery_generic_alloc(sizeof(struct ast_sip_publication_resource), publication_resource_destroy);<br> }<br> <br>-static void sub_tree_transport_cb(void *data) {<br>+static int sub_tree_subscription_terminate_cb(void *data)<br>+{<br> struct sip_subscription_tree *sub_tree = data;<br> <br>- ast_debug(3, "Transport destroyed. Removing subscription '%s->%s' prune on restart: %d\n",<br>+ if (!sub_tree->evsub) {<br>+ /* Something else already terminated the subscription. */<br>+ ao2_ref(sub_tree, -1);<br>+ return 0;<br>+ }<br>+<br>+ ast_debug(3, "Transport destroyed. Removing subscription '%s->%s' prune on boot: %d\n",<br> sub_tree->persistence->endpoint, sub_tree->root->resource,<br> sub_tree->persistence->prune_on_boot);<br> <br> sub_tree->state = SIP_SUB_TREE_TERMINATE_IN_PROGRESS;<br> pjsip_evsub_terminate(sub_tree->evsub, PJ_TRUE);<br>+<br>+ ao2_ref(sub_tree, -1);<br>+ return 0;<br>+}<br>+<br>+/*!<br>+ * \internal<br>+ * \brief The reliable transport we used as a subscription contact has shutdown.<br>+ *<br>+ * \param data What subscription needs to be terminated.<br>+ *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br>+ * \return Nothing<br>+ */<br>+static void sub_tree_transport_cb(void *data)<br>+{<br>+ struct sip_subscription_tree *sub_tree = data;<br>+<br>+ /*<br>+ * Push off the subscription termination to the serializer to<br>+ * avoid deadlock. Another thread could be trying to send a<br>+ * message on the subscription that can deadlock with this<br>+ * thread.<br>+ */<br>+ ao2_ref(sub_tree, +1);<br>+ if (ast_sip_push_task(sub_tree->serializer, sub_tree_subscription_terminate_cb,<br>+ sub_tree)) {<br>+ ao2_ref(sub_tree, -1);<br>+ }<br> }<br> <br> /*! \brief Destructor for subscription persistence */<br>@@ -621,7 +658,7 @@<br> return;<br> }<br> <br>- ast_debug(3, "Updating persistence for '%s->%s' prune on restart: %s\n",<br>+ ast_debug(3, "Updating persistence for '%s->%s' prune on boot: %s\n",<br> sub_tree->persistence->endpoint, sub_tree->root->resource,<br> sub_tree->persistence->prune_on_boot ? "yes" : "no");<br> <br>@@ -645,7 +682,7 @@<br> sub_tree->endpoint, rdata);<br> <br> if (sub_tree->persistence->prune_on_boot) {<br>- ast_debug(3, "adding transport monitor on %s for '%s->%s' prune on restart: %d\n",<br>+ ast_debug(3, "adding transport monitor on %s for '%s->%s' prune on boot: %d\n",<br> rdata->tp_info.transport->obj_name,<br> sub_tree->persistence->endpoint, sub_tree->root->resource,<br> sub_tree->persistence->prune_on_boot);<br>diff --git a/res/res_pjsip_registrar.c b/res/res_pjsip_registrar.c<br>index 8f84288..675d177 100644<br>--- a/res/res_pjsip_registrar.c<br>+++ b/res/res_pjsip_registrar.c<br>@@ -337,7 +337,7 @@<br> && strcmp(ma->contact_name, mb->contact_name) == 0;<br> }<br> <br>-static void register_contact_transport_shutdown_cb(void *data)<br>+static int register_contact_transport_remove_cb(void *data)<br> {<br> struct contact_transport_monitor *monitor = data;<br> struct ast_sip_contact *contact;<br>@@ -345,7 +345,8 @@<br> <br> aor = ast_sip_location_retrieve_aor(monitor->aor_name);<br> if (!aor) {<br>- return;<br>+ ao2_ref(monitor, -1);<br>+ return 0;<br> }<br> <br> ao2_lock(aor);<br>@@ -365,6 +366,35 @@<br> }<br> ao2_unlock(aor);<br> ao2_ref(aor, -1);<br>+<br>+ ao2_ref(monitor, -1);<br>+ return 0;<br>+}<br>+<br>+/*!<br>+ * \internal<br>+ * \brief The reliable transport we registered as a contact has shutdown.<br>+ *<br>+ * \param data What contact needs to be removed.<br>+ *<br>+ * \note Normally executed by the pjsip monitor thread.<br>+ *<br>+ * \return Nothing<br>+ */<br>+static void register_contact_transport_shutdown_cb(void *data)<br>+{<br>+ struct contact_transport_monitor *monitor = data;<br>+<br>+ /*<br>+ * Push off to a default serializer. This is in case sorcery<br>+ * does database accesses for contacts. Database accesses may<br>+ * not be on this machine. We don't want to tie up the pjsip<br>+ * monitor thread with potentially long access times.<br>+ */<br>+ ao2_ref(monitor, +1);<br>+ if (ast_sip_push_task(NULL, register_contact_transport_remove_cb, monitor)) {<br>+ ao2_ref(monitor, -1);<br>+ }<br> }<br> <br> AST_VECTOR(excess_contact_vector, struct ast_sip_contact *);<br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/8704">change 8704</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/8704"/><meta itemprop="name" content="View Change"/></div></div>
<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: 15 </div>
<div style="display:none"> Gerrit-MessageType: newchange </div>
<div style="display:none"> Gerrit-Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4 </div>
<div style="display:none"> Gerrit-Change-Number: 8704 </div>
<div style="display:none"> Gerrit-PatchSet: 1 </div>
<div style="display:none"> Gerrit-Owner: Richard Mudgett <rmudgett@digium.com> </div>