[Asterisk-code-review] res pjsip: Fix deadlock on reliable transport shutdown. (asterisk[master])

Joshua Colp asteriskteam at digium.com
Wed Apr 18 17:32:34 CDT 2018


Joshua Colp has submitted this change and it was merged. ( https://gerrit.asterisk.org/8705 )

Change subject: res_pjsip: Fix deadlock on reliable transport shutdown.
......................................................................

res_pjsip: Fix deadlock on reliable transport shutdown.

A deadlock can happen when the PJSIP monitor thread is shutting down a
connection oriented transport (TCP/TLS) used by a subscription at the same
time as another thread tries to send something for that subscription.  The
deadlock is between the pjsip monitor thread attempting to get the dialog
lock and another thread sending something for that dialog when it tries to
get the transport manager lock.

* res_pjsip_pubsub.c: Avoid the deadlock by pushing the subscription
removal to the subscription serializer.

* res_pjsip_registrar.c: Pushed off incoming registration contact removals
to a default serializer as a precaution.  Removing the contacts involves
sorcery access which in this case will involve database access.  Depending
upon the setup, the database may not be on the same machine and could take
awhile.  We don't want to hold up the pjsip monitor thread with
potentially long access times.

ASTERISK-27706

Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4
---
M res/res_pjsip_outbound_registration.c
M res/res_pjsip_pubsub.c
M res/res_pjsip_registrar.c
3 files changed, 75 insertions(+), 6 deletions(-)

Approvals:
  Joshua Colp: Looks good to me, but someone else must approve; Approved for Submit
  Benjamin Keith Ford: Looks good to me, but someone else must approve
  George Joseph: Looks good to me, approved



diff --git a/res/res_pjsip_outbound_registration.c b/res/res_pjsip_outbound_registration.c
index d0f7546..0111863 100644
--- a/res/res_pjsip_outbound_registration.c
+++ b/res/res_pjsip_outbound_registration.c
@@ -833,6 +833,8 @@
  *
  * \param obj What is needed to initiate a reregister attempt.
  *
+ * \note Normally executed by the pjsip monitor thread.
+ *
  * \return Nothing
  */
 static void registration_transport_shutdown_cb(void *obj)
diff --git a/res/res_pjsip_pubsub.c b/res/res_pjsip_pubsub.c
index 69c256d..f26acb3 100644
--- a/res/res_pjsip_pubsub.c
+++ b/res/res_pjsip_pubsub.c
@@ -560,15 +560,52 @@
 	return ast_sorcery_generic_alloc(sizeof(struct ast_sip_publication_resource), publication_resource_destroy);
 }
 
-static void sub_tree_transport_cb(void *data) {
+static int sub_tree_subscription_terminate_cb(void *data)
+{
 	struct sip_subscription_tree *sub_tree = data;
 
-	ast_debug(3, "Transport destroyed.  Removing subscription '%s->%s'  prune on restart: %d\n",
+	if (!sub_tree->evsub) {
+		/* Something else already terminated the subscription. */
+		ao2_ref(sub_tree, -1);
+		return 0;
+	}
+
+	ast_debug(3, "Transport destroyed.  Removing subscription '%s->%s'  prune on boot: %d\n",
 		sub_tree->persistence->endpoint, sub_tree->root->resource,
 		sub_tree->persistence->prune_on_boot);
 
 	sub_tree->state = SIP_SUB_TREE_TERMINATE_IN_PROGRESS;
 	pjsip_evsub_terminate(sub_tree->evsub, PJ_TRUE);
+
+	ao2_ref(sub_tree, -1);
+	return 0;
+}
+
+/*!
+ * \internal
+ * \brief The reliable transport we used as a subscription contact has shutdown.
+ *
+ * \param data What subscription needs to be terminated.
+ *
+ * \note Normally executed by the pjsip monitor thread.
+ *
+ * \return Nothing
+ */
+static void sub_tree_transport_cb(void *data)
+{
+	struct sip_subscription_tree *sub_tree = data;
+
+	/*
+	 * Push off the subscription termination to the serializer to
+	 * avoid deadlock.  Another thread could be trying to send a
+	 * message on the subscription that can deadlock with this
+	 * thread.
+	 */
+	ao2_ref(sub_tree, +1);
+	if (ast_sip_push_task(sub_tree->serializer, sub_tree_subscription_terminate_cb,
+		sub_tree)) {
+		ao2_ref(sub_tree, -1);
+	}
 }
 
 /*! \brief Destructor for subscription persistence */
@@ -621,7 +658,7 @@
 		return;
 	}
 
-	ast_debug(3, "Updating persistence for '%s->%s'  prune on restart: %s\n",
+	ast_debug(3, "Updating persistence for '%s->%s'  prune on boot: %s\n",
 		sub_tree->persistence->endpoint, sub_tree->root->resource,
 		sub_tree->persistence->prune_on_boot ? "yes" : "no");
 
@@ -645,7 +682,7 @@
 							sub_tree->endpoint, rdata);
 
 					if (sub_tree->persistence->prune_on_boot) {
-						ast_debug(3, "adding transport monitor on %s for '%s->%s'  prune on restart: %d\n",
+						ast_debug(3, "adding transport monitor on %s for '%s->%s'  prune on boot: %d\n",
 							rdata->tp_info.transport->obj_name,
 							sub_tree->persistence->endpoint, sub_tree->root->resource,
 							sub_tree->persistence->prune_on_boot);
diff --git a/res/res_pjsip_registrar.c b/res/res_pjsip_registrar.c
index bdee91f..985933e 100644
--- a/res/res_pjsip_registrar.c
+++ b/res/res_pjsip_registrar.c
@@ -337,7 +337,7 @@
 		&& strcmp(ma->contact_name, mb->contact_name) == 0;
 }
 
-static void register_contact_transport_shutdown_cb(void *data)
+static int register_contact_transport_remove_cb(void *data)
 {
 	struct contact_transport_monitor *monitor = data;
 	struct ast_sip_contact *contact;
@@ -345,7 +345,8 @@
 
 	aor = ast_sip_location_retrieve_aor(monitor->aor_name);
 	if (!aor) {
-		return;
+		ao2_ref(monitor, -1);
+		return 0;
 	}
 
 	ao2_lock(aor);
@@ -365,6 +366,35 @@
 	}
 	ao2_unlock(aor);
 	ao2_ref(aor, -1);
+
+	ao2_ref(monitor, -1);
+	return 0;
+}
+
+/*!
+ * \internal
+ * \brief The reliable transport we registered as a contact has shutdown.
+ *
+ * \param data What contact needs to be removed.
+ *
+ * \note Normally executed by the pjsip monitor thread.
+ *
+ * \return Nothing
+ */
+static void register_contact_transport_shutdown_cb(void *data)
+{
+	struct contact_transport_monitor *monitor = data;
+
+	/*
+	 * Push off to a default serializer.  This is in case sorcery
+	 * does database accesses for contacts.  Database accesses may
+	 * not be on this machine.  We don't want to tie up the pjsip
+	 * monitor thread with potentially long access times.
+	 */
+	ao2_ref(monitor, +1);
+	if (ast_sip_push_task(NULL, register_contact_transport_remove_cb, monitor)) {
+		ao2_ref(monitor, -1);
+	}
 }
 
 AST_VECTOR(excess_contact_vector, struct ast_sip_contact *);

-- 
To view, visit https://gerrit.asterisk.org/8705
To unsubscribe, visit https://gerrit.asterisk.org/settings

Gerrit-Project: asterisk
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I56b647aea565f24dba33e9e5ebeed4cd3f31f8c4
Gerrit-Change-Number: 8705
Gerrit-PatchSet: 1
Gerrit-Owner: Richard Mudgett <rmudgett at digium.com>
Gerrit-Reviewer: Benjamin Keith Ford <bford at digium.com>
Gerrit-Reviewer: George Joseph <gjoseph at digium.com>
Gerrit-Reviewer: Jenkins2
Gerrit-Reviewer: Joshua Colp <jcolp at digium.com>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20180418/1c57f07e/attachment-0001.html>


More information about the asterisk-code-review mailing list