[asterisk-commits] dvossel: trunk r222761 - in /trunk: channels/ include/asterisk/ main/

SVN commits to the Asterisk project asterisk-commits at lists.digium.com
Wed Oct 7 17:58:45 CDT 2009


Author: dvossel
Date: Wed Oct  7 17:58:38 2009
New Revision: 222761

URL: http://svnview.digium.com/svn/asterisk?view=rev&rev=222761
Log:
Deadlock in channel masquerade handling

Channels are stored in an ao2_container.  When accessing an item within
an ao2_container the proper locking order is to first lock the container,
and then the items within it.

In ast_do_masquerade both the clone and original channel must be locked
for the entire duration of the function.  The problem with this is that
it attemptes to unlink and link these channels back into the ao2_container
when one of the channel's name changes.  This is invalid locking order as
the process of unlinking and linking will lock the ao2_container while
the channels are locked!!! Now, both the channels in do_masquerade are
unlinked from the ao2_container and then locked for the entire function.
At the end of the function both channels are unlocked and linked back
into the container with their new names as hash values.

This new method of requiring all channels and tech pvts to be unlocked
before ast_do_masquerade() or ast_change_name() required several
changes throughout the code base.


(closes issue #15911)
Reported by: russell
Patches:
      masq_deadlock_trunk.diff uploaded by dvossel (license 671)
Tested by: dvossel, atis

Review: https://reviewboard.asterisk.org/r/387/


Modified:
    trunk/channels/chan_misdn.c
    trunk/channels/chan_sip.c
    trunk/include/asterisk/channel.h
    trunk/main/channel.c
    trunk/main/features.c
    trunk/main/pbx.c

Modified: trunk/channels/chan_misdn.c
URL: http://svnview.digium.com/svn/asterisk/trunk/channels/chan_misdn.c?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/channels/chan_misdn.c (original)
+++ trunk/channels/chan_misdn.c Wed Oct  7 17:58:38 2009
@@ -7762,9 +7762,7 @@
 	snprintf(newname, sizeof(newname), "%s/%d-", misdn_type, chan_offset + c);
 	if (strncmp(tmp->name, newname, strlen(newname))) {
 		snprintf(newname, sizeof(newname), "%s/%d-u%d", misdn_type, chan_offset + c, glob_channel++);
-		ast_channel_lock(tmp);
 		ast_change_name(tmp, newname);
-		ast_channel_unlock(tmp);
 		chan_misdn_log(3, port, " --> updating channel name to [%s]\n", tmp->name);
 	}
 }

Modified: trunk/channels/chan_sip.c
URL: http://svnview.digium.com/svn/asterisk/trunk/channels/chan_sip.c?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/channels/chan_sip.c (original)
+++ trunk/channels/chan_sip.c Wed Oct  7 17:58:38 2009
@@ -2696,7 +2696,7 @@
 static int handle_request_options(struct sip_pvt *p, struct sip_request *req);
 static int handle_invite_replaces(struct sip_pvt *p, struct sip_request *req, int debug, int seqno, struct sockaddr_in *sin, int *nounlock);
 static int handle_request_notify(struct sip_pvt *p, struct sip_request *req, struct sockaddr_in *sin, int seqno, const char *e);
-static int local_attended_transfer(struct sip_pvt *transferer, struct sip_dual *current, struct sip_request *req, int seqno);
+static int local_attended_transfer(struct sip_pvt *transferer, struct sip_dual *current, struct sip_request *req, int seqno, int *nounlock);
 
 /*------Response handling functions */
 static void handle_response_invite(struct sip_pvt *p, int resp, const char *rest, struct sip_request *req, int seqno);
@@ -19250,17 +19250,14 @@
 	}
 	ast_debug(4, "SIP Park: Transferer channel %s, Transferee %s\n", transferer->name, transferee->name);
 
-	ast_channel_lock(transferee);
 	if (ast_do_masquerade(transferee)) {
 		ast_log(LOG_WARNING, "Masquerade failed.\n");
 		transmit_response(transferer->tech_pvt, "503 Internal error", &req);
-		ast_channel_unlock(transferee);
 		if (d->req.data)
 			ast_free(d->req.data);
 		free(d);
 		return NULL;
 	}
-	ast_channel_unlock(transferee);
 
 	res = ast_park_call(transferee, transferer, 0, &ext);
 	
@@ -19359,15 +19356,12 @@
 	ast_copy_string(transferer->exten, chan2->exten, sizeof(transferer->exten));
 	transferer->priority = chan2->priority;
 
-	ast_channel_lock(transferer);
 	if (ast_do_masquerade(transferer)) {
 		ast_log(LOG_WARNING, "Masquerade failed :(\n");
-		ast_channel_unlock(transferer);
 		transferer->hangupcause = AST_CAUSE_SWITCH_CONGESTION;
 		ast_hangup(transferer);
 		return -1;
 	}
-	ast_channel_unlock(transferer);
 	if (!transferer || !transferee) {
 		if (!transferer) {
 			ast_debug(1, "No transferer channel, giving up parking\n");
@@ -19710,8 +19704,11 @@
 	XXX 'ignore' is unused.
 
 	\note this function is called by handle_request_invite(). Four locks
-	held at the beginning of this function, p, p->owner, p->refer->refer_call->owner...
-	only p's lock should remain at the end of this function.  p's lock is held by sipsock_read()
+	held at the beginning of this function, p, p->owner, p->refer->refer_call and
+	p->refere->refer_call->owner.  only p's lock should remain at the end of this
+	function.  p's lock as well as the channel p->owner's lock are held by
+	handle_request_do(), we unlock p->owner before the masq.  By setting nounlock
+	we are indicating to handle_request_do() that we have already unlocked the owner.
  */
 static int handle_invite_replaces(struct sip_pvt *p, struct sip_request *req, int debug, int seqno, struct sockaddr_in *sin, int *nounlock)
 {
@@ -19720,8 +19717,6 @@
 	struct ast_channel *c = p->owner;	/* Our incoming call */
 	struct ast_channel *replacecall = p->refer->refer_call->owner;	/* The channel we're about to take over */
 	struct ast_channel *targetcall;		/* The bridge to the take-over target */
-
-	struct ast_channel *test;
 
 	/* Check if we're in ring state */
 	if (replacecall->_state == AST_STATE_RING)
@@ -19788,9 +19783,9 @@
 
 	/* Answer the incoming call and set channel to UP state */
 	transmit_response_with_sdp(p, "200 OK", req, XMIT_RELIABLE, FALSE, FALSE);
-		
+
 	ast_setstate(c, AST_STATE_UP);
-	
+
 	/* Stop music on hold and other generators */
 	ast_quiet_chan(replacecall);
 	ast_quiet_chan(targetcall);
@@ -19806,43 +19801,35 @@
 	else
 		ast_debug(4, "Invite/Replaces: Going to masquerade %s into %s\n", c->name, replacecall->name);
 
-	/* C should now be in place of replacecall */
-	if (ast_do_masquerade(replacecall)) {
-		ast_log(LOG_WARNING, "Failed to perform masquerade with INVITE replaces\n");
-	}
-
-	if (earlyreplace || oneleggedreplace ) {
-		c->hangupcause = AST_CAUSE_SWITCH_CONGESTION;
-	}
-
-	ast_setstate(c, AST_STATE_DOWN);
-	ast_debug(4, "After transfer:----------------------------\n");
-	ast_debug(4, " -- C:        %s State %s\n", c->name, ast_state2str(c->_state));
-	if (replacecall)
-		ast_debug(4, " -- replacecall:        %s State %s\n", replacecall->name, ast_state2str(replacecall->_state));
-	if (p->owner) {
-		ast_debug(4, " -- P->owner: %s State %s\n", p->owner->name, ast_state2str(p->owner->_state));
-		test = ast_bridged_channel(p->owner);
-		if (test)
-			ast_debug(4, " -- Call bridged to P->owner: %s State %s\n", test->name, ast_state2str(test->_state));
-		else
-			ast_debug(4, " -- No call bridged to C->owner \n");
-	} else
-		ast_debug(4, " -- No channel yet \n");
-	ast_debug(4, "End After transfer:----------------------------\n");
-
-	/* unlock sip pvt and owner so hangup can do its thing */
+	/* C should now be in place of replacecall. all channel locks and pvt locks should be removed
+	 * before issuing the masq.  Since we are unlocking both the pvt (p) and its owner channel (c)
+	 * it is possible for channel c to be destroyed on us.  To prevent this, we must give c a reference
+	 * before any unlocking takes place and remove it only once we are completely done with it */
+	ast_channel_ref(c);
 	ast_channel_unlock(replacecall);
 	ast_channel_unlock(c);
 	sip_pvt_unlock(p->refer->refer_call);
 	sip_pvt_unlock(p);
-	*nounlock = 1;
+	if (ast_do_masquerade(replacecall)) {
+		ast_log(LOG_WARNING, "Failed to perform masquerade with INVITE replaces\n");
+	}
+	ast_channel_lock(c);
+	if (earlyreplace || oneleggedreplace ) {
+		c->hangupcause = AST_CAUSE_SWITCH_CONGESTION;
+	}
+	ast_setstate(c, AST_STATE_DOWN);
+	ast_channel_unlock(c);
 
 	/* The call should be down with no ast_channel, so hang it up */
 	c->tech_pvt = dialog_unref(c->tech_pvt, "unref dialog c->tech_pvt");
+
+	/* c and c's tech pvt must be unlocked at this point for ast_hangup */
 	ast_hangup(c);
-	sip_pvt_lock(p); /* lock PVT structure again after hangup */
-
+	/* this indicates to handle_request_do that the owner channel has already been unlocked */
+	*nounlock = 1;
+	/* lock PVT structure again after hangup */
+	sip_pvt_lock(p);
+	ast_channel_unref(c);
 	return 0;
 }
 
@@ -20947,8 +20934,19 @@
 }
 
 /*! \brief  Find all call legs and bridge transferee with target
- *	called from handle_request_refer */
-static int local_attended_transfer(struct sip_pvt *transferer, struct sip_dual *current, struct sip_request *req, int seqno)
+ *	called from handle_request_refer
+ *
+ *	\note this function assumes two locks to begin with, sip_pvt transferer and current.chan1 (the pvt's owner)... 
+ *	2 additional locks are held at the beginning of the function, targetcall_pvt, and targetcall_pvt's owner
+ *	channel (which is stored in target.chan1).  These 2 locks _MUST_ be let go by the end of the function.  Do
+ *	not be confused into thinking a pvt's owner is the same thing as the channels locked at the beginning of
+ *	this function, after the masquerade this may not be true.  Be consistent and unlock only the exact same
+ *	pointers that were locked to begin with.
+ *
+ *	If this function is successful, only the transferer pvt lock will remain on return.  Setting nounlock indicates
+ *	to handle_request_do() that the pvt's owner it locked does not require an unlock.
+ */
+static int local_attended_transfer(struct sip_pvt *transferer, struct sip_dual *current, struct sip_request *req, int seqno, int *nounlock)
 {
 	struct sip_dual target;		/* Chan 1: Call from tranferer to Asterisk */
 					/* Chan 2: Call from Asterisk to target */
@@ -20968,7 +20966,7 @@
 		   	to follow the standard */
 			transmit_notify_with_sipfrag(transferer, seqno, "481 Call leg/transaction does not exist", TRUE);
 			append_history(transferer, "Xfer", "Refer failed");
-			ast_clear_flag(&transferer->flags[0], SIP_GOTREFER);	
+			ast_clear_flag(&transferer->flags[0], SIP_GOTREFER);
 			transferer->refer->status = REFER_FAILED;
 			return -1;
 		}
@@ -21034,21 +21032,20 @@
 	ast_party_connected_line_copy(&connected_to_target, &target.chan1->connected);
 	connected_to_target.source = connected_to_transferee.source = AST_CONNECTED_LINE_UPDATE_SOURCE_TRANSFER;
 	res = attempt_transfer(current, &target);
-	sip_pvt_unlock(targetcall_pvt);
 	if (res) {
 		/* Failed transfer */
 		transmit_notify_with_sipfrag(transferer, seqno, "486 Busy Here", TRUE);
 		append_history(transferer, "Xfer", "Refer failed");
-		if (targetcall_pvt->owner)
-			ast_channel_unlock(targetcall_pvt->owner);
 		ast_clear_flag(&transferer->flags[0], SIP_DEFER_BYE_ON_TRANSFER);
+		/* if transfer failed, go ahead and unlock targetcall_pvt and it's owner channel */
+		sip_pvt_unlock(targetcall_pvt);
+		ast_channel_unlock(target.chan1);
 	} else {
 		/* Transfer succeeded! */
 		const char *xfersound = pbx_builtin_getvar_helper(target.chan1, "ATTENDED_TRANSFER_COMPLETE_SOUND");
 
-		/* target.chan1 was locked in get_sip_pvt_byid_locked */
+		/* target.chan1 was locked in get_sip_pvt_byid_locked, do not unlock target.chan1 before this */
 		ast_cel_report_event(target.chan1, AST_CEL_ATTENDEDTRANSFER, NULL, transferer_linkedid, target.chan2);
-		ast_channel_unlock(target.chan1);
 
 		/* Tell transferer that we're done. */
 		transmit_notify_with_sipfrag(transferer, seqno, "200 OK", TRUE);
@@ -21057,21 +21054,27 @@
 		if (target.chan2 && !ast_strlen_zero(xfersound) && ast_streamfile(target.chan2, xfersound, target.chan2->language) >= 0) {
 			ast_waitstream(target.chan2, "");
 		}
-		
+
 		/* By forcing the masquerade, we know that target.chan1 and target.chan2 are bridged. We then
 		 * can queue connected line updates where they need to go.
 		 *
-		 * No need to lock target.chan1 here since it was previously locked in get_sip_pvt_byid_locked
+		 * before a masquerade, all channel and pvt locks must be unlocked.  Any recursive
+		 * channel locks held before this function invalidates channel container locking order.
+		 * Since we are unlocking both the pvt (transferer) and its owner channel (current.chan1)
+		 * it is possible for current.chan1 to be destroyed in the pbx thread.  To prevent this
+		 * we must give c a reference before any unlocking takes place.
 		 */
-		if (target.chan1->masq) {
-			/* If the channel thread already did the masquerade, then we don't need to do anything */
-			ast_do_masquerade(target.chan1);
-		}
-
-		if (targetcall_pvt->owner) {
-			ast_debug(1, "SIP attended transfer: Unlocking channel %s\n", targetcall_pvt->owner->name);
-			ast_channel_unlock(targetcall_pvt->owner);
-		}
+
+		ast_channel_ref(current->chan1);
+		ast_channel_unlock(current->chan1); /* current.chan1 is p->owner before the masq, it was locked by socket_read()*/
+		ast_channel_unlock(target.chan1);
+		*nounlock = 1;  /* we just unlocked the dialog's channel and have no plans of locking it again. */
+		sip_pvt_unlock(targetcall_pvt);
+		sip_pvt_unlock(transferer);
+
+		ast_do_masquerade(target.chan1);
+
+		ast_channel_lock(transferer); /* the transferer pvt is expected to remain locked on return */
 
 		if (target.chan2) {
 			ast_channel_queue_connected_line_update(target.chan1, &connected_to_transferee);
@@ -21084,7 +21087,10 @@
 			 */
 			ast_channel_update_connected_line(target.chan1, &connected_to_target);
 		}
-	}
+		ast_channel_unref(current->chan1);
+	}
+
+	/* at this point if the transfer is successful only the transferer pvt should be locked. */
 	ast_party_connected_line_free(&connected_to_target);
 	ast_party_connected_line_free(&connected_to_transferee);
 	if (targetcall_pvt)
@@ -21310,7 +21316,7 @@
 
 	/* Attended transfer: Find all call legs and bridge transferee with target*/
 	if (p->refer->attendedtransfer) {
-		if ((res = local_attended_transfer(p, &current, req, seqno)))
+		if ((res = local_attended_transfer(p, &current, req, seqno, nounlock)))
 			return res;	/* We're done with the transfer */
 		/* Fall through for remote transfers that we did not find locally */
 		if (sipdebug)
@@ -21735,7 +21741,9 @@
 				if (bridged_to) {
 					/* Don't actually hangup here... */
 					ast_queue_control(c, AST_CONTROL_UNHOLD);
+					ast_channel_unlock(c);  /* async_goto can do a masquerade, no locks can be held during a masq */
 					ast_async_goto(bridged_to, p->context, p->refer->refer_to, 1);
+					ast_channel_lock(c);
 				} else
 					ast_queue_hangup(p->owner);
 			}

Modified: trunk/include/asterisk/channel.h
URL: http://svnview.digium.com/svn/asterisk/trunk/include/asterisk/channel.h?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/include/asterisk/channel.h (original)
+++ trunk/include/asterisk/channel.h Wed Oct  7 17:58:38 2009
@@ -1054,12 +1054,17 @@
 /*!
  * \brief Change channel name
  *
- * \pre The channel must be locked before calling this function.
+ * \pre Absolutely all channels _MUST_ be unlocked before calling this function.
  *
  * \param chan the channel to change the name of
  * \param newname the name to change to
  *
  * \return nothing
+ *
+ * \note this function must _NEVER_ be used when any channels are locked
+ * regardless if it is the channel who's name is being changed or not because
+ * it invalidates our channel container locking order... lock container first,
+ * then the individual channels, never the other way around.
  */
 void ast_change_name(struct ast_channel *chan, const char *newname);
 
@@ -1904,6 +1909,7 @@
 
 /*!
  * \brief Start masquerading a channel
+ * \note absolutely _NO_ channel locks should be held before calling this function.
  * \details
  * XXX This is a seriously whacked out operation.  We're essentially putting the guts of
  *     the clone channel into the original channel.  Start by killing off the original

Modified: trunk/main/channel.c
URL: http://svnview.digium.com/svn/asterisk/trunk/main/channel.c?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/main/channel.c (original)
+++ trunk/main/channel.c Wed Oct  7 17:58:38 2009
@@ -2143,8 +2143,11 @@
 	ast_autoservice_stop(chan);
 
 	if (chan->masq) {
-		if (ast_do_masquerade(chan))
+		ast_channel_unlock(chan);
+		if (ast_do_masquerade(chan)) {
 			ast_log(LOG_WARNING, "Failed to perform masquerade\n");
+		}
+		ast_channel_lock(chan);
 	}
 
 	if (chan->masq) {
@@ -2514,13 +2517,13 @@
 	
 	/* Perform any pending masquerades */
 	for (x = 0; x < n; x++) {
-		ast_channel_lock(c[x]);
 		if (c[x]->masq && ast_do_masquerade(c[x])) {
 			ast_log(LOG_WARNING, "Masquerade failed\n");
 			*ms = -1;
-			ast_channel_unlock(c[x]);
 			return NULL;
 		}
+
+		ast_channel_lock(c[x]);
 		if (!ast_tvzero(c[x]->whentohangup)) {
 			if (ast_tvzero(whentohangup))
 				now = ast_tvnow();
@@ -2641,16 +2644,15 @@
 	struct ast_channel *winner = NULL;
 	struct ast_epoll_data *aed = NULL;
 
-	ast_channel_lock(chan);
 
 	/* See if this channel needs to be masqueraded */
 	if (chan->masq && ast_do_masquerade(chan)) {
 		ast_log(LOG_WARNING, "Failed to perform masquerade on %s\n", chan->name);
 		*ms = -1;
-		ast_channel_unlock(chan);
 		return NULL;
 	}
 
+	ast_channel_lock(chan);
 	/* Figure out their timeout */
 	if (!ast_tvzero(chan->whentohangup)) {
 		if ((diff = ast_tvdiff_ms(chan->whentohangup, ast_tvnow())) < 0) {
@@ -2726,13 +2728,13 @@
 	struct ast_channel *winner = NULL;
 
 	for (i = 0; i < n; i++) {
-		ast_channel_lock(c[i]);
 		if (c[i]->masq && ast_do_masquerade(c[i])) {
 			ast_log(LOG_WARNING, "Masquerade failed\n");
 			*ms = -1;
-			ast_channel_unlock(c[i]);
 			return NULL;
 		}
+
+		ast_channel_lock(c[i]);
 		if (!ast_tvzero(c[i]->whentohangup)) {
 			if (whentohangup == 0)
 				now = ast_tvnow();
@@ -3064,25 +3066,22 @@
 	struct ast_frame *f = NULL;	/* the return value */
 	int blah;
 	int prestate;
-	int count = 0, cause = 0;
+	int cause = 0;
 
 	/* this function is very long so make sure there is only one return
 	 * point at the end (there are only two exceptions to this).
 	 */
-	while(ast_channel_trylock(chan)) {
-		if(count++ > 10) 
-			/*cannot goto done since the channel is not locked*/
-			return &ast_null_frame;
-		usleep(1);
-	}
 
 	if (chan->masq) {
 		if (ast_do_masquerade(chan))
 			ast_log(LOG_WARNING, "Failed to perform masquerade\n");
 		else
 			f =  &ast_null_frame;
-		goto done;
-	}
+		return f;
+	}
+
+	/* if here, no masq has happened, lock the channel and proceed */
+	ast_channel_lock(chan);
 
 	/* Stop if we're a zombie or need a soft hangup */
 	if (ast_test_flag(chan, AST_FLAG_ZOMBIE) || ast_check_hangup(chan)) {
@@ -3893,9 +3892,13 @@
 		goto done;
 
 	/* Handle any pending masquerades */
-	if (chan->masq && ast_do_masquerade(chan)) {
-		ast_log(LOG_WARNING, "Failed to perform masquerade\n");
-		goto done;
+	if (chan->masq) {
+		ast_channel_unlock(chan);
+		if (ast_do_masquerade(chan)) {
+			ast_log(LOG_WARNING, "Failed to perform masquerade\n");
+			return res; /* no need to goto done: chan is already unlocked for masq */
+		}
+		ast_channel_lock(chan);
 	}
 	if (chan->masqr) {
 		res = 0;	/* XXX explain, why 0 ? */
@@ -4806,15 +4809,23 @@
 	return res;
 }
 
+/*! \brief this function simply changes the name of the channel and issues a manager_event
+ *         with out unlinking and linking the channel from the ao2_container.  This should
+ *         only be used when the channel has already been unlinked from the ao2_container.
+ */
+static void __ast_change_name_nolink(struct ast_channel *chan, const char *newname)
+{
+	manager_event(EVENT_FLAG_CALL, "Rename", "Channel: %s\r\nNewname: %s\r\nUniqueid: %s\r\n", chan->name, newname, chan->uniqueid);
+	ast_string_field_set(chan, name, newname);
+}
+
 void ast_change_name(struct ast_channel *chan, const char *newname)
 {
 	/* We must re-link, as the hash value will change here. */
 	ao2_unlink(channels, chan);
-
-	manager_event(EVENT_FLAG_CALL, "Rename", "Channel: %s\r\nNewname: %s\r\nUniqueid: %s\r\n", chan->name, newname, chan->uniqueid);
-
-	ast_string_field_set(chan, name, newname);
-
+	ast_channel_lock(chan);
+	__ast_change_name_nolink(chan, newname);
+	ast_channel_unlock(chan);
 	ao2_link(channels, chan);
 }
 
@@ -5063,7 +5074,9 @@
 /*!
   \brief Masquerade a channel
 
-  \note Assumes channel will be locked when called
+  \note Assumes _NO_ channels and _NO_ channel pvt's are locked.  If a channel is locked while calling
+        this function, it invalidates our channel container locking order.  All channels
+		must be unlocked before it is permissible to lock the channels' ao2 container.
 */
 int ast_do_masquerade(struct ast_channel *original)
 {
@@ -5078,7 +5091,7 @@
 		struct ast_party_connected_line connected;
 		struct ast_party_redirecting redirecting;
 	} exchange;
-	struct ast_channel *clonechan = original->masq;
+	struct ast_channel *clonechan;
 	struct ast_cdr *cdr;
 	int rformat = original->readformat;
 	int wformat = original->writeformat;
@@ -5092,8 +5105,52 @@
 	 * original channel's backend.  While the features are nice, which is the
 	 * reason we're keeping it, it's still awesomely weird. XXX */
 
-	/* We need the clone's lock, too */
-	ast_channel_lock(clonechan);
+	/* The reasoning for the channels ao2_container lock here is complex.
+	 * 
+	 * In order to check for a race condition, the original channel must
+	 * be locked.  If it is determined that the masquerade should proceed
+	 * the original channel can absolutely not be unlocked until the end
+	 * of the function.  Since after determining the masquerade should
+	 * continue requires the channels to be unlinked from the ao2_container,
+	 * the container lock must be held first to achieve proper locking order.
+	 */
+	ao2_lock(channels);
+
+	/* lock the original channel to determine if the masquerade is require or not */
+	ast_channel_lock(original);
+
+	/* This checks to see if the masquerade has already happened or not.  There is a
+	 * race condition that exists for this function. Since all pvt and channel locks
+	 * must be let go before calling do_masquerade, it is possible that it could be
+	 * called multiple times for the same channel.  This check verifies whether
+	 * or not the masquerade has already been completed by another thread */
+	if (!original->masq) {
+		ast_channel_unlock(original);
+		ao2_unlock(channels);
+		return 0; /* masq already completed by another thread, or never needed to be done to begin with */
+	}
+
+	/* now that we have verified no race condition exists, set the clone channel */
+	clonechan = original->masq;
+
+	/* since this function already holds the global container lock, unlocking original
+	 * for deadlock avoidance will not result in any sort of masquerade race condition.
+	 * If masq is called by a different thread while this happens, it will be stuck waiting
+	 * until we unlock the container. */
+	while (ast_channel_trylock(clonechan)) {
+		CHANNEL_DEADLOCK_AVOIDANCE(original);
+	}
+
+	/* clear the masquerade channels */
+	original->masq = NULL;
+	clonechan->masqr = NULL;
+
+	/* unlink from channels container as name (which is the hash value) will change */
+	ao2_unlink(channels, original);
+	ao2_unlink(channels, clonechan);
+
+	/* now that both channels are locked and unlinked from the container, it is safe to unlock it */
+	ao2_unlock(channels);
 
 	ast_debug(4, "Actually Masquerading %s(%d) into the structure of %s(%d)\n",
 		clonechan->name, clonechan->_state, original->name, original->_state);
@@ -5105,10 +5162,6 @@
 	   one */
 	free_translation(clonechan);
 	free_translation(original);
-
-	/* Unlink the masquerade */
-	original->masq = NULL;
-	clonechan->masqr = NULL;
 
 	/* Save the original name */
 	ast_copy_string(orig, original->name, sizeof(orig));
@@ -5118,10 +5171,10 @@
 	snprintf(masqn, sizeof(masqn), "%s<MASQ>", newn);
 
 	/* Mangle the name of the clone channel */
-	ast_change_name(clonechan, masqn);
+	__ast_change_name_nolink(clonechan, masqn);
 
 	/* Copy the name from the clone channel */
-	ast_change_name(original, newn);
+	__ast_change_name_nolink(original, newn);
 
 	/* share linked id's */
 	ast_channel_set_linkgroup(original, clonechan);
@@ -5195,24 +5248,20 @@
 	original->_state = clonechan->_state;
 	clonechan->_state = origstate;
 
-	if (clonechan->tech->fixup){
-		res = clonechan->tech->fixup(original, clonechan);
-		if (res)
-			ast_log(LOG_WARNING, "Fixup failed on channel %s, strange things may happen.\n", clonechan->name);
+	if (clonechan->tech->fixup && clonechan->tech->fixup(original, clonechan)) {
+		ast_log(LOG_WARNING, "Fixup failed on channel %s, strange things may happen.\n", clonechan->name);
 	}
 
 	/* Start by disconnecting the original's physical side */
-	if (clonechan->tech->hangup)
-		res = clonechan->tech->hangup(clonechan);
-	if (res) {
+	if (clonechan->tech->hangup && clonechan->tech->hangup(clonechan)) {
 		ast_log(LOG_WARNING, "Hangup failed!  Strange things may happen!\n");
-		ast_channel_unlock(clonechan);
-		return -1;
+		res = -1;
+		goto done;
 	}
 
 	/* Mangle the name of the clone channel */
-	snprintf(zombn, sizeof(zombn), "%s<ZOMBIE>", orig);
-	ast_change_name(clonechan, zombn);
+	snprintf(zombn, sizeof(zombn), "%s<ZOMBIE>", orig); /* quick, hide the brains! */
+	__ast_change_name_nolink(clonechan, zombn);
 
 	/* Update the type. */
 	t_pvt = original->monitor;
@@ -5306,12 +5355,11 @@
 	/* Okay.  Last thing is to let the channel driver know about all this mess, so he
 	   can fix up everything as best as possible */
 	if (original->tech->fixup) {
-		res = original->tech->fixup(clonechan, original);
-		if (res) {
+		if (original->tech->fixup(clonechan, original)) {
 			ast_log(LOG_WARNING, "Channel for type '%s' could not fixup channel %s\n",
 				original->tech->type, original->name);
-			ast_channel_unlock(clonechan);
-			return -1;
+			res = -1;
+			goto done;
 		}
 	} else
 		ast_log(LOG_WARNING, "Channel type '%s' does not have a fixup routine (for %s)!  Bad things may happen.\n",
@@ -5350,13 +5398,24 @@
 		ast_debug(1, "Released clone lock on '%s'\n", clonechan->name);
 		ast_set_flag(clonechan, AST_FLAG_ZOMBIE);
 		ast_queue_frame(clonechan, &ast_null_frame);
-		ast_channel_unlock(clonechan);
 	}
 
 	/* Signal any blocker */
 	if (ast_test_flag(original, AST_FLAG_BLOCKING))
 		pthread_kill(original->blocker, SIGURG);
 	ast_debug(1, "Done Masquerading %s (%d)\n", original->name, original->_state);
+
+done:
+	/* it is possible for the clone channel to disappear during this */
+	if (clonechan) {
+		ast_channel_unlock(original);
+		ast_channel_unlock(clonechan);
+		ao2_link(channels, clonechan);
+		ao2_link(channels, original);
+	} else {
+		ast_channel_unlock(original);
+		ao2_link(channels, original);
+	}
 	return 0;
 }
 

Modified: trunk/main/features.c
URL: http://svnview.digium.com/svn/asterisk/trunk/main/features.c?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/main/features.c (original)
+++ trunk/main/features.c Wed Oct  7 17:58:38 2009
@@ -4378,17 +4378,19 @@
 static void do_bridge_masquerade(struct ast_channel *chan, struct ast_channel *tmpchan)
 {
 	ast_moh_stop(chan);
-	ast_channel_lock(chan);
+	ast_channel_lock_both(chan, tmpchan);
 	ast_setstate(tmpchan, chan->_state);
 	tmpchan->readformat = chan->readformat;
 	tmpchan->writeformat = chan->writeformat;
 	ast_channel_masquerade(tmpchan, chan);
-	ast_channel_lock(tmpchan);
+	ast_channel_unlock(chan);
+	ast_channel_unlock(tmpchan);
+
+	/* must be done without any channel locks held */
 	ast_do_masquerade(tmpchan);
+
 	/* when returning from bridge, the channel will continue at the next priority */
 	ast_explicit_goto(tmpchan, chan->context, chan->exten, chan->priority + 1);
-	ast_channel_unlock(tmpchan);
-	ast_channel_unlock(chan);
 }
 
 /*!
@@ -5004,9 +5006,10 @@
 					"Channel1: %s\r\n"
 					"Channel2: %s\r\n", chan->name, args.dest_chan);
 	}
+
+	ast_channel_unlock(current_dest_chan);
+
 	do_bridge_masquerade(current_dest_chan, final_dest_chan);
-
-	ast_channel_unlock(current_dest_chan);
 
 	/* now current_dest_chan is a ZOMBIE and with softhangup set to 1 and final_dest_chan is our end point */
 	/* try to make compatible, send error if we fail */

Modified: trunk/main/pbx.c
URL: http://svnview.digium.com/svn/asterisk/trunk/main/pbx.c?view=diff&rev=222761&r1=222760&r2=222761
==============================================================================
--- trunk/main/pbx.c (original)
+++ trunk/main/pbx.c Wed Oct  7 17:58:38 2009
@@ -7662,10 +7662,12 @@
 				tmpchan = NULL;
 				res = -1;
 			} else {
-				/* Grab the locks and get going */
-				ast_channel_lock(tmpchan);
+				/* it may appear odd to unlock chan here since the masquerade is on
+				 * tmpchan, but no channel locks should be held when doing a masquerade
+				 * since a masquerade requires a lock on the channels ao2 container. */
+				ast_channel_unlock(chan);
 				ast_do_masquerade(tmpchan);
-				ast_channel_unlock(tmpchan);
+				ast_channel_lock(chan);
 				/* Start the PBX going on our stolen channel */
 				if (ast_pbx_start(tmpchan)) {
 					ast_log(LOG_WARNING, "Unable to start PBX on %s\n", tmpchan->name);




More information about the asterisk-commits mailing list