<p>Richard Mudgett has uploaded this change for <strong>review</strong>.</p><p><a href="https://gerrit.asterisk.org/5920">View Change</a></p><pre style="font-family: monospace,monospace; white-space: pre-wrap;">pjsip_distributor.c: Fix deadlock with TCP type transports.<br><br>When a SIP message comes in on a transport, pjproject obtains the lock on<br>the transport and pulls the data out of the socket.  Unlike UDP, the TCP<br>transport does not allow concurrent access.  Without concurrency the<br>transport lock is not released when the transport's message complete<br>callback is called.  The processing continues and eventually Asterisk<br>starts processing the SIP message.  The first thing Asterisk tries to do<br>is determine the associated dialog of the message to determine the<br>associated serializer.  To get the associated serializer safely requires<br>us to get the dialog lock.<br><br>To send a request or response message for a dialog, pjproject obtains the<br>dialog lock and then obtains the transport lock.  Deadlock can result<br>because of the opposite order the locks are obtained.<br><br>* Fix the deadlock by passing all incoming SIP messages into a new master<br>distributor serializer.  The transport lock will never be held by the new<br>serializer so it is safe to get the dialog lock to determine which<br>serializer must process the message.<br><br>ASTERISK-27090 #close<br><br>Change-Id: I9ed63f4da9649e9db6ed4be29c360968917a89bd<br>---<br>M res/res_pjsip/pjsip_distributor.c<br>1 file changed, 55 insertions(+), 19 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/20/5920/1</pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/res/res_pjsip/pjsip_distributor.c b/res/res_pjsip/pjsip_distributor.c<br>index cca26a8..c2b7429 100644<br>--- a/res/res_pjsip/pjsip_distributor.c<br>+++ b/res/res_pjsip/pjsip_distributor.c<br>@@ -58,6 +58,9 @@<br>   char src_name[];<br> };<br> <br>+/*! Master distributor serializer to avoid deadlock */<br>+static struct ast_taskprocessor *distributor_master;<br>+<br> /*! Number of serializers in pool if one not otherwise known.  (Best if prime number) */<br> #define DISTRIBUTOR_POOL_SIZE          31<br> <br>@@ -368,20 +371,12 @@<br>  .on_rx_request = endpoint_lookup,<br> };<br> <br>-static pj_bool_t distributor(pjsip_rx_data *rdata)<br>+static int distribute_master(void *data)<br> {<br>         pjsip_dialog *dlg;<br>    struct distributor_dialog_data *dist = NULL;<br>  struct ast_taskprocessor *serializer = NULL;<br>- pjsip_rx_data *clone;<br>-<br>-     if (!ast_test_flag(&ast_options, AST_OPT_FLAG_FULLY_BOOTED)) {<br>-           /*<br>-            * Ignore everything until we are fully booted.  Let the<br>-              * peer retransmit messages until we are ready.<br>-               */<br>-          return PJ_TRUE;<br>-      }<br>+    pjsip_rx_data *rdata = data;<br> <br>       dlg = find_dialog(rdata);<br>     if (dlg) {<br>@@ -417,7 +412,8 @@<br>               /* We have a BYE or CANCEL request without a serializer. */<br>           pjsip_endpt_respond_stateless(ast_sip_get_pjsip_endpoint(), rdata,<br>                    PJSIP_SC_CALL_TSX_DOES_NOT_EXIST, NULL, NULL, NULL);<br>-         return PJ_TRUE;<br>+              pjsip_rx_data_free_cloned(rdata);<br>+            return 0;<br>     } else {<br>              if (ast_taskprocessor_alert_get()) {<br>                  /*<br>@@ -431,26 +427,53 @@<br>                      */<br>                   ast_debug(3, "Taskprocessor overload alert: Ignoring '%s'.\n",<br>                              pjsip_rx_data_get_info(rdata));<br>-                      return PJ_TRUE;<br>+                      pjsip_rx_data_free_cloned(rdata);<br>+                    return 0;<br>             }<br> <br>          /* Pick a serializer for the out-of-dialog request. */<br>                serializer = ast_sip_get_distributor_serializer(rdata);<br>       }<br> <br>- pjsip_rx_data_clone(rdata, 0, &clone);<br>-<br>         if (dist) {<br>-          clone->endpt_info.mod_data[endpoint_mod.id] = ao2_bump(dist->endpoint);<br>+                rdata->endpt_info.mod_data[endpoint_mod.id] = ao2_bump(dist->endpoint);<br>         }<br> <br>- if (ast_sip_push_task(serializer, distribute, clone)) {<br>-              ao2_cleanup(clone->endpt_info.mod_data[endpoint_mod.id]);<br>-         pjsip_rx_data_free_cloned(clone);<br>+    if (ast_sip_push_task(serializer, distribute, rdata)) {<br>+              ao2_cleanup(rdata->endpt_info.mod_data[endpoint_mod.id]);<br>+         pjsip_rx_data_free_cloned(rdata);<br>     }<br> <br>  ast_taskprocessor_unreference(serializer);<br> <br>+        return 0;<br>+}<br>+<br>+static pj_bool_t distributor(pjsip_rx_data *rdata)<br>+{<br>+    pjsip_rx_data *clone;<br>+<br>+     if (!ast_test_flag(&ast_options, AST_OPT_FLAG_FULLY_BOOTED)) {<br>+           /*<br>+            * Ignore everything until we are fully booted.  Let the<br>+              * peer retransmit messages until we are ready.<br>+               */<br>+          return PJ_TRUE;<br>+      }<br>+<br>+ if (pjsip_rx_data_clone(rdata, 0, &clone) != PJ_SUCCESS) {<br>+               return PJ_TRUE;<br>+      }<br>+<br>+ /*<br>+    * We have to push everything off into a serializer to avoid a<br>+        * deadlock involving the transport lock and the dialog lock.<br>+         * A TCP type transport has the transport lock still held at<br>+  * this point while a UDP type transport does not.<br>+    */<br>+  if (ast_sip_push_task(distributor_master, distribute_master, clone)) {<br>+               pjsip_rx_data_free_cloned(clone);<br>+    }<br>     return PJ_TRUE;<br> }<br> <br>@@ -1037,11 +1060,17 @@<br>  */<br> static void distributor_pool_shutdown(void)<br> {<br>+      struct ast_taskprocessor *serializer;<br>         int idx;<br> <br>+  serializer = distributor_master;<br>+     distributor_master = NULL;<br>+   ast_taskprocessor_unreference(serializer);<br>+<br>         for (idx = 0; idx < ARRAY_LEN(distributor_pool); ++idx) {<br>-         ast_taskprocessor_unreference(distributor_pool[idx]);<br>+                serializer = distributor_pool[idx];<br>           distributor_pool[idx] = NULL;<br>+                ast_taskprocessor_unreference(serializer);<br>    }<br> }<br> <br>@@ -1067,6 +1096,13 @@<br>                      return -1;<br>            }<br>     }<br>+<br>+ ast_taskprocessor_build_name(tps_name, sizeof(tps_name), "pjsip/distrib_master");<br>+  distributor_master = ast_sip_create_serializer(tps_name);<br>+    if (!distributor_master) {<br>+           return -1;<br>+   }<br>+<br>  return 0;<br> }<br> <br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/5920">change 5920</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/5920"/><meta itemprop="name" content="View Change"/></div></div>

<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: 14 </div>
<div style="display:none"> Gerrit-MessageType: newchange </div>
<div style="display:none"> Gerrit-Change-Id: I9ed63f4da9649e9db6ed4be29c360968917a89bd </div>
<div style="display:none"> Gerrit-Change-Number: 5920 </div>
<div style="display:none"> Gerrit-PatchSet: 1 </div>
<div style="display:none"> Gerrit-Owner: Richard Mudgett <rmudgett@digium.com> </div>