<p>George Joseph <strong>merged</strong> this change.</p><p><a href="https://gerrit.asterisk.org/5919">View Change</a></p><div style="white-space:pre-wrap">Approvals:
  Joshua Colp: Looks good to me, but someone else must approve
  George Joseph: Looks good to me, approved; Approved for Submit

</div><pre style="font-family: monospace,monospace; white-space: pre-wrap;">pjsip_distributor.c: Fix deadlock with TCP type transports.<br><br>When a SIP message comes in on a transport, pjproject obtains the lock on<br>the transport and pulls the data out of the socket.  Unlike UDP, the TCP<br>transport does not allow concurrent access.  Without concurrency the<br>transport lock is not released when the transport's message complete<br>callback is called.  The processing continues and eventually Asterisk<br>starts processing the SIP message.  The first thing Asterisk tries to do<br>is determine the associated dialog of the message to determine the<br>associated serializer.  To get the associated serializer safely requires<br>us to get the dialog lock.<br><br>To send a request or response message for a dialog, pjproject obtains the<br>dialog lock and then obtains the transport lock.  Deadlock can result<br>because of the opposite order the locks are obtained.<br><br>* Fix the deadlock by obtaining the serializer associated with the dialog<br>another way that doesn't involve obtaining the dialog lock.  In this case,<br>we use an ao2 container to hold the associated endpoint and serializer.<br>The new locks are held a brief time and won't overlap other existing lock<br>times.<br><br>ASTERISK-27090 #close<br><br>Change-Id: I9ed63f4da9649e9db6ed4be29c360968917a89bd<br>---<br>M res/res_pjsip/pjsip_distributor.c<br>1 file changed, 174 insertions(+), 42 deletions(-)<br><br></pre><pre style="font-family: monospace,monospace; white-space: pre-wrap;">diff --git a/res/res_pjsip/pjsip_distributor.c b/res/res_pjsip/pjsip_distributor.c<br>index dadde25..36bc6f5 100644<br>--- a/res/res_pjsip/pjsip_distributor.c<br>+++ b/res/res_pjsip/pjsip_distributor.c<br>@@ -140,62 +140,189 @@<br> <br> /*! Dialog-specific information the distributor uses */<br> struct distributor_dialog_data {<br>+      /*! dialog_associations ao2 container key */<br>+ pjsip_dialog *dlg;<br>    /*! Serializer to distribute tasks to for this dialog */<br>      struct ast_taskprocessor *serializer;<br>         /*! Endpoint associated with this dialog */<br>   struct ast_sip_endpoint *endpoint;<br> };<br> <br>+#define DIALOG_ASSOCIATIONS_BUCKETS 251<br>+<br>+static struct ao2_container *dialog_associations;<br>+<br> /*!<br>  * \internal<br>+ * \brief Compute a hash value on an arbitrary buffer.<br>+ * \since 13.17.0<br>  *<br>- * \note Call this with the dialog locked<br>+ * \param[in] pos The buffer to add to the hash<br>+ * \param[in] len The buffer length to add to the hash<br>+ * \param[in] hash The hash value to add to<br>+ *<br>+ * \details<br>+ * This version of the function is for when you need to compute a<br>+ * hash of more than one buffer.<br>+ *<br>+ * This famous hash algorithm was written by Dan Bernstein and is<br>+ * commonly used.<br>+ *<br>+ * \sa http://www.cse.yorku.ca/~oz/hash.html<br>  */<br>-static struct distributor_dialog_data *distributor_dialog_data_alloc(pjsip_dialog *dlg)<br>+static int buf_hash_add(const char *pos, size_t len, int hash)<br> {<br>-   struct distributor_dialog_data *dist;<br>+        while (len--) {<br>+              hash = hash * 33 ^ *pos++;<br>+   }<br> <br>- dist = PJ_POOL_ZALLOC_T(dlg->pool, struct distributor_dialog_data);<br>-       pjsip_dlg_set_mod_data(dlg, distributor_mod.id, dist);<br>+       return hash;<br>+}<br> <br>-  return dist;<br>+/*!<br>+ * \internal<br>+ * \brief Compute a hash value on an arbitrary buffer.<br>+ * \since 13.17.0<br>+ *<br>+ * \param[in] pos The buffer to add to the hash<br>+ * \param[in] len The buffer length to add to the hash<br>+ *<br>+ * \details<br>+ * This version of the function is for when you need to compute a<br>+ * hash of more than one buffer.<br>+ *<br>+ * This famous hash algorithm was written by Dan Bernstein and is<br>+ * commonly used.<br>+ *<br>+ * \sa http://www.cse.yorku.ca/~oz/hash.html<br>+ */<br>+static int buf_hash(const char *pos, size_t len)<br>+{<br>+       return buf_hash_add(pos, len, 5381);<br>+}<br>+<br>+static int dialog_associations_hash(const void *obj, int flags)<br>+{<br>+    const struct distributor_dialog_data *object;<br>+        union {<br>+              const pjsip_dialog *dlg;<br>+             const char buf[sizeof(pjsip_dialog *)];<br>+      } key;<br>+<br>+    switch (flags & OBJ_SEARCH_MASK) {<br>+       case OBJ_SEARCH_KEY:<br>+         key.dlg = obj;<br>+               break;<br>+       case OBJ_SEARCH_OBJECT:<br>+              object = obj;<br>+                key.dlg = object->dlg;<br>+            break;<br>+       default:<br>+             /* Hash can only work on something with a full key. */<br>+               ast_assert(0);<br>+               return 0;<br>+    }<br>+    return ast_str_hash_restrict(buf_hash(key.buf, sizeof(key.buf)));<br>+}<br>+<br>+static int dialog_associations_cmp(void *obj, void *arg, int flags)<br>+{<br>+   const struct distributor_dialog_data *object_left = obj;<br>+     const struct distributor_dialog_data *object_right = arg;<br>+    const pjsip_dialog *right_key = arg;<br>+ int cmp = 0;<br>+<br>+      switch (flags & OBJ_SEARCH_MASK) {<br>+       case OBJ_SEARCH_OBJECT:<br>+              right_key = object_right->dlg;<br>+            /* Fall through */<br>+   case OBJ_SEARCH_KEY:<br>+         if (object_left->dlg == right_key) {<br>+                      cmp = CMP_MATCH;<br>+             }<br>+            break;<br>+       case OBJ_SEARCH_PARTIAL_KEY:<br>+         /* There is no such thing for this container. */<br>+             ast_assert(0);<br>+               break;<br>+       default:<br>+             cmp = 0;<br>+             break;<br>+       }<br>+    return cmp;<br> }<br> <br> void ast_sip_dialog_set_serializer(pjsip_dialog *dlg, struct ast_taskprocessor *serializer)<br> {<br>  struct distributor_dialog_data *dist;<br>-        SCOPED_LOCK(lock, dlg, pjsip_dlg_inc_lock, pjsip_dlg_dec_lock);<br> <br>-   dist = pjsip_dlg_get_mod_data(dlg, distributor_mod.id);<br>+      ao2_wrlock(dialog_associations);<br>+     dist = ao2_find(dialog_associations, dlg, OBJ_SEARCH_KEY | OBJ_NOLOCK);<br>       if (!dist) {<br>-         dist = distributor_dialog_data_alloc(dlg);<br>+           if (serializer) {<br>+                    dist = ao2_alloc(sizeof(*dist), NULL);<br>+                       if (dist) {<br>+                          dist->dlg = dlg;<br>+                          dist->serializer = serializer;<br>+                            ao2_link_flags(dialog_associations, dist, OBJ_NOLOCK);<br>+                               ao2_ref(dist, -1);<br>+                   }<br>+            }<br>+    } else {<br>+             ao2_lock(dist);<br>+              dist->serializer = serializer;<br>+            if (!dist->serializer && !dist->endpoint) {<br>+                    ao2_unlink_flags(dialog_associations, dist, OBJ_NOLOCK);<br>+             }<br>+            ao2_unlock(dist);<br>+            ao2_ref(dist, -1);<br>    }<br>-    dist->serializer = serializer;<br>+    ao2_unlock(dialog_associations);<br> }<br> <br> void ast_sip_dialog_set_endpoint(pjsip_dialog *dlg, struct ast_sip_endpoint *endpoint)<br> {<br>  struct distributor_dialog_data *dist;<br>-        SCOPED_LOCK(lock, dlg, pjsip_dlg_inc_lock, pjsip_dlg_dec_lock);<br> <br>-   dist = pjsip_dlg_get_mod_data(dlg, distributor_mod.id);<br>+      ao2_wrlock(dialog_associations);<br>+     dist = ao2_find(dialog_associations, dlg, OBJ_SEARCH_KEY | OBJ_NOLOCK);<br>       if (!dist) {<br>-         dist = distributor_dialog_data_alloc(dlg);<br>+           if (endpoint) {<br>+                      dist = ao2_alloc(sizeof(*dist), NULL);<br>+                       if (dist) {<br>+                          dist->dlg = dlg;<br>+                          dist->endpoint = endpoint;<br>+                                ao2_link_flags(dialog_associations, dist, OBJ_NOLOCK);<br>+                               ao2_ref(dist, -1);<br>+                   }<br>+            }<br>+    } else {<br>+             ao2_lock(dist);<br>+              dist->endpoint = endpoint;<br>+                if (!dist->serializer && !dist->endpoint) {<br>+                    ao2_unlink_flags(dialog_associations, dist, OBJ_NOLOCK);<br>+             }<br>+            ao2_unlock(dist);<br>+            ao2_ref(dist, -1);<br>    }<br>-    dist->endpoint = endpoint;<br>+        ao2_unlock(dialog_associations);<br> }<br> <br> struct ast_sip_endpoint *ast_sip_dialog_get_endpoint(pjsip_dialog *dlg)<br> {<br>         struct distributor_dialog_data *dist;<br>-        SCOPED_LOCK(lock, dlg, pjsip_dlg_inc_lock, pjsip_dlg_dec_lock);<br>+      struct ast_sip_endpoint *endpoint;<br> <br>-        dist = pjsip_dlg_get_mod_data(dlg, distributor_mod.id);<br>-      if (!dist || !dist->endpoint) {<br>-           return NULL;<br>+ dist = ao2_find(dialog_associations, dlg, OBJ_SEARCH_KEY);<br>+   if (dist) {<br>+          ao2_lock(dist);<br>+              endpoint = ao2_bump(dist->endpoint);<br>+              ao2_unlock(dist);<br>+            ao2_ref(dist, -1);<br>+   } else {<br>+             endpoint = NULL;<br>      }<br>-    ao2_ref(dist->endpoint, +1);<br>-      return dist->endpoint;<br>+    return endpoint;<br> }<br> <br> static pjsip_dialog *find_dialog(pjsip_rx_data *rdata)<br>@@ -227,7 +354,7 @@<br>                         pjsip_method_cmp(&rdata->msg_info.msg->line.req.method, &pjsip_cancel_method) ||<br>                        rdata->msg_info.to->tag.slen != 0) {<br>            dlg = pjsip_ua_find_dialog(&rdata->msg_info.cid->id, local_tag,<br>-                            remote_tag, PJ_TRUE);<br>+                                remote_tag, PJ_FALSE);<br>                if (dlg) {<br>                    return dlg;<br>           }<br>@@ -265,11 +392,6 @@<br>       pj_mutex_unlock(tsx->mutex);<br> #endif<br> <br>-  if (!dlg) {<br>-          return NULL;<br>- }<br>-<br>- pjsip_dlg_inc_lock(dlg);<br>      return dlg;<br> }<br> <br>@@ -292,16 +414,7 @@<br>  */<br> static int pjstr_hash_add(pj_str_t *str, int hash)<br> {<br>-      size_t len;<br>-  const char *pos;<br>-<br>-  len = pj_strlen(str);<br>-        pos = pj_strbuf(str);<br>-        while (len--) {<br>-              hash = hash * 33 ^ *pos++;<br>-   }<br>-<br>- return hash;<br>+ return buf_hash_add(pj_strbuf(str), pj_strlen(str), hash);<br> }<br> <br> /*!<br>@@ -340,7 +453,7 @@<br>  /* Compute the hash from the SIP message call-id and remote-tag */<br>    hash = pjstr_hash(&rdata->msg_info.cid->id);<br>        hash = pjstr_hash_add(remote_tag, hash);<br>-     hash = abs(hash);<br>+    hash = ast_str_hash_restrict(hash);<br> <br>        serializer = ao2_bump(distributor_pool[hash % ARRAY_LEN(distributor_pool)]);<br>  if (serializer) {<br>@@ -375,17 +488,18 @@<br> <br>   dlg = find_dialog(rdata);<br>     if (dlg) {<br>-           ast_debug(3, "Searching for serializer on dialog %s for %s\n",<br>+             ast_debug(3, "Searching for serializer associated with dialog %s for %s\n",<br>                         dlg->obj_name, pjsip_rx_data_get_info(rdata));<br>-            dist = pjsip_dlg_get_mod_data(dlg, distributor_mod.id);<br>+              dist = ao2_find(dialog_associations, dlg, OBJ_SEARCH_KEY);<br>            if (dist) {<br>+                  ao2_lock(dist);<br>                       serializer = ao2_bump(dist->serializer);<br>+                  ao2_unlock(dist);<br>                     if (serializer) {<br>-                            ast_debug(3, "Found serializer %s on dialog %s\n",<br>+                         ast_debug(3, "Found serializer %s associated with dialog %s\n",<br>                                     ast_taskprocessor_name(serializer), dlg->obj_name);<br>                        }<br>             }<br>-            pjsip_dlg_dec_lock(dlg);<br>      }<br> <br>  if (serializer) {<br>@@ -407,6 +521,7 @@<br>                /* We have a BYE or CANCEL request without a serializer. */<br>           pjsip_endpt_respond_stateless(ast_sip_get_pjsip_endpoint(), rdata,<br>                    PJSIP_SC_CALL_TSX_DOES_NOT_EXIST, NULL, NULL, NULL);<br>+         ao2_cleanup(dist);<br>            return PJ_TRUE;<br>       } else {<br>              if (ast_taskprocessor_alert_get()) {<br>@@ -421,6 +536,7 @@<br>                      */<br>                   ast_debug(3, "Taskprocessor overload alert: Ignoring '%s'.\n",<br>                              pjsip_rx_data_get_info(rdata));<br>+                      ao2_cleanup(dist);<br>                    return PJ_TRUE;<br>               }<br> <br>@@ -428,10 +544,17 @@<br>           serializer = ast_sip_get_distributor_serializer(rdata);<br>       }<br> <br>- pjsip_rx_data_clone(rdata, 0, &clone);<br>+   if (pjsip_rx_data_clone(rdata, 0, &clone) != PJ_SUCCESS) {<br>+               ast_taskprocessor_unreference(serializer);<br>+           ao2_cleanup(dist);<br>+           return PJ_TRUE;<br>+      }<br> <br>  if (dist) {<br>+          ao2_lock(dist);<br>               clone->endpt_info.mod_data[endpoint_mod.id] = ao2_bump(dist->endpoint);<br>+                ao2_unlock(dist);<br>+            ao2_cleanup(dist);<br>    }<br> <br>  if (ast_sip_push_task(serializer, distribute, clone)) {<br>@@ -1068,6 +1191,14 @@<br>               return -1;<br>    }<br> <br>+ dialog_associations = ao2_container_alloc_hash(AO2_ALLOC_OPT_LOCK_RWLOCK, 0,<br>+         DIALOG_ASSOCIATIONS_BUCKETS, dialog_associations_hash, NULL,<br>+         dialog_associations_cmp);<br>+    if (!dialog_associations) {<br>+          ast_sip_destroy_distributor();<br>+               return -1;<br>+   }<br>+<br>  if (distributor_pool_setup()) {<br>               ast_sip_destroy_distributor();<br>                return -1;<br>@@ -1146,5 +1277,6 @@<br> <br>  distributor_pool_shutdown();<br> <br>+      ao2_cleanup(dialog_associations);<br>     ao2_cleanup(unidentified_requests);<br> }<br></pre><p>To view, visit <a href="https://gerrit.asterisk.org/5919">change 5919</a>. To unsubscribe, visit <a href="https://gerrit.asterisk.org/settings">settings</a>.</p><div itemscope itemtype="http://schema.org/EmailMessage"><div itemscope itemprop="action" itemtype="http://schema.org/ViewAction"><link itemprop="url" href="https://gerrit.asterisk.org/5919"/><meta itemprop="name" content="View Change"/></div></div>

<div style="display:none"> Gerrit-Project: asterisk </div>
<div style="display:none"> Gerrit-Branch: 13 </div>
<div style="display:none"> Gerrit-MessageType: merged </div>
<div style="display:none"> Gerrit-Change-Id: I9ed63f4da9649e9db6ed4be29c360968917a89bd </div>
<div style="display:none"> Gerrit-Change-Number: 5919 </div>
<div style="display:none"> Gerrit-PatchSet: 2 </div>
<div style="display:none"> Gerrit-Owner: Richard Mudgett <rmudgett@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Alexei Gradinari <alex2grad@gmail.com> </div>
<div style="display:none"> Gerrit-Reviewer: George Joseph <gjoseph@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Jenkins2 </div>
<div style="display:none"> Gerrit-Reviewer: Joshua Colp <jcolp@digium.com> </div>
<div style="display:none"> Gerrit-Reviewer: Richard Mudgett <rmudgett@digium.com> </div>