[Asterisk-code-review] chan sip.c: Fix registration timeout and expire deadlock pot... (asterisk[13])

Joshua Colp asteriskteam at digium.com
Thu Mar 17 10:29:06 CDT 2016


Joshua Colp has submitted this change and it was merged.

Change subject: chan_sip.c: Fix registration timeout and expire deadlock potential.
......................................................................


chan_sip.c: Fix registration timeout and expire deadlock potential.

This patch is part of a series to resolve deadlocks in chan_sip.c.

Stopping a scheduled event can result in a deadlock if the scheduled event
is running when you try to stop the event.  If you hold a lock needed by
the scheduled event while trying to stop the scheduled event then a
deadlock can happen.  The general strategy for resolving the deadlock
potential is to push the actual starting and stopping of the scheduled
events off onto the scheduler/do_monitor() thread by scheduling an
immediate one shot scheduled event.  Some restructuring may be needed
because the code may assume that the start/stop of the scheduled events is
immediate.

ASTERISK-25023

Change-Id: I2e40de89efc8ae6e8850771d089ca44bc604b508
---
M channels/chan_sip.c
M channels/sip/include/sip.h
2 files changed, 185 insertions(+), 63 deletions(-)

Approvals:
  Mark Michelson: Looks good to me, but someone else must approve
  Joshua Colp: Looks good to me, approved; Verified



diff --git a/channels/chan_sip.c b/channels/chan_sip.c
index 9f6ee21..2cc365d 100644
--- a/channels/chan_sip.c
+++ b/channels/chan_sip.c
@@ -1372,7 +1372,6 @@
 static void sip_registry_destroy(void *reg);
 static int sip_register(const char *value, int lineno);
 static const char *regstate2str(enum sipregistrystate regstate) attribute_const;
-static int sip_reregister(const void *data);
 static int __sip_do_register(struct sip_registry *r);
 static int sip_reg_timeout(const void *data);
 static void sip_send_all_registers(void);
@@ -6504,8 +6503,6 @@
 		reg->call = dialog_unref(reg->call, "unref reg->call");
 		/* reg->call = sip_destroy(reg->call); */
 	}
-	AST_SCHED_DEL(sched, reg->expire);
-	AST_SCHED_DEL(sched, reg->timeout);
 
 	ast_string_field_free_memory(reg);
 }
@@ -15469,7 +15466,7 @@
 	{ REG_STATE_AUTHSENT, "Auth. Sent"},
 	{ REG_STATE_REGISTERED, "Registered"},
 	{ REG_STATE_REJECTED, "Rejected"},
-	{ REG_STATE_TIMEOUT, "Timeout"},
+	{ REG_STATE_TIMEOUT, "Registered"},/* Hidden state.  We are renewing registration. */
 	{ REG_STATE_NOAUTH, "No Authentication"},
 	{ -1, NULL } /* terminator */
 };
@@ -15485,21 +15482,21 @@
 	ast_system_publish_registry("SIP", username, domain, status, NULL);
 }
 
-/*! \brief Update registration with SIP Proxy.
+/*!
+ * \brief Update registration with SIP Proxy.
+ *
+ * \details
  * Called from the scheduler when the previous registration expires,
  * so we don't have to cancel the pending event.
  * We assume the reference so the sip_registry is valid, since it
  * is stored in the scheduled event anyways.
+ *
+ * \note Run by the sched thread.
  */
 static int sip_reregister(const void *data)
 {
 	/* if we are here, we know that we need to reregister. */
 	struct sip_registry *r = (struct sip_registry *) data;
-
-	/* if we couldn't get a reference to the registry object, punt */
-	if (!r) {
-		return 0;
-	}
 
 	if (r->call && r->call->do_history) {
 		append_history(r->call, "RegistryRenew", "Account: %s@%s", r->username, r->hostname);
@@ -15512,8 +15509,25 @@
 
 	r->expire = -1;
 	r->expiry = r->configured_expiry;
+	switch (r->regstate) {
+	case REG_STATE_UNREGISTERED:
+	case REG_STATE_REGSENT:
+	case REG_STATE_AUTHSENT:
+		break;
+	case REG_STATE_REJECTED:
+	case REG_STATE_NOAUTH:
+	case REG_STATE_FAILED:
+		/* Restarting registration as unregistered */
+		r->regstate = REG_STATE_UNREGISTERED;
+		break;
+	case REG_STATE_TIMEOUT:
+	case REG_STATE_REGISTERED:
+		/* Registration needs to be renewed. */
+		r->regstate = REG_STATE_TIMEOUT;
+		break;
+	}
 	__sip_do_register(r);
-	ao2_t_ref(r, -1, "unref the re-register scheduled event");
+	ao2_t_ref(r, -1, "Scheduled reregister timeout complete");
 	return 0;
 }
 
@@ -15528,21 +15542,83 @@
 	return res;
 }
 
-/*! \brief Registration timeout, register again
+struct reregister_data {
+	struct sip_registry *reg;
+	int ms;
+};
+
+/* Run by the sched thread. */
+static int __start_reregister_timeout(const void *data)
+{
+	struct reregister_data *sched_data = (void *) data;
+	struct sip_registry *reg = sched_data->reg;
+	int ms = sched_data->ms;
+
+	ast_free(sched_data);
+
+	AST_SCHED_DEL_UNREF(sched, reg->expire,
+		ao2_t_ref(reg, -1, "Stop scheduled reregister timeout"));
+
+	ao2_t_ref(reg, +1, "Schedule reregister timeout");
+	reg->expire = ast_sched_add(sched, ms, sip_reregister, reg);
+	if (reg->expire < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule reregister timeout");
+	}
+
+	ao2_t_ref(reg, -1, "Start reregister timeout action");
+	return 0;
+}
+
+static void start_reregister_timeout(struct sip_registry *reg, int ms)
+{
+	struct reregister_data *sched_data;
+
+	sched_data = ast_malloc(sizeof(*sched_data));
+	if (!sched_data) {
+		/* Uh Oh.  Expect bad behavior. */
+		return;
+	}
+	sched_data->reg = reg;
+	sched_data->ms = ms;
+	ao2_t_ref(reg, +1, "Start reregister timeout action");
+	if (ast_sched_add(sched, 0, __start_reregister_timeout, sched_data) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule start reregister timeout action");
+		ast_free(sched_data);
+	}
+}
+
+/*!
+ * \brief Registration request timeout, register again
+ *
+ * \details
  * Registered as a timeout handler during transmit_register(),
  * to retransmit the packet if a reply does not come back.
- * This is called by the scheduler so the event is not pending anymore when
+ *
+ * \note This is called by the scheduler so the event is not pending anymore when
  * we are called.
+ *
+ * \note Run by the sched thread.
  */
 static int sip_reg_timeout(const void *data)
 {
-
-	/* if we are here, our registration timed out, so we'll just do it over */
 	struct sip_registry *r = (struct sip_registry *)data; /* the ref count should have been bumped when the sched item was added */
 	struct sip_pvt *p;
 
-	/* if we couldn't get a reference to the registry object, punt */
-	if (!r) {
+	switch (r->regstate) {
+	case REG_STATE_UNREGISTERED:
+	case REG_STATE_REGSENT:
+	case REG_STATE_AUTHSENT:
+	case REG_STATE_TIMEOUT:
+		break;
+	default:
+		/*
+		 * Registration completed because we got a request response
+		 * and we couldn't stop the scheduled entry in time.
+		 */
+		r->timeout = -1;
+		ao2_t_ref(r, -1, "Scheduled register timeout completed early");
 		return 0;
 	}
 
@@ -15584,8 +15660,58 @@
 		ast_log(LOG_NOTICE, "   -- Registration for '%s@%s' timed out, trying again (Attempt #%d)\n", r->username, r->hostname, r->regattempts);
 	}
 	sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
-	ao2_t_ref(r, -1, "unreffing registry_unref r");
+	ao2_t_ref(r, -1, "Scheduled register timeout complete");
 	return 0;
+}
+
+/* Run by the sched thread. */
+static int __stop_register_timeout(const void *data)
+{
+	struct sip_registry *reg = (struct sip_registry *) data;
+
+	AST_SCHED_DEL_UNREF(sched, reg->timeout,
+		ao2_t_ref(reg, -1, "Stop scheduled register timeout"));
+	ao2_t_ref(reg, -1, "Stop register timeout action");
+	return 0;
+}
+
+static void stop_register_timeout(struct sip_registry *reg)
+{
+	ao2_t_ref(reg, +1, "Stop register timeout action");
+	if (ast_sched_add(sched, 0, __stop_register_timeout, reg) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule stop register timeout action");
+	}
+}
+
+/* Run by the sched thread. */
+static int __start_register_timeout(const void *data)
+{
+	struct sip_registry *reg = (struct sip_registry *) data;
+
+	AST_SCHED_DEL_UNREF(sched, reg->timeout,
+		ao2_t_ref(reg, -1, "Stop scheduled register timeout"));
+
+	ao2_t_ref(reg, +1, "Schedule register timeout");
+	reg->timeout = ast_sched_add(sched, global_reg_timeout * 1000, sip_reg_timeout, reg);
+	if (reg->timeout < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule register timeout");
+	}
+	ast_debug(1, "Scheduled a registration timeout for %s id  #%d \n",
+		reg->hostname, reg->timeout);
+
+	ao2_t_ref(reg, -1, "Start register timeout action");
+	return 0;
+}
+
+static void start_register_timeout(struct sip_registry *reg)
+{
+	ao2_t_ref(reg, +1, "Start register timeout action");
+	if (ast_sched_add(sched, 0, __start_register_timeout, reg) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule start register timeout action");
+	}
 }
 
 static const char *sip_sanitized_host(const char *host)
@@ -15701,16 +15827,9 @@
 			 * probably DNS.  We need to reschedule a registration try */
 			dialog_unlink_all(p);
 			p = dialog_unref(p, "unref dialog after unlink_all");
-			if (r->timeout > -1) {
-				AST_SCHED_REPLACE_UNREF(r->timeout, sched, global_reg_timeout * 1000, sip_reg_timeout, r,
-										ao2_t_ref(_data, -1, "del for REPLACE of registry ptr"),
-										ao2_t_ref(r, -1, "object ptr dec when SCHED_REPLACE add failed"),
-										ao2_t_ref(r, +1, "add for REPLACE registry ptr"));
-				ast_log(LOG_WARNING, "Still have a registration timeout for %s@%s (create_addr() error), %d\n", r->username, r->hostname, r->timeout);
-			} else {
-				r->timeout = ast_sched_add(sched, global_reg_timeout * 1000, sip_reg_timeout, ao2_t_bump(r, "add for REPLACE registry ptr"));
-				ast_log(LOG_WARNING, "Probably a DNS error for registration to %s@%s, trying REGISTER again (after %d seconds)\n", r->username, r->hostname, global_reg_timeout);
-			}
+			ast_log(LOG_WARNING, "Probably a DNS error for registration to %s@%s, trying REGISTER again (after %d seconds)\n",
+				r->username, r->hostname, global_reg_timeout);
+			start_register_timeout(r);
 			r->regattempts++;
 			return 0;
 		}
@@ -15773,14 +15892,7 @@
 
 	/* set up a timeout */
 	if (auth == NULL)  {
-		if (r->timeout > -1) {
-			ast_log(LOG_WARNING, "Still have a registration timeout, #%d - deleting it\n", r->timeout);
-		}
-		AST_SCHED_REPLACE_UNREF(r->timeout, sched, global_reg_timeout * 1000, sip_reg_timeout, r,
-								ao2_t_ref(_data, -1, "reg ptr unrefed from del in SCHED_REPLACE"),
-								ao2_t_ref(r, -1, "reg ptr unrefed from add failure in SCHED_REPLACE"),
-								ao2_t_ref(r, +1, "reg ptr reffed from add in SCHED_REPLACE"));
-		ast_debug(1, "Scheduled a registration timeout for %s id  #%d \n", r->hostname, r->timeout);
+		start_register_timeout(r);
 	}
 
 	snprintf(from, sizeof(from), "<sip:%s@%s>;tag=%s", r->username, S_OR(r->regdomain, sip_sanitized_host(p->tohost)), p->tag);
@@ -24047,8 +24159,8 @@
 			break;
 		}
 		ast_log(LOG_WARNING, "Forbidden - wrong password on authentication for REGISTER for '%s' to '%s'\n", p->registry->username, p->registry->hostname);
-		AST_SCHED_DEL_UNREF(sched, r->timeout, ao2_t_ref(r, -1, "reg ptr unref from handle_response_register 403"));
 		r->regstate = REG_STATE_NOAUTH;
+		stop_register_timeout(r);
 		sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
 		pvt_set_needdestroy(p, "received 403 response");
 		break;
@@ -24058,8 +24170,8 @@
 		if (r->call)
 			r->call = dialog_unref(r->call, "unsetting registry->call pointer-- case 404");
 		r->regstate = REG_STATE_REJECTED;
+		stop_register_timeout(r);
 		sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
-		AST_SCHED_DEL_UNREF(sched, r->timeout, ao2_t_ref(r, -1, "reg ptr unref from handle_response_register 404"));
 		break;
 	case 407:	/* Proxy auth */
 		if (p->authtries == MAX_AUTHTRIES || do_register_auth(p, req, resp)) {
@@ -24078,7 +24190,6 @@
 	case 423:	/* Interval too brief */
 		r->expiry = atoi(sip_get_header(req, "Min-Expires"));
 		ast_log(LOG_WARNING, "Got 423 Interval too brief for service %s@%s, minimum is %d seconds\n", p->registry->username, p->registry->hostname, r->expiry);
-		AST_SCHED_DEL_UNREF(sched, r->timeout, ao2_t_ref(r, -1, "reg ptr unref from handle_response_register 423"));
 		if (r->call) {
 			r->call = dialog_unref(r->call, "unsetting registry->call pointer-- case 423");
 			pvt_set_needdestroy(p, "received 423 response");
@@ -24087,6 +24198,7 @@
 			ast_log(LOG_WARNING, "Required expiration time from %s@%s is too high, giving up\n", p->registry->username, p->registry->hostname);
 			r->expiry = r->configured_expiry;
 			r->regstate = REG_STATE_REJECTED;
+			stop_register_timeout(r);
 		} else {
 			r->regstate = REG_STATE_UNREGISTERED;
 			transmit_register(r, SIP_REGISTER, NULL, NULL);
@@ -24102,8 +24214,8 @@
 		if (r->call)
 			r->call = dialog_unref(r->call, "unsetting registry->call pointer-- case 4xx");
 		r->regstate = REG_STATE_REJECTED;
+		stop_register_timeout(r);
 		sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
-		AST_SCHED_DEL_UNREF(sched, r->timeout, ao2_t_ref(r, -1, "reg ptr unref from handle_response_register 479"));
 		break;
 	case 200:	/* 200 OK */
 		if (!r) {
@@ -24112,15 +24224,15 @@
 			return 0;
 		}
 
-		r->regstate = REG_STATE_REGISTERED;
-		r->regtime = ast_tvnow();		/* Reset time of last successful registration */
-		sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
-		r->regattempts = 0;
 		ast_debug(1, "Registration successful\n");
 		if (r->timeout > -1) {
 			ast_debug(1, "Cancelling timeout %d\n", r->timeout);
 		}
-		AST_SCHED_DEL_UNREF(sched, r->timeout, ao2_t_ref(r, -1, "reg ptr unref from handle_response_register 200"));
+		r->regstate = REG_STATE_REGISTERED;
+		stop_register_timeout(r);
+		r->regtime = ast_tvnow();		/* Reset time of last successful registration */
+		sip_publish_registry(r->username, r->hostname, regstate2str(r->regstate));
+		r->regattempts = 0;
 		if (r->call)
 			r->call = dialog_unref(r->call, "unsetting registry->call pointer-- case 200");
 		ao2_t_replace(p->registry, NULL, "unref registry entry p->registry");
@@ -24174,10 +24286,7 @@
 		r->refresh= (int) expires_ms / 1000;
 
 		/* Schedule re-registration before we expire */
-		AST_SCHED_REPLACE_UNREF(r->expire, sched, expires_ms, sip_reregister, r,
-								ao2_t_ref(_data, -1, "unref in REPLACE del fail"),
-								ao2_t_ref(r, -1, "unref in REPLACE add fail"),
-								ao2_t_ref(r, +1, "The Addition side of REPLACE"));
+		start_reregister_timeout(r, expires_ms);
 	}
 	return 1;
 }
@@ -31606,9 +31715,11 @@
 	}
 }
 
-static int cleanup_registration(void *obj, void *arg, int flags)
+/* Run by the sched thread. */
+static int __cleanup_registration(const void *data)
 {
-	struct sip_registry *reg = obj;
+	struct sip_registry *reg = (struct sip_registry *) data;
+
 	ao2_lock(reg);
 
 	if (reg->call) {
@@ -31617,12 +31728,12 @@
 		dialog_unlink_all(reg->call);
 		reg->call = dialog_unref(reg->call, "remove iterator->call from registry traversal");
 	}
-	if (reg->expire > -1) {
-		AST_SCHED_DEL_UNREF(sched, reg->expire, ao2_t_ref(reg, -1, "reg ptr unref from reload config"));
-	}
-	if (reg->timeout > -1) {
-		AST_SCHED_DEL_UNREF(sched, reg->timeout, ao2_t_ref(reg, -1, "reg ptr unref from reload config"));
-	}
+
+	AST_SCHED_DEL_UNREF(sched, reg->expire,
+		ao2_t_ref(reg, -1, "Stop scheduled reregister timeout"));
+	AST_SCHED_DEL_UNREF(sched, reg->timeout,
+		ao2_t_ref(reg, -1, "Stop scheduled register timeout"));
+
 	if (reg->dnsmgr) {
 		ast_dnsmgr_release(reg->dnsmgr);
 		reg->dnsmgr = NULL;
@@ -31630,6 +31741,21 @@
 	}
 
 	ao2_unlock(reg);
+
+	ao2_t_ref(reg, -1, "cleanup_registration action");
+	return 0;
+}
+
+static int cleanup_registration(void *obj, void *arg, int flags)
+{
+	struct sip_registry *reg = obj;
+
+	ao2_t_ref(reg, +1, "cleanup_registration action");
+	if (ast_sched_add(sched, 0, __cleanup_registration, reg) < 0) {
+		/* Uh Oh.  Expect bad behavior. */
+		ao2_t_ref(reg, -1, "Failed to schedule cleanup_registration action");
+	}
+
 	return CMP_MATCH;
 }
 
@@ -33423,10 +33549,7 @@
 	while ((iterator = ao2_t_iterator_next(&iter, "sip_send_all_registers iter"))) {
 		ao2_lock(iterator);
 		ms += regspacing;
-		AST_SCHED_REPLACE_UNREF(iterator->expire, sched, ms, sip_reregister, iterator,
-								ao2_t_ref(_data, -1, "REPLACE sched del decs the refcount"),
-								ao2_t_ref(iterator, -1, "REPLACE sched add failure decs the refcount"),
-								ao2_t_ref(iterator, +1, "REPLACE sched add incs the refcount"));
+		start_reregister_timeout(iterator, ms);
 		ao2_unlock(iterator);
 		ao2_t_ref(iterator, -1, "sip_send_all_registers iter");
 	}
diff --git a/channels/sip/include/sip.h b/channels/sip/include/sip.h
index c995ff1..3e68321 100644
--- a/channels/sip/include/sip.h
+++ b/channels/sip/include/sip.h
@@ -547,8 +547,7 @@
 		 * recover (not sure how correctly).
 		 */
 
-	REG_STATE_TIMEOUT,	/*!< Registration timed out
-		* \note XXX unused */
+	REG_STATE_TIMEOUT,	/*!< Registration about to expire, renewing registration */
 
 	REG_STATE_NOAUTH,	/*!< We have no accepted credentials
 		 * \note fatal - no chance to proceed */

-- 
To view, visit https://gerrit.asterisk.org/2400
To unsubscribe, visit https://gerrit.asterisk.org/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I2e40de89efc8ae6e8850771d089ca44bc604b508
Gerrit-PatchSet: 3
Gerrit-Project: asterisk
Gerrit-Branch: 13
Gerrit-Owner: Richard Mudgett <rmudgett at digium.com>
Gerrit-Reviewer: Anonymous Coward #1000019
Gerrit-Reviewer: Joshua Colp <jcolp at digium.com>
Gerrit-Reviewer: Mark Michelson <mmichelson at digium.com>



More information about the asterisk-code-review mailing list