[Asterisk-code-review] sched: fix and test a double deref on delete of an executing call back (asterisk[18])

Michael Bradeen asteriskteam at digium.com
Fri Jan 21 10:06:05 CST 2022


Michael Bradeen has uploaded this change for review. ( https://gerrit.asterisk.org/c/asterisk/+/17922 )


Change subject: sched: fix and test a double deref on delete of an executing call back
......................................................................

sched: fix and test a double deref on delete of an executing call back

sched: Avoid a double deref when AST_SCHED_DEL_UNREF is called on an
executing call-back. This is done by adding a new variable 'rescheduled'
to the struct sched which is set in ast_sched_runq and checked in
ast_sched_del_nonrunning. ast_sched_del_nonrunning is a replacement for
now deprecated ast_sched_del which returns a new possible value -2
if called on an executing call-back with rescheduled set. ast_sched_del
is modified to call ast_sched_del_nonrunning to maintain existing code.
AST_SCHED_DEL_UNREF is also updated to look for the -2 in which case it
will not throw a warning or invoke refcall.
test_sched: Add a new unit test sched_test_freebird that will check the
reference count in the resolved scenario.

ASTERISK-29698

Change-Id: Icfb16b3acbc29cf5b4cef74183f7531caaefe21d
---
M include/asterisk/sched.h
M main/sched.c
M tests/test_sched.c
3 files changed, 196 insertions(+), 8 deletions(-)



  git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/22/17922/1

diff --git a/include/asterisk/sched.h b/include/asterisk/sched.h
index bbef914..af9b76c 100644
--- a/include/asterisk/sched.h
+++ b/include/asterisk/sched.h
@@ -72,20 +72,22 @@
 /*!
  * \brief schedule task to get deleted and call unref function
  *
- * Only calls unref function if the delete succeeded.
+ * Only calls the unref function if the task is actually deleted by
+ * ast_sched_del_nonrunning. If a failure occurs or the task is
+ * currently running and not rescheduled then refcall is not invoked.
  *
  * \sa AST_SCHED_DEL
  * \since 1.6.1
  */
 #define AST_SCHED_DEL_UNREF(sched, id, refcall)			\
 	do { \
-		int _count = 0, _id; \
-		while ((_id = id) > -1 && ast_sched_del(sched, _id) && ++_count < 10) { \
+		int _count = 0, _id, _ret = 0; \
+		while ((_id = id) > -1 && (( _ret = ast_sched_del_nonrunning(sched, _id)) == -1) && ++_count < 10) { \
 			usleep(1); \
 		} \
 		if (_count == 10) { \
 			ast_log(LOG_WARNING, "Unable to cancel schedule ID %d.  This is probably a bug (%s: %s, line %d).\n", _id, __FILE__, __PRETTY_FUNCTION__, __LINE__); \
-		} else if (_id > -1) { \
+		} else if (_id > -1 && _ret >-2) { \
 			refcall; \
 			id = -1; \
 		} \
@@ -294,10 +296,30 @@
  *
  * \retval -1 on failure
  * \retval 0 on success
+ *
+ * \deprecated in favor of ast_sched_del_nonrunning which checks if the event is running and rescheduled
+ *
  */
 int ast_sched_del(struct ast_sched_context *con, int id) attribute_warn_unused_result;
 
 /*!
+ * \brief Deletes a scheduled event with care against the event running
+ *
+ * Remove this event from being run.  A procedure should not remove its own
+ * event, but return 0 instead.  In most cases, you should not call this
+ * routine directly, but use the AST_SCHED_DEL() macro instead (especially if
+ * you don't intend to do something different when it returns failure).
+ *
+ * \param con scheduling context to delete item from
+ * \param id ID of the scheduled item to delete
+ *
+ * \retval -1 on failure
+ * \retval -2 event was running but was deleted because it was not rescheduled
+ * \retval 0 on success
+ */
+int ast_sched_del_nonrunning(struct ast_sched_context *con, int id) attribute_warn_unused_result;
+
+/*!
  * \brief Determines number of seconds until the next outstanding event to take place
  *
  * Determine the number of seconds until the next outstanding event
diff --git a/main/sched.c b/main/sched.c
index e3a7d30..45e0677 100644
--- a/main/sched.c
+++ b/main/sched.c
@@ -98,6 +98,8 @@
 	ast_cond_t cond;
 	/*! Indication that a running task was deleted. */
 	unsigned int deleted:1;
+	/*! Indication that a running task was rescheduled. */
+	unsigned int rescheduled:1;
 };
 
 struct sched_thread {
@@ -606,11 +608,27 @@
  * "id".  It's nearly impossible that there
  * would be two or more in the list with that
  * id.
+ * Deprecated in favor of ast_sched_del_nonrunning
+ * which checks running event status.
  */
 int ast_sched_del(struct ast_sched_context *con, int id)
 {
+	return ast_sched_del_nonrunning(con, id) ? -1 : 0;
+}
+
+/*! \brief
+ * Delete the schedule entry with number "id".
+ * If running, wait for the task to complete,
+ * check to see if it is rescheduled then
+ * schedule the release.
+ * It's nearly impossible that there would be
+ * two or more in the list with that id.
+ */
+int ast_sched_del_nonrunning(struct ast_sched_context *con, int id)
+{
 	struct sched *s = NULL;
 	int *last_id = ast_threadstorage_get(&last_del_id, sizeof(int));
+	int res = 0;
 
 	DEBUG(ast_debug(1, "ast_sched_del(%d)\n", id));
 
@@ -645,7 +663,17 @@
 			while (con->currently_executing && (id == con->currently_executing->sched_id->id)) {
 				ast_cond_wait(&s->cond, &con->lock);
 			}
-			/* Do not sched_release() here because ast_sched_runq() will do it */
+			/* This is not rescheduled so the caller of ast_sched_del_nonrunning needs to know
+			 * that it was still deleted
+			 */
+			if (!s->rescheduled) {
+				res = -2;
+			}
+			/* ast_sched_runq knows we are waiting on this item and is passing responsibility for
+			 * its destruction to us
+			 */
+			sched_release(con, s);
+			s = NULL;
 		}
 	}
 
@@ -658,7 +686,10 @@
 	}
 	ast_mutex_unlock(&con->lock);
 
-	if (!s && *last_id != id) {
+	if(res == -2){
+		return res;
+	}
+	else if (!s && *last_id != id) {
 		ast_debug(1, "Attempted to delete nonexistent schedule entry %d!\n", id);
 		/* Removing nonexistent schedule entry shouldn't trigger assert (it was enabled in DEV_MODE);
 		 * because in many places entries is deleted without having valid id. */
@@ -668,7 +699,7 @@
 		return -1;
 	}
 
-	return 0;
+	return res;
 }
 
 void ast_sched_report(struct ast_sched_context *con, struct ast_str **buf, struct ast_cb_names *cbnames)
@@ -793,7 +824,13 @@
 		con->currently_executing = NULL;
 		ast_cond_signal(&current->cond);
 
-		if (res && !current->deleted) {
+		if (current->deleted) {
+			/*
+			 * Another thread is waiting on this scheduled item.  That thread
+			 * will be responsible for it's destruction
+			 */
+			current->rescheduled = res ? 1 : 0;
+		} else if (res) {
 			/*
 			 * If they return non-zero, we should schedule them to be
 			 * run again.
diff --git a/tests/test_sched.c b/tests/test_sched.c
index e995c2c..cff8d65 100644
--- a/tests/test_sched.c
+++ b/tests/test_sched.c
@@ -37,6 +37,7 @@
 #include "asterisk/sched.h"
 #include "asterisk/test.h"
 #include "asterisk/cli.h"
+#include "asterisk/astobj2.h"
 
 static int sched_cb(const void *data)
 {
@@ -336,6 +337,132 @@
 	return CLI_SUCCESS;
 }
 
+struct test_obj {
+	ast_mutex_t lock;
+	ast_cond_t cond;
+	int scheduledCBstarted;
+	int id;
+};
+
+static void test_obj_cleanup(void *data)
+{
+	struct test_obj *obj = data;
+	ast_mutex_destroy(&obj->lock);
+	ast_cond_destroy(&obj->cond);
+}
+
+static int lockingcb(const void *data)
+{
+	struct test_obj *obj = (struct test_obj *)data;
+	struct timespec delay = {3,0};
+
+	ast_mutex_lock(&obj->lock);
+
+	obj->scheduledCBstarted = 1;
+	ast_cond_signal(&obj->cond);
+
+	ast_mutex_unlock(&obj->lock);
+
+	ao2_ref(obj, -1);
+	while (nanosleep(&delay, &delay));
+	/* sleep to force this scheduled event to remain running long
+	 * enough for the scheduling thread to unlock and call
+	 * AST_SCHED_DEL_UNREF
+	 */
+
+	return 0;
+}
+
+AST_TEST_DEFINE(sched_test_freebird)
+{
+	struct test_obj * obj;
+	struct ast_sched_context * con;
+	enum ast_test_result_state res = AST_TEST_FAIL;
+	int refs;
+
+	switch (cmd) {
+	case TEST_INIT:
+		info->name = "sched_test_freebird";
+		info->category = "/main/sched/";
+		info->summary = "Test deadlock avoidance and double-unref";
+		info->description =
+			"This tests a call to AST_SCHED_DEL_UNREF on a running event.";
+		return AST_TEST_NOT_RUN;
+	case TEST_EXECUTE:
+		res = AST_TEST_PASS;
+		break;
+	}
+
+	obj = ao2_alloc(sizeof(struct test_obj), test_obj_cleanup);
+	if (!obj) {
+		ast_test_status_update(test,
+				"ao2_alloc() did not return an object\n");
+		return AST_TEST_FAIL;
+	}
+
+	obj->scheduledCBstarted = 0;
+
+	con = ast_sched_context_create();
+	if (!con) {
+		ast_test_status_update(test,
+				"ast_sched_context_create() did not return a context\n");
+		ao2_cleanup(obj);
+		return AST_TEST_FAIL;
+	}
+
+	if (ast_sched_start_thread(con)) {
+		ast_test_status_update(test, "Failed to start test thread\n");
+		ao2_cleanup(obj);
+		ast_sched_context_destroy(con);
+		return AST_TEST_FAIL;
+	}
+
+	/* This double reference is to ensure that the object isn't destroyed prematurely
+	 * in a case where it is unreffed an additional time.
+	 */
+	ao2_ref(obj, +2);
+	if ((obj->id = ast_sched_add(con, 0, lockingcb, obj)) == -1) {
+		ast_test_status_update(test, "Failed to add scheduler entry\n");
+		ao2_ref(obj, -3);
+		ast_sched_context_destroy(con);
+		return AST_TEST_FAIL;
+	}
+
+	ast_mutex_lock(&obj->lock);
+	while(obj->scheduledCBstarted == 0) {
+		/* Wait for the scheduled thread to indicate that it has started so we can
+		 * then call the AST_SCHED_DEL_UNREF macro
+		 */
+		ast_cond_wait(&obj->cond,&obj->lock);
+	}
+	ast_mutex_unlock(&obj->lock);
+
+	ast_test_status_update(test, "Received signal, calling Scedule and UNREF\n");
+	ast_test_status_update(test, "ID: %d\n", obj->id);
+	AST_SCHED_DEL_UNREF(con, obj->id, ao2_ref(obj, -1));
+
+	refs = ao2_ref(obj, 0);
+
+	switch(refs){
+		case 2:
+			ast_test_status_update(test, "Correct number of references '2'\n");
+			break;
+		default:
+			ast_test_status_update(test, "Incorrect number of references '%d'\n",
+				refs);
+			res = AST_TEST_FAIL;
+			break;
+	}
+
+	/* Based on success or failure, the refcount could change
+	 */
+	while(ao2_ref(obj, -1) > 1);
+
+	ast_sched_context_destroy(con);
+
+	return res;
+}
+
 static struct ast_cli_entry cli_sched[] = {
 	AST_CLI_DEFINE(handle_cli_sched_bench, "Benchmark ast_sched add/del performance"),
 };
@@ -343,6 +470,7 @@
 static int unload_module(void)
 {
 	AST_TEST_UNREGISTER(sched_test_order);
+	AST_TEST_UNREGISTER(sched_test_freebird);
 	ast_cli_unregister_multiple(cli_sched, ARRAY_LEN(cli_sched));
 	return 0;
 }
@@ -350,6 +478,7 @@
 static int load_module(void)
 {
 	AST_TEST_REGISTER(sched_test_order);
+	AST_TEST_REGISTER(sched_test_freebird);
 	ast_cli_register_multiple(cli_sched, ARRAY_LEN(cli_sched));
 	return AST_MODULE_LOAD_SUCCESS;
 }

-- 
To view, visit https://gerrit.asterisk.org/c/asterisk/+/17922
To unsubscribe, or for help writing mail filters, visit https://gerrit.asterisk.org/settings

Gerrit-Project: asterisk
Gerrit-Branch: 18
Gerrit-Change-Id: Icfb16b3acbc29cf5b4cef74183f7531caaefe21d
Gerrit-Change-Number: 17922
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Bradeen <mbradeen at sangoma.com>
Gerrit-MessageType: newchange
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.digium.com/pipermail/asterisk-code-review/attachments/20220121/02da5521/attachment-0001.html>


More information about the asterisk-code-review mailing list