[Asterisk-code-review] loader: Retry dlopen when loading fails (asterisk[master])

George Joseph asteriskteam at digium.com
Sat Jan 23 19:33:49 CST 2016


George Joseph has uploaded a new change for review.

  https://gerrit.asterisk.org/2074

Change subject: loader: Retry dlopen when loading fails
......................................................................

loader: Retry dlopen when loading fails

Although we use the RTLD_LAZY flag when calling dlopen
the first time on a module, this only defers resolution
for function calls.  Pointer references to functions are
determined at link time so dlopen expects them to be there.
Since we don't cross-module link, pointers to functions
in other modules won't be available and dlopen will fail.

If the failing module isn't a GLOBAL_SYMBOLS module, then
dlopen will be called again after all the GLOBAL_SYMBOLS
modules have been loaded and they'll eventually resolve.

If the calling module IS a GLOBAL_SYMBOLS module itself
and a third module depends on it, then there's an issue
because the second time through the dlopen loop,
GLOBAL_SYMBOLS modules aren't given any special treatment
and since the order in which dlopen is called isn't
deterministic, the dependent may again be tried before the
module it needs is loaded.

We don't currently have this situation but we will shortly.

Simple solution:  Save modules that fail load_resource
because of a dlopen error in a list and retry them
immediately after the first pass. Keep retrying until
the failed list is empty or we reach a #defined max
retries. Error messages are suppressed until the final
pass which also gets rid of those confusing error messages
about module failures that are later corrected.

Change-Id: Iddae1d97cd2f00b94e61662447432765755f64bb
---
M main/loader.c
1 file changed, 75 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.asterisk.org:29418/asterisk refs/changes/74/2074/1

diff --git a/main/loader.c b/main/loader.c
index 954b288..3c971fc 100644
--- a/main/loader.c
+++ b/main/loader.c
@@ -515,9 +515,11 @@
 #endif
 }
 
-static enum ast_module_load_result load_resource(const char *resource_name, unsigned int global_symbols_only, struct ast_heap *resource_heap, int required);
+static enum ast_module_load_result load_resource(const char *resource_name, unsigned int global_symbols_only, unsigned int suppress_logging, struct ast_heap *resource_heap, int required);
 
-static struct ast_module *load_dynamic_module(const char *resource_in, unsigned int global_symbols_only, struct ast_heap *resource_heap)
+#define MODULE_LOCAL_ONLY (void *)-1
+
+static struct ast_module *load_dynamic_module(const char *resource_in, unsigned int global_symbols_only, unsigned int suppress_logging, struct ast_heap *resource_heap)
 {
 	char fn[PATH_MAX] = "";
 	void *lib = NULL;
@@ -545,8 +547,10 @@
 	if (missing_so)
 		strcat(resource_being_loaded->resource, ".so");
 
-	if (!(lib = dlopen(fn, RTLD_LAZY | RTLD_LOCAL))) {
-		ast_log(LOG_WARNING, "Error loading module '%s': %s\n", resource_in, dlerror());
+	if (!(lib = dlopen(fn, RTLD_LAZY | RTLD_GLOBAL))) {
+		if (!suppress_logging) {
+			ast_log(LOG_WARNING, "Error loading module '%s': %s\n", resource_in, dlerror());
+		}
 		ast_free(resource_being_loaded);
 		return NULL;
 	}
@@ -573,7 +577,7 @@
 	   and this one does not, then close it and return */
 	if (global_symbols_only && !wants_global) {
 		logged_dlclose(resource_in, lib);
-		return NULL;
+		return MODULE_LOCAL_ONLY;
 	}
 
 	logged_dlclose(resource_in, lib);
@@ -1053,7 +1057,7 @@
  *
  *  If the ast_heap is not provided, the module's load function will be executed
  *  immediately */
-static enum ast_module_load_result load_resource(const char *resource_name, unsigned int global_symbols_only, struct ast_heap *resource_heap, int required)
+static enum ast_module_load_result load_resource(const char *resource_name, unsigned int global_symbols_only, unsigned int suppress_logging, struct ast_heap *resource_heap, int required)
 {
 	struct ast_module *mod;
 	enum ast_module_load_result res = AST_MODULE_LOAD_SUCCESS;
@@ -1067,14 +1071,16 @@
 			return AST_MODULE_LOAD_SKIP;
 	} else {
 #ifdef LOADABLE_MODULES
-		if (!(mod = load_dynamic_module(resource_name, global_symbols_only, resource_heap))) {
-			/* don't generate a warning message during load_modules() */
+		mod = load_dynamic_module(resource_name, global_symbols_only, suppress_logging, resource_heap);
+		if (mod == MODULE_LOCAL_ONLY) {
+				return AST_MODULE_LOAD_SKIP;
+		}
+		if (!mod) {
 			if (!global_symbols_only) {
 				ast_log(LOG_WARNING, "Module '%s' could not be loaded.\n", resource_name);
 				return required ? AST_MODULE_LOAD_FAILURE : AST_MODULE_LOAD_DECLINE;
-			} else {
-				return required ? AST_MODULE_LOAD_FAILURE : AST_MODULE_LOAD_SKIP;
 			}
+			return required ? AST_MODULE_LOAD_FAILURE : AST_MODULE_LOAD_SKIP;
 		}
 #else
 		ast_log(LOG_WARNING, "Module support is not available. Module '%s' could not be loaded.\n", resource_name);
@@ -1111,7 +1117,7 @@
 {
 	int res;
 	AST_DLLIST_LOCK(&module_list);
-	res = load_resource(resource_name, 0, NULL, 0);
+	res = load_resource(resource_name, 0, 0, NULL, 0);
 	if (!res) {
 		ast_test_suite_event_notify("MODULE_LOAD", "Message: %s", resource_name);
 	}
@@ -1168,6 +1174,8 @@
 	return b_pri - a_pri;
 }
 
+AST_LIST_HEAD_NOLOCK(load_retries, load_order_entry);
+
 /*! loads modules in order by load_pri, updates mod_count
 	\return -1 on failure to load module, -2 on failure to load required module, otherwise 0
 */
@@ -1176,8 +1184,13 @@
 	struct ast_heap *resource_heap;
 	struct load_order_entry *order;
 	struct ast_module *mod;
+	struct load_retries load_retries;
 	int count = 0;
 	int res = 0;
+	int i = 0;
+#define LOAD_RETRIES 3
+
+	AST_LIST_HEAD_INIT_NOLOCK(&load_retries);
 
 	if(!(resource_heap = ast_heap_create(8, mod_load_cmp, -1))) {
 		return -1;
@@ -1185,21 +1198,27 @@
 
 	/* first, add find and add modules to heap */
 	AST_LIST_TRAVERSE_SAFE_BEGIN(load_order, order, entry) {
-		switch (load_resource(order->resource, global_symbols, resource_heap, order->required)) {
+		switch (load_resource(order->resource, global_symbols, 1, resource_heap, order->required)) {
 		case AST_MODULE_LOAD_SUCCESS:
-		case AST_MODULE_LOAD_DECLINE:
-			AST_LIST_REMOVE_CURRENT(entry);
-			ast_free(order->resource);
-			ast_free(order);
+			/* We're supplying a heap so SUCCESS isn't possible but we still have to test for it. */
 			break;
 		case AST_MODULE_LOAD_FAILURE:
-			ast_log(LOG_ERROR, "*** Failed to load module %s - %s\n", order->resource, order->required ? "Required" : "Not required");
-			fprintf(stderr, "*** Failed to load module %s - %s\n", order->resource, order->required ? "Required" : "Not required");
-			res = order->required ? -2 : -1;
-			goto done;
+		case AST_MODULE_LOAD_DECLINE:
+			/*
+			 * DECLINE or FAILURE means there was an issue with dlopen or module_register which might be retryable.
+			 * LOAD_FAILURE only happens for required modules but we're still going to retry.
+			 */
+			AST_LIST_REMOVE_CURRENT(entry);
+			AST_LIST_INSERT_TAIL(&load_retries, order, entry);
+			break;
 		case AST_MODULE_LOAD_SKIP:
+			/*
+			 * SKIP means that dlopen worked but global_symbols was set and this module doesn't qualify.
+			 * Leave it in load_order for the next call of load_resource_list.
+			 */
 			break;
 		case AST_MODULE_LOAD_PRIORITY:
+			/* load_resource worked and the module was added to the priority heap */
 			AST_LIST_REMOVE_CURRENT(entry);
 			ast_free(order->resource);
 			ast_free(order);
@@ -1208,6 +1227,42 @@
 	}
 	AST_LIST_TRAVERSE_SAFE_END;
 
+	/* Retry the failures until the list is empty or we reach LOAD_RETRIES */
+	for (i = 0; !AST_LIST_EMPTY(&load_retries) && i < LOAD_RETRIES; i++) {
+		AST_LIST_TRAVERSE_SAFE_BEGIN(&load_retries, order, entry) {
+			/* Suppress l;og messages unless this is the last pass */
+			switch (load_resource(order->resource, global_symbols, (i < LOAD_RETRIES - 1), resource_heap, order->required)) {
+			/* These are all retryable. */
+			case AST_MODULE_LOAD_SUCCESS:
+			case AST_MODULE_LOAD_DECLINE:
+			case AST_MODULE_LOAD_SKIP:
+				break;
+			case AST_MODULE_LOAD_FAILURE:
+				/* LOAD_FAILURE only happens for required modules */
+				if (i < LOAD_RETRIES - 1) {
+					/* This was the last chance to load a required module*/
+					ast_log(LOG_ERROR, "*** Failed to load module %s - Required\n", order->resource);
+					fprintf(stderr, "*** Failed to load module %s - Required\n", order->resource);
+					res =  -2;
+					goto done;
+				}
+				break;;
+			case AST_MODULE_LOAD_PRIORITY:
+				/* load_resource worked and the module was added to the priority heap */
+				AST_LIST_REMOVE_CURRENT(entry);
+				ast_free(order->resource);
+				ast_free(order);
+				break;
+			}
+		}
+		AST_LIST_TRAVERSE_SAFE_END;
+	}
+
+	while ((order = AST_LIST_REMOVE_HEAD(&load_retries, entry))) {
+		ast_free(order->resource);
+		ast_free(order);
+	}
+
 	/* second remove modules from heap sorted by priority */
 	while ((mod = ast_heap_pop(resource_heap))) {
 		switch (start_resource(mod)) {

-- 
To view, visit https://gerrit.asterisk.org/2074
To unsubscribe, visit https://gerrit.asterisk.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iddae1d97cd2f00b94e61662447432765755f64bb
Gerrit-PatchSet: 1
Gerrit-Project: asterisk
Gerrit-Branch: master
Gerrit-Owner: George Joseph <george.joseph at fairview5.com>



More information about the asterisk-code-review mailing list