[asterisk-commits] tilghman: trunk r93804 - in /trunk: main/ utils/

SVN commits to the Asterisk project asterisk-commits at lists.digium.com
Tue Dec 18 17:06:05 CST 2007


Author: tilghman
Date: Tue Dec 18 17:06:05 2007
New Revision: 93804

URL: http://svn.digium.com/view/asterisk?view=rev&rev=93804
Log:
Add a canary process, for high priority mode (asterisk -p) to ensure that if
Asterisk goes into a busy loop, the machine will be recoverable.  We'd still
need to do a restart to put Asterisk back into high priority mode, but at
least a reboot won't be required. (Closes issue #11559)

Added:
    trunk/utils/astcanary.c   (with props)
Modified:
    trunk/main/asterisk.c
    trunk/utils/Makefile

Modified: trunk/main/asterisk.c
URL: http://svn.digium.com/view/asterisk/trunk/main/asterisk.c?view=diff&rev=93804&r1=93803&r2=93804
==============================================================================
--- trunk/main/asterisk.c (original)
+++ trunk/main/asterisk.c Tue Dec 18 17:06:05 2007
@@ -233,6 +233,8 @@
 static int shuttingdown;
 static int restartnow;
 static pthread_t consolethread = AST_PTHREADT_NULL;
+static int canary_pid = 0;
+static char canary_filename[128];
 
 static char randompool[256];
 
@@ -2623,6 +2625,35 @@
 	}
 
 	return NULL;
+}
+
+/* Watchdog thread: once the canary file is stale, warn, call ast_set_priority(0), exit. */
+static void *canary_thread(void *unused)
+{
+	struct stat canary_stat;
+	struct timeval tv;
+	int missing;
+
+	/* Give the canary time to sing */
+	sleep(120);
+	for (;;) {
+		/* A failed stat() leaves canary_stat unset, so count it as a dead canary */
+		missing = stat(canary_filename, &canary_stat);
+		tv = ast_tvnow();
+		if (missing || tv.tv_sec > canary_stat.st_mtime + 60) {
+			ast_log(LOG_WARNING, "Canary is dead!!! Reducing priority\n");
+			ast_set_priority(0);
+			pthread_exit(NULL);
+		}
+		sleep(60);	/* Check the canary once a minute */
+	}
+}
+
+/* atexit(3) handler: SIGKILL the canary child, but only if fork() returned a pid (> 0) */
+static void canary_exit(void)
+{
+	if (canary_pid > 0)
+		kill(canary_pid, SIGKILL);
 }
 
 int main(int argc, char *argv[])
@@ -2808,10 +2839,49 @@
 	if ((!runuser) && !ast_strlen_zero(ast_config_AST_RUN_USER))
 		runuser = ast_config_AST_RUN_USER;
 
+	/* Must install this signal handler up here to ensure that if the canary
+	 * fails to execute, it doesn't kill the Asterisk process.
+	 */
+	signal(SIGCHLD, child_handler);
+
 #ifndef __CYGWIN__
 
-	if (isroot) 
+	if (isroot) {
 		ast_set_priority(ast_opt_high_priority);
+		if (ast_opt_high_priority) {
+			snprintf(canary_filename, sizeof(canary_filename), "%s/alt.asterisk.canary.tweet.tweet.tweet", ast_config_AST_RUN_DIR);
+
+			canary_pid = fork();
+			if (canary_pid == 0) {
+				char canary_binary[128], *lastslash;
+				int fd;
+
+				/* Reset signal handler */
+				signal(SIGCHLD, SIG_DFL);
+
+				for (fd = 0; fd < 100; fd++)
+					close(fd);
+
+				execlp("astcanary", "astcanary", canary_filename, NULL);
+
+				/* If not found, try the same path as used to execute asterisk */
+				ast_copy_string(canary_binary, argv[0], sizeof(canary_binary));
+				if ((lastslash = strrchr(canary_binary, '/'))) {
+					ast_copy_string(lastslash + 1, "astcanary", sizeof(canary_binary) + canary_binary - (lastslash + 1));
+					execl(canary_binary, "astcanary", canary_filename, NULL);
+				}
+
+				/* Should never happen */
+				_exit(1);
+			} else if (canary_pid > 0) {
+				pthread_t dont_care;
+				ast_pthread_create_detached(&dont_care, NULL, canary_thread, NULL);
+			}
+
+			/* Kill the canary when we exit */
+			atexit(canary_exit);
+		}
+	}
 
 	if (isroot && rungroup) {
 		struct group *gr;
@@ -2975,7 +3045,6 @@
 	signal(SIGINT, __quit_handler);
 	signal(SIGTERM, __quit_handler);
 	signal(SIGHUP, hup_handler);
-	signal(SIGCHLD, child_handler);
 	signal(SIGPIPE, SIG_IGN);
 
 	/* ensure that the random number generators are seeded with a different value every time

Modified: trunk/utils/Makefile
URL: http://svn.digium.com/view/asterisk/trunk/utils/Makefile?view=diff&rev=93804&r1=93803&r2=93804
==============================================================================
--- trunk/utils/Makefile (original)
+++ trunk/utils/Makefile Tue Dec 18 17:06:05 2007
@@ -17,7 +17,7 @@
 .PHONY: clean all uninstall
 
 # to get check_expr, add it to the ALL_UTILS list
-ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest
+ALL_UTILS:=astman smsq stereorize streamplayer aelparse muted check_expr conf2ael hashtest2 hashtest astcanary
 UTILS:=$(ALL_UTILS)
 
 LIBS += $(BKTR_LIB)	# astobj2 with devmode uses backtrace

Added: trunk/utils/astcanary.c
URL: http://svn.digium.com/view/asterisk/trunk/utils/astcanary.c?view=auto&rev=93804
==============================================================================
--- trunk/utils/astcanary.c (added)
+++ trunk/utils/astcanary.c Tue Dec 18 17:06:05 2007
@@ -1,0 +1,84 @@
+/*
+ * Asterisk -- An open source telephony toolkit.
+ *
+ * Copyright (C) 2007, Digium, Inc.
+ *
+ * Tilghman Lesher <tlesher AT digium DOT com>
+ *
+ * See http://www.asterisk.org for more information about
+ * the Asterisk project. Please do not directly contact
+ * any of the maintainers of this project for assistance;
+ * the project provides a web site, mailing lists and IRC
+ * channels for your use.
+ *
+ * This program is free software, distributed under the terms of
+ * the GNU General Public License Version 2. See the LICENSE file
+ * at the top of the source tree.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <utime.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/*!\brief
+ * At one time, canaries were carried along with coal miners down
+ * into a mine.  Their purpose was to alert the miners when they
+ * had drilled into a pocket of methane gas or another noxious
+ * substance.  The canary, being the most sensitive animal, would
+ * immediately fall over.  Seeing this, the miners could take
+ * action to escape the mine, seeing an imminent danger.
+ *
+ * This process serves a similar purpose, though with the realtime
+ * priority being the reason.  When a thread starts running away
+ * with the processor, it is typically difficult to tell what
+ * thread caused the problem, as the machine acts as if it is
+ * locked up (in fact, what has happened is that Asterisk runs at
+ * a higher priority than even the login shell, so the runaway
+ * thread hogs all available CPU time).
+ *
+ * If that happens, this canary process will cease to get any
+ * process time, which we can monitor with a realtime thread in
+ * Asterisk.  Should that happen, that monitoring thread may take
+ * immediate action to slow down Asterisk to regular priority,
+ * thus allowing an administrator to login to the system and
+ * restart Asterisk or perhaps take another course of action
+ * (such as retrieving a backtrace to let the developers know
+ * what precisely went wrong).
+ *
+ * Note that according to POSIX.1, all threads inside a single
+ * process must share the same priority, so when the monitoring
+ * thread deprioritizes itself, it deprioritizes all threads at
+ * the same time.  This is also why this canary must exist as a
+ * completely separate process and not simply as a thread within
+ * Asterisk itself.
+ */
+
+/* Refresh the mtime of the file named by argv[1] forever; exit(1) if it cannot be created. */
+int main(int argc, char *argv[])
+{
+	int fd;
+	/* Without a file name argument there is nothing to touch */
+	if (argc < 2)
+		exit(1);
+	/* Run at normal priority */
+	setpriority(PRIO_PROCESS, 0, 0);
+	for (;;) {
+		/* Update the modification times (checked from Asterisk) */
+		if (utime(argv[1], NULL)) {
+			/* Recreate the file if it doesn't exist; O_CREAT needs a mode argument */
+			if ((fd = open(argv[1], O_RDWR | O_TRUNC | O_CREAT, 0644)) > -1)
+				close(fd);
+			else
+				exit(1);
+			continue;
+		}
+		sleep(5);	/* Run occasionally */
+	}
+	return 0;	/* Never reached */
+}
+

Propchange: trunk/utils/astcanary.c
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: trunk/utils/astcanary.c
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Propchange: trunk/utils/astcanary.c
------------------------------------------------------------------------------
    svn:mime-type = text/plain




More information about the asterisk-commits mailing list