[Asterisk-Dev] better pattern matcher

Reini Urban r.urban at inode.at
Wed Jun 11 01:05:37 MST 2003


Reini Urban wrote:
> And attached is the patch to pbx.c to be applied to CVS.

Please throw away the previous patch; I had swapped the args in it.
This patch is now correct.

Some other easy-to-implement extensions:
* negation as in [^0] ("any char but 0")
* explicit quantifiers as in X{2,4} ("from 2 to 4 digits"),
                           or X{2,}  ("at least 2 digits"),
* grouping as in N(1X){1,2}  ("one or two sequences of 1X")
* alternation as in (01|0|99) ("01 or 0 or 99")
* regex-style quantifiers like ?, + and * are probably not needed:
our namespace is exhausted, '*' is a valid char, and we don't want to
repeat the emacs quirk of having to escape the magic pattern characters
(i.e. "\*"). We can support them via "{}" quantifiers instead:
     ? <=> {0,1}
     + <=> {1,}
     * <=> {0,}
* Which characters are free to use for pattern matching?
   I guess all except 0-9a-zA-Z*#.
   Especially needed are ( ) { } \ |
* Do we need capturing?
   Something like "_(XXXX).00X" where the first 4 digits are stored
   in a magic variable $1.

>>> well, for the beginning I added support to continue to match after a "."
>>> to be able to add extensions after the dialed number and not only 
>>> before. :)
>>>
>>> e.g "_0N.8500" matches my voicemail extension for all dialed numbers.
>>>
>>> I'll commit this short patch on Tuesday, because I have to do more 
>>> testing and the weekend is free.

-- 
Reini Urban - Entwicklung - http://inode.at
-------------- next part --------------
Index: pbx.c
===================================================================
RCS file: /usr/cvsroot/asterisk/pbx.c,v
retrieving revision 1.21
diff -u -b -b -u -r1.21 pbx.c
--- pbx.c	22 May 2003 04:38:46 -0000	1.21
+++ pbx.c	11 Jun 2003 08:03:48 -0000
@@ -148,6 +148,7 @@
     struct ast_hint *next;
 };
 
+int ast_extension_patmatch(const char *pattern, const char *data);
 
 static int pbx_builtin_prefix(struct ast_channel *, void *);
 static int pbx_builtin_suffix(struct ast_channel *, void *);
@@ -488,86 +489,100 @@
 	free(p);
 }
 
-#define EXTENSION_MATCH_CORE(data,pattern,match) {\
-	/* All patterns begin with _ */\
-	if (pattern[0] != '_') \
-		return 0;\
-	/* Start optimistic */\
-	match=1;\
-	pattern++;\
-	while(match && *data && *pattern && (*pattern != '/')) {\
-		switch(toupper(*pattern)) {\
-		case '[': \
-		{\
-			int i,border=0;\
-			char *where;\
-			match=0;\
-			pattern++;\
-			where=strchr(pattern,']');\
-			if (where)\
-				border=(int)(where-pattern);\
-			if (!where || border > strlen(pattern)) {\
-				ast_log(LOG_WARNING, "Wrong usage of [] in the extension\n");\
-				return match;\
-			}\
-			for (i=0; i<border; i++) {\
-				int res=0;\
-				if (i+2<border)\
-					if (pattern[i+1]=='-') {\
-						if (*data >= pattern[i] && *data <= pattern[i+2]) {\
-							res=1;\
-						} else {\
-							i+=2;\
-							continue;\
-						}\
-					}\
-				if (res==1 || *data==pattern[i]) {\
-					match = 1;\
-					break;\
-				}\
-			}\
-			pattern+=border;\
-			break;\
-		}\
-		case 'N':\
-			if ((*data < '2') || (*data > '9'))\
-				match=0;\
-			break;\
-		case 'X':\
-			if ((*data < '0') || (*data > '9'))\
-				match = 0;\
-			break;\
-		case 'Z':\
-			if ((*data < '1') || (*data > '9'))\
-				match = 0;\
-			break;\
-		case '.':\
-			/* Must match */\
-			return 1;\
-		case ' ':\
-		case '-':\
-			/* Ignore these characters */\
-			data--;\
-			break;\
-		default:\
-			if (*data != *pattern)\
-				match =0;\
-		}\
-		data++;\
-		pattern++;\
-	}\
+/* derived from code by Steffen Offermann 1991 (public domain)
+   http://www.cs.umu.se/~isak/Snippets/xstrcmp.c
+*/
+int ast_extension_patmatch(const char *pattern, const char *data) 
+{
+    ast_log(LOG_DEBUG, " >>> %s =~ /%s/\n", data, pattern);
+    switch (toupper(*pattern))
+	{
+	case '\0':
+	    ast_log(LOG_DEBUG, " !>>> %s => %s\n", data, !*data ? "OK" : "FAIL");
+	    return !*data;
+	    
+	case ' ':
+	case '-':
+	    /* Ignore these characters in the pattern */
+	    return *data && ast_extension_patmatch(pattern+1, data);
+
+	case '.' : /* wildcard as '*' in glob(). Match any sequence of characters */
+	    if (! *(pattern+1) ) 
+		return *data;
+	    else
+		return ast_extension_patmatch(pattern+1, data) || (*data && ast_extension_patmatch(pattern, data+1));
+
+/* wildcard character: Match any char */
+#if 0
+	case '?' :
+	    return *data && ast_extension_patmatch(pattern+1, data+1);
+#endif
+	case 'X': /* 0-9 */
+	    return ((*data >= '0') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+	    
+	case 'Z': /* 1-9 */
+	    return ((*data >= '1') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+	    
+	case 'N': /* 2-9 */
+	    return ((*data >= '2') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+	    
+	case '[': /* Character ranges: [0-9a-zA-Z]. Negation like [^0] not yet supported. */
+	    /* Begin Mark Spencer CODE */
+	    {
+		int i,border=0;
+		char *where;
+		pattern++;
+		where=strchr(pattern,']');
+		if (where)
+		    border=(int)(where-pattern);
+		if (!where || border > strlen(pattern)) {
+		    ast_log(LOG_WARNING, "Wrong [%s] pattern usage\n", pattern);
+		    return 0;
+		}
+		for (i=0; i<border; i++) {
+		    if (i+2<border) {
+			if (*data==pattern[i])
+			    return 1;
+		        else if (pattern[i+1]=='-') {
+			    if (*data >= pattern[i] && *data <= pattern[i+2]) {
+				return ast_extension_patmatch(where+1, data+1);
+			    } else {
+				i+=2;
+				continue;
+			    }
+			}
+		    }
+		}
+		pattern+=border;
+		break;
+	    }
+	    /* End Mark Spencer CODE */
+	    
+	default  :
+	    return (toupper(*pattern) == toupper(*data)) && ast_extension_patmatch(pattern+1, data+1);
+	}
+    return 0;
 }
 
 int ast_extension_match(char *pattern, char *data)
 {
 	int match;
-	/* If they're the same return */
-	if (!strcmp(pattern, data))
-		return 1;
-	EXTENSION_MATCH_CORE(data,pattern,match);
-	/* Must be at the end of both */
-	if (*data || (*pattern && (*pattern != '/')))
-		match = 0;
+	if (!*pattern) {
+	    ast_log(LOG_WARNING, "ast_extension_match: empty pattern\n");
+	    return 0;
+	}
+	if (!*data) {
+	    ast_log(LOG_WARNING, "ast_extension_match: empty data\n");
+	    return 0;
+	}
+	if (pattern[0] != '_') {
+	    match = (strcmp(pattern, data) == 0);
+	    ast_log(LOG_DEBUG, "ast_extension_match %s == /%s/ => %d\n", data, pattern, match);
+	} else {
+	    ast_log(LOG_DEBUG, "ast_extension_match %s =~ /%s/", data, pattern);
+	    match = ast_extension_patmatch(pattern+1,data);
+	    ast_log(LOG_DEBUG, " => %d\n", match);
+	}
 	return match;
 }
 
@@ -583,7 +598,9 @@
 		(!needmore || (strlen(pattern) > strlen(data)))) {
 		return 1;
 	}
-	EXTENSION_MATCH_CORE(data,pattern,match);
+	if (pattern[0] == '_') {
+	    match = ast_extension_patmatch(pattern+1,data);
+	}
 	/* If there's more or we don't care about more, return non-zero, otlherwise it's a miss */
 	if (!needmore || *pattern) {
 		return match;


More information about the asterisk-dev mailing list