[Asterisk-Dev] better pattern matcher
Reini Urban
r.urban at inode.at
Wed Jun 11 01:05:37 MST 2003
Reini Urban wrote:
> And attached is the patch to pbx.c to be applied to CVS.
Please throw away the previous patch. I swapped the args.
This patch is now correct.
Some other easy-to-implement extensions:
* negation as in [^0] ("any char but 0")
* explicit quantifiers as in X{2,4} ("from 2 to 4 digits"),
or X{2,} ("at least 2 digits"),
* grouping as in N(1X){1,2} ("one or two sequences of 1X")
* alternation as in (01|0|99) ("01 or 0 or 99")
* regex-style quantifiers like ?, + and * are probably not needed:
our namespace is exhausted ('*' is a valid char), we don't want to
repeat the emacs quirk of having to escape the magic pattern
characters (e.g. "\*"), and we can support them via "{}" quantifiers.
? <=> {0,1}
+ <=> {1,}
* <=> {0,}
* which characters are free to use for pattern matching?
I guess all except 0-9, a-z, A-Z, * and #.
Especially needed are ( ) { } \ |
* Do we need capturing?
Something like "_(XXXX).00X" where the first 4 digits are stored
in a magic variable $1.
>>> well, for the beginning I added support to continue to match after a "."
>>> to be able to add extensions after the dialed number and not only
>>> before. :)
>>>
>>> e.g "_0N.8500" matches my voicemail extension for all dialed numbers.
>>>
>>> I'll commit this short patch on Tuesday, because I have to do more
>>> testing and the weekend is free.
--
Reini Urban - Entwicklung - http://inode.at
-------------- next part --------------
Index: pbx.c
===================================================================
RCS file: /usr/cvsroot/asterisk/pbx.c,v
retrieving revision 1.21
diff -u -b -b -u -r1.21 pbx.c
--- pbx.c 22 May 2003 04:38:46 -0000 1.21
+++ pbx.c 11 Jun 2003 08:03:48 -0000
@@ -148,6 +148,7 @@
struct ast_hint *next;
};
+int ast_extension_patmatch(const char *pattern, const char *data);
static int pbx_builtin_prefix(struct ast_channel *, void *);
static int pbx_builtin_suffix(struct ast_channel *, void *);
@@ -488,86 +489,100 @@
free(p);
}
-#define EXTENSION_MATCH_CORE(data,pattern,match) {\
- /* All patterns begin with _ */\
- if (pattern[0] != '_') \
- return 0;\
- /* Start optimistic */\
- match=1;\
- pattern++;\
- while(match && *data && *pattern && (*pattern != '/')) {\
- switch(toupper(*pattern)) {\
- case '[': \
- {\
- int i,border=0;\
- char *where;\
- match=0;\
- pattern++;\
- where=strchr(pattern,']');\
- if (where)\
- border=(int)(where-pattern);\
- if (!where || border > strlen(pattern)) {\
- ast_log(LOG_WARNING, "Wrong usage of [] in the extension\n");\
- return match;\
- }\
- for (i=0; i<border; i++) {\
- int res=0;\
- if (i+2<border)\
- if (pattern[i+1]=='-') {\
- if (*data >= pattern[i] && *data <= pattern[i+2]) {\
- res=1;\
- } else {\
- i+=2;\
- continue;\
- }\
- }\
- if (res==1 || *data==pattern[i]) {\
- match = 1;\
- break;\
- }\
- }\
- pattern+=border;\
- break;\
- }\
- case 'N':\
- if ((*data < '2') || (*data > '9'))\
- match=0;\
- break;\
- case 'X':\
- if ((*data < '0') || (*data > '9'))\
- match = 0;\
- break;\
- case 'Z':\
- if ((*data < '1') || (*data > '9'))\
- match = 0;\
- break;\
- case '.':\
- /* Must match */\
- return 1;\
- case ' ':\
- case '-':\
- /* Ignore these characters */\
- data--;\
- break;\
- default:\
- if (*data != *pattern)\
- match =0;\
- }\
- data++;\
- pattern++;\
- }\
+/* derived from code by Steffen Offermann 1991 (public domain)
+ http://www.cs.umu.se/~isak/Snippets/xstrcmp.c
+*/
+int ast_extension_patmatch(const char *pattern, const char *data)
+{
+ ast_log(LOG_DEBUG, " >>> %s =~ /%s/\n", data, pattern);
+ switch (toupper(*pattern))
+ {
+ case '\0':
+ ast_log(LOG_DEBUG, " !>>> %s => %s\n", data, !*data ? "OK" : "FAIL");
+ return !*data;
+
+ case ' ':
+ case '-':
+ /* Ignore these characters in the pattern */
+ return *data && ast_extension_patmatch(pattern+1, data);
+
+ case '.' : /* wildcard as '*' in glob(). Match any sequence of characters */
+ if (! *(pattern+1) )
+ return *data;
+ else
+ return ast_extension_patmatch(pattern+1, data) || (*data && ast_extension_patmatch(pattern, data+1));
+
+/* wildcard character: Match any char */
+#if 0
+ case '?' :
+ return *data && ast_extension_patmatch(pattern+1, data+1);
+#endif
+ case 'X': /* 0-9 */
+ return ((*data >= '0') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+
+ case 'Z': /* 1-9 */
+ return ((*data >= '1') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+
+ case 'N': /* 2-9 */
+ return ((*data >= '2') && (*data <= '9')) && ast_extension_patmatch(pattern+1, data+1);
+
+ case '[': /* Character ranges: [0-9a-zA-Z]. Negation like [^0] not yet supported. */
+ /* Begin Mark Spencer CODE */
+ {
+ int i,border=0;
+ char *where;
+ pattern++;
+ where=strchr(pattern,']');
+ if (where)
+ border=(int)(where-pattern);
+ if (!where || border > strlen(pattern)) {
+ ast_log(LOG_WARNING, "Wrong [%s] pattern usage\n", pattern);
+ return 0;
+ }
+ for (i=0; i<border; i++) {
+ if (i+2<border) {
+ if (*data==pattern[i])
+ return 1;
+ else if (pattern[i+1]=='-') {
+ if (*data >= pattern[i] && *data <= pattern[i+2]) {
+ return ast_extension_patmatch(where+1, data+1);
+ } else {
+ i+=2;
+ continue;
+ }
+ }
+ }
+ }
+ pattern+=border;
+ break;
+ }
+ /* End Mark Spencer CODE */
+
+ default :
+ return (toupper(*pattern) == toupper(*data)) && ast_extension_patmatch(pattern+1, data+1);
+ }
+ return 0;
}
int ast_extension_match(char *pattern, char *data)
{
int match;
- /* If they're the same return */
- if (!strcmp(pattern, data))
- return 1;
- EXTENSION_MATCH_CORE(data,pattern,match);
- /* Must be at the end of both */
- if (*data || (*pattern && (*pattern != '/')))
- match = 0;
+ if (!*pattern) {
+ ast_log(LOG_WARNING, "ast_extension_match: empty pattern\n");
+ return 0;
+ }
+ if (!*data) {
+ ast_log(LOG_WARNING, "ast_extension_match: empty data\n");
+ return 0;
+ }
+ if (pattern[0] != '_') {
+ match = (strcmp(pattern, data) == 0);
+ ast_log(LOG_DEBUG, "ast_extension_match %s == /%s/ => %d\n", data, pattern, match);
+ } else {
+ ast_log(LOG_DEBUG, "ast_extension_match %s =~ /%s/", data, pattern);
+ match = ast_extension_patmatch(pattern+1,data);
+ ast_log(LOG_DEBUG, " => %d\n", match);
+ }
return match;
}
@@ -583,7 +598,9 @@
(!needmore || (strlen(pattern) > strlen(data)))) {
return 1;
}
- EXTENSION_MATCH_CORE(data,pattern,match);
+ if (pattern[0] == '_') {
+ match = ast_extension_patmatch(pattern+1,data);
+ }
/* If there's more or we don't care about more, return non-zero, otlherwise it's a miss */
if (!needmore || *pattern) {
return match;
More information about the asterisk-dev
mailing list