[asterisk-commits] murf: branch murf/utf8-whatif r90037 - in /team/murf/utf8-whatif: include/ast...

Wed Nov 28 14:03:42 CST 2007

Author: murf
Date: Wed Nov 28 14:03:42 2007
New Revision: 90037

URL: http://svn.digium.com/view/asterisk?view=rev&rev=90037
Log:
Almost done upgrading to utf8 in patterns

Modified:
    team/murf/utf8-whatif/include/asterisk/unicode.h
    team/murf/utf8-whatif/main/pbx.c
    team/murf/utf8-whatif/main/unicode.c

Modified: team/murf/utf8-whatif/include/asterisk/unicode.h
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/include/asterisk/unicode.h?view=diff&rev=90037&r1=90036&r2=90037
==============================================================================

--- team/murf/utf8-whatif/include/asterisk/unicode.h (original)
+++ team/murf/utf8-whatif/include/asterisk/unicode.h Wed Nov 28 14:03:42 2007
@@ -37,9 +37,11 @@
 
 /* convert a string of 8859-1 chars into a string of ucs4 chars --
    mainly just by turning it from 8 bits to 32 bits/char.
+   if a char is found above 0xff in the ucs4 set, error is set, and
+   that char translates as '?'
  */
 
-ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next);
+ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next, int *error);
 
 
 /* convert a string of ucs4 chars into a string of 8859-1 chars --
@@ -48,4 +50,9 @@
 
 unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
 
+/* returns -1, 0, or 1 if ustra is less than, equal to, or greater than ustrb, respectively; returns 1 if either argument is NULL */
+int ucs4_strcmp(ucs4_t *ustra, ucs4_t *ustrb);
 
+/* like strdup, but for ucs4_t strings */
+ucs4_t *ucs4_strdup(ucs4_t *ustr);
+

Modified: team/murf/utf8-whatif/main/pbx.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/pbx.c?view=diff&rev=90037&r1=90036&r2=90037
==============================================================================
--- team/murf/utf8-whatif/main/pbx.c (original)
+++ team/murf/utf8-whatif/main/pbx.c Wed Nov 28 14:03:42 2007
@@ -56,6 +56,7 @@
 #include "asterisk/utils.h"
 #include "asterisk/causes.h"
 #include "asterisk/musiconhold.h"
+#include "asterisk/unicode.h"
 #include "asterisk/app.h"
 #include "asterisk/devicestate.h"
 #include "asterisk/stringfields.h"
@@ -176,12 +177,22 @@
 	const char pattern[0];
 };
 
+/*! \brief match_char_range: represent a range of characters in the char classes */
+struct match_char_range
+{
+	ucs4_t start; /* first char of the range; matches_char_class() compares both ends inclusively */
+	ucs4_t end; /* last char of the range */
+	struct match_char_range *next; /* simple linked list */
+};
+
+	
 /*! \brief match_char: forms a syntax tree for quick matching of extension patterns */
 struct match_char
 {
 	int is_pattern; /* the pattern started with '_' */
 	int deleted;    /* if this is set, then... don't return it */
-	char *x;       /* the pattern itself-- matches a single char */
+	ucs4_t *x;       /* the pattern itself, a string of characters-- any one of which could match a single char; in UCS4 format */
+	struct match_char_range *mcr; /* a link list of char ranges in the char class; check these also, as they are part of x */
 	int specificity; /* simply the strlen of x, or 10 for X, 9 for Z, and 8 for N; and '.' and '!' will add 11 ? */
 	struct match_char *alt_char;
 	struct match_char *next_char;
@@ -321,9 +332,9 @@
 static int pbx_builtin_importvar(struct ast_channel *, void *);
 static void set_ext_pri(struct ast_channel *c, const char *exten, int pri); 
 static void new_find_extension(const char *str, struct scoreboard *score, struct match_char *tree, int length, int spec, const char *callerid);
-static struct match_char *already_in_tree(struct match_char *current, char *pat);
+static struct match_char *already_in_tree(struct match_char *current, ucs4_t *pat, struct match_char_range *mcr_list);
 static struct match_char *add_exten_to_pattern_tree(struct ast_context *con, struct ast_exten *e1, int findonly);
-static struct match_char *add_pattern_node(struct ast_context *con, struct match_char *current, char *pattern, int is_pattern, int already, int specificity);
+static struct match_char *add_pattern_node(struct ast_context *con, struct match_char *current, struct match_char_range *mcr_list, ucs4_t *pattern, int is_pattern, int already, int specificity);
 static void create_match_char_tree(struct ast_context *con);
 static struct ast_exten *get_canmatch_exten(struct match_char *node);
 static void destroy_pattern_tree(struct match_char *pattern_tree);
@@ -853,19 +864,76 @@
 	}
 }
 
+/* Render node's character class into buf (capacity buflen) as UTF-8 for
+   debug output: the chars of node->x, then any ranges as "[a-b...]".
+   Appends "..." / "[...]" instead when the full text will not fit.
+   Returns buf; buf is left untouched when node or node->x is NULL. */
+static char *form_match_string_rep(struct match_char *node, char *buf, int buflen)
+{
+	ucs4_t *z;
+	struct match_char_range *w;
+	int used;
+
+	if (!node || !node->x || !buf || buflen < 1)
+		return buf;
+	ast_ucs4_to_utf8(node->x, (unsigned char *)buf, buflen, &z);
+	used = (int)strlen(buf);
+
+	if (*z) { /* not all of x was converted: flag the truncation if there is room */
+		if (used + 3 < buflen)
+			strcat(buf,"...");
+		return buf;
+	}
+	if (node->mcr)
+	{
+		char max_range_buffer[1024], *mcrp;
+		ucs4_t ucs4buf[2];
+		ucs4buf[1] = 0;
+		max_range_buffer[0] = '[';
+		max_range_buffer[1] = 0;
+		mcrp = max_range_buffer+1;
+
+		for(w=node->mcr;w;w=w->next)
+		{
+			/* keep room for two UTF-8 sequences (assumed 6 bytes max each), '-', ']' and the NUL */
+			if (mcrp - max_range_buffer > (int)sizeof(max_range_buffer) - 16)
+				break;
+			ucs4buf[0] = w->start;
+			ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+			while (*mcrp)
+				mcrp++;
+			*mcrp++ = '-';
+			ucs4buf[0] = w->end;
+			ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+			while (*mcrp)
+				mcrp++;
+			*mcrp = 0;
+		}
+		strcat(max_range_buffer,"]");
+		/* the ranges share buf with what is already there, so count both */
+		if (used + (int)strlen(max_range_buffer) < buflen)
+			strcat(buf, max_range_buffer);
+		else if (used + 5 < buflen)
+			strcat(buf,"[...]");
+	}
+	return buf;
+}
+
+
 void log_match_char_tree(struct match_char *node, char *prefix)
 {
 	char my_prefix[1024];
 	char extenstr[40];
+	char matchstr[25];
 	
+	matchstr[0] = 0;
 	extenstr[0] = 0;
+	
+	form_match_string_rep(node, matchstr, sizeof(matchstr));
+	
 	if (node && node->exten && node->exten)
 		sprintf(extenstr,"(%p)",node->exten);
-	
-	if (strlen(node->x) > 1 )
-		ast_log(LOG_DEBUG,"%s[%s]:%c:%c:%d:%s%s%s\n", prefix, node->x, node->is_pattern ? 'Y':'N', node->deleted? 'D':'-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
-	else
-		ast_log(LOG_DEBUG,"%s%s:%c:%c:%d:%s%s%s\n", prefix, node->x, node->is_pattern ? 'Y':'N', node->deleted? 'D':'-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
+	ast_log(LOG_DEBUG,"%s{%s}:%c:%c:%d:%s%s%s\n", prefix, matchstr, node->is_pattern ? 'Y':'N', node->deleted? 'D':'-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
 	strcpy(my_prefix,prefix);
 	strcat(my_prefix,"+       ");
 	if (node->next_char)
@@ -878,15 +946,17 @@
 {
 	char my_prefix[1024];
 	char extenstr[40];
+	char matchstr[25];
 	
+	matchstr[0] = 0;
 	extenstr[0] = 0;
+
+	form_match_string_rep(node, matchstr, sizeof(matchstr));
+
 	if (node && node->exten && node->exten)
 		sprintf(extenstr,"(%p)",node->exten);
 	
-	if (strlen(node->x) > 1)
-		ast_cli(fd, "%s[%s]:%c:%c:%d:%s%s%s\n", prefix, node->x, node->is_pattern ? 'Y':'N', node->deleted ? 'D' : '-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
-	else
-		ast_cli(fd, "%s%s:%c:%c:%d:%s%s%s\n", prefix, node->x, node->is_pattern ? 'Y':'N', node->deleted ? 'D' : '-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
+	ast_cli(fd, "%s{%s}:%c:%c:%d:%s%s%s\n", prefix, matchstr, node->is_pattern ? 'Y':'N', node->deleted ? 'D' : '-', node->specificity, node->exten? "EXTEN:":"", node->exten ? node->exten->exten : "", extenstr);
 	strcpy(my_prefix,prefix);
 	strcat(my_prefix,"+       ");
 	if (node->next_char)
@@ -936,9 +1006,28 @@
 	return NULL;
 }
 
+/* Tell whether theChar belongs to node's character class.
+   Returns 1 if it is listed in node->x or lies inside one of node->mcr's ranges (ends inclusive), else 0. */
+static int matches_char_class(struct match_char *node, ucs4_t theChar)
+{
+	struct match_char_range *range;
+	if (ucs4_strchr(node->x, theChar))
+		return 1;
+	range = node->mcr;
+	while (range && (theChar < range->start || theChar > range->end))
+		range = range->next; /* skip every range that does not contain theChar */
+	return range ? 1 : 0;
+}
+
+
 static void new_find_extension(const char *str, struct scoreboard *score, struct match_char *tree, int length, int spec, const char *callerid)
 {
 	struct match_char *p; /* note minimal stack storage requirements */
+	char *next, *next2;
+	int utf8_err = 0;
+	ucs4_t curr_char = ast_utf8_to_ucs4((unsigned char *)str, (unsigned char **)&next, &utf8_err);
+	ucs4_t next_char = ast_utf8_to_ucs4((unsigned char *)next, (unsigned char **)&next2, &utf8_err);
+	
 #ifdef DEBUG_THIS
 	if (tree)
 		ast_log(LOG_NOTICE,"new_find_extension called with %s on (sub)tree %s\n", str, tree->x);
@@ -946,72 +1035,72 @@
 		ast_log(LOG_NOTICE,"new_find_extension called with %s on (sub)tree NULL\n", str);
 #endif
 	for (p=tree; p; p=p->alt_char) {
-		if (p->x[0] == 'N' && p->x[1] == 0 && *str >= '2' && *str <= '9' ) {
-			if (p->exten && !(*(str+1))) /* if a shorter pattern matches along the way, might as well report it */
+		if (p->is_pattern && p->x[0] == 'N' && p->x[1] == 0 && curr_char >= '2' && curr_char <= '9' ) {
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+1, spec+p->specificity, p->exten,0,callerid, p->deleted, p);
 
-			if (p->next_char && ( *(str+1) || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
-				if (*(str+1))
-					new_find_extension(str+1, score, p->next_char, length+1, spec+p->specificity, callerid);
+			if (p->next_char && ( next_char || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
+				if (next_char)
+					new_find_extension(next, score, p->next_char, length+1, spec+p->specificity, callerid);
 				else
 					new_find_extension("/", score, p->next_char, length+1, spec+p->specificity, callerid);
-			} else if (p->next_char && !*(str+1)) {
+			} else if (p->next_char && !next_char) {
 				score->canmatch = 1;
 				score->canmatch_exten = get_canmatch_exten(p);
 			} else {
 				return;
 			}
-		} else if (p->x[0] == 'Z' && p->x[1] == 0 && *str >= '1' && *str <= '9' ) {
-			if (p->exten && !(*(str+1))) /* if a shorter pattern matches along the way, might as well report it */
+		} else if (p->is_pattern && p->x[0] == 'Z' && p->x[1] == 0 && curr_char >= '1' && curr_char <= '9' ) {
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+1, spec+p->specificity, p->exten,0,callerid, p->deleted,p);
 
-			if (p->next_char && ( *(str+1) || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
-				if (*(str+1))
-					new_find_extension(str+1, score, p->next_char, length+1, spec+p->specificity, callerid);
+			if (p->next_char && ( next_char || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
+				if (next_char)
+					new_find_extension(next, score, p->next_char, length+1, spec+p->specificity, callerid);
 				else
 					new_find_extension("/", score, p->next_char, length+1, spec+p->specificity, callerid);
-			} else if (p->next_char && !*(str+1)) {
+			} else if (p->next_char && !next_char) {
 				score->canmatch = 1;
 				score->canmatch_exten = get_canmatch_exten(p);
 			} else {
 				return;
 			}
-		} else if (p->x[0] == 'X' && p->x[1] == 0 && *str >= '0' && *str <= '9' ) {
-			if (p->exten && !(*(str+1))) /* if a shorter pattern matches along the way, might as well report it */
+		} else if (p->is_pattern && p->x[0] == 'X' && p->x[1] == 0 && curr_char >= '0' && curr_char <= '9' ) {
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+1, spec+p->specificity, p->exten,0,callerid, p->deleted,p);
 
-			if (p->next_char && ( *(str+1) || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
-				if (*(str+1))
-					new_find_extension(str+1, score, p->next_char, length+1, spec+p->specificity, callerid);
+			if (p->next_char && ( next_char || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
+				if (next_char)
+					new_find_extension(next, score, p->next_char, length+1, spec+p->specificity, callerid);
 				else
 					new_find_extension("/", score, p->next_char, length+1, spec+p->specificity, callerid);
-			} else if (p->next_char && !*(str+1)) {
+			} else if (p->next_char && !next_char) {
 				score->canmatch = 1;
 				score->canmatch_exten = get_canmatch_exten(p);
 			} else {
 				return;
 			}
-		} else if (p->x[0] == '.' && p->x[1] == 0) {
+		} else if (p->is_pattern && p->x[0] == '.' && p->x[1] == 0) {
 			/* how many chars will the . match against? */
 			int i = 0;
 			const char *str2 = str;
 			while (*str2++) {
 				i++;
 			}
-			if (p->exten && !(*(str+1)))
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+i, spec+(i*p->specificity), p->exten, '.', callerid, p->deleted, p);
 			if (p->next_char && p->next_char->x[0] == '/' && p->next_char->x[1] == 0) {
 				new_find_extension("/", score, p->next_char, length+i, spec+(p->specificity*i), callerid);
 			}
 			return;
-		} else if (p->x[0] == '!' && p->x[1] == 0) {
+		} else if (p->is_pattern && p->x[0] == '!' && p->x[1] == 0) {
 			/* how many chars will the . match against? */
 			int i = 0;
 			const char *str2 = str;
 			while (*str2++) {
 				i++;
 			}
-			if (p->exten && !(*(str+1)))
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+1, spec+(p->specificity*i), p->exten, '!', callerid, p->deleted, p);
 			if (p->next_char && p->next_char->x[0] == '/' && p->next_char->x[1] == 0) {
 				new_find_extension("/", score, p->next_char, length+i, spec+(p->specificity*i), callerid);
@@ -1022,18 +1111,35 @@
 			if (p->next_char && callerid && *callerid) {
 				new_find_extension(callerid, score, p->next_char, length+1, spec, callerid);
 			}
-		} else if (index(p->x, *str)) {
-			if (p->exten && !(*(str+1))) /* if a shorter pattern matches along the way, might as well report it */
+		} else if (p->is_pattern && matches_char_class(p, curr_char)) {
+			if (p->exten && !(next_char))
 				update_scoreboard(score, length+1, spec+p->specificity, p->exten,0,callerid, p->deleted, p);
 
 
-			if (p->next_char && ( *(str+1) || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
-				if (*(str+1)) {
-					new_find_extension(str+1, score, p->next_char, length+1, spec+p->specificity, callerid);
+			if (p->next_char && ( next_char || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
+				if (next_char) {
+					new_find_extension(next, score, p->next_char, length+1, spec+p->specificity, callerid);
 				} else {
 					new_find_extension("/", score, p->next_char, length+1, spec+p->specificity, callerid);
 				}
-			} else if (p->next_char && !*(str+1)) {
+			} else if (p->next_char && !next_char) {
+				score->canmatch = 1;
+				score->canmatch_exten = get_canmatch_exten(p);
+			} else {
+				return;
+			}
+		} else if (!p->is_pattern && curr_char == p->x[0]) {
+			if (p->exten && !(next_char))
+				update_scoreboard(score, length+1, spec+p->specificity, p->exten,0,callerid, p->deleted, p);
+
+
+			if (p->next_char && ( next_char || (p->next_char->x[0] == '/' && p->next_char->x[1] == 0))) {
+				if (next_char) {
+					new_find_extension(next, score, p->next_char, length+1, spec+p->specificity, callerid);
+				} else {
+					new_find_extension("/", score, p->next_char, length+1, spec+p->specificity, callerid);
+				}
+			} else if (p->next_char && !next_char) {
 				score->canmatch = 1;
 				score->canmatch_exten = get_canmatch_exten(p);
 			} else {
@@ -1057,22 +1163,69 @@
  * I guess forming this pattern tree would be analogous to compiling a regex.
  */
 
-static struct match_char *already_in_tree(struct match_char *current, char *pat)
+/* Free every node of a match_char_range list; a NULL list is a no-op. */
+static void destroy_range_list(struct match_char_range *list)
+{
+	struct match_char_range *doomed;
+
+	for (doomed = list; doomed; doomed = list) {
+		list = doomed->next; /* remember the successor before the node goes away */
+		doomed->next = 0;
+		free(doomed);
+	}
+}
+
+/* Search current and its alt_char siblings for a node whose char set (x) and range list both equal pat / mcr_list.
+   Range order is ignored. Returns the matching node, or 0 when there is none (or current is NULL). */
+static struct match_char *already_in_tree(struct match_char *current, ucs4_t *pat, struct match_char_range *mcr_list)
 {
 	struct match_char *t;
+	struct match_char_range *mr,*dr;
 	if (!current)
 		return 0;
 	for (t=current; t; t=t->alt_char) {
-		if (strcmp(pat,t->x) == 0) /* uh, we may want to sort exploded [] contents to make matching easy */
-			return t;
+		if (ucs4_strcmp(pat,t->x) == 0) { /* uh, we may want to sort exploded [] contents to make matching more reliable. Sort of a canonical representation */
+			/* It's not a match till we verify that the ranges are also equal */
+			int same = 1; /* cleared as soon as one range has no twin on the other side */
+
+			for (mr = t->mcr; same && mr; mr=mr->next) {
+				int found = 0;
+				for(dr=mcr_list;dr;dr=dr->next)
+				{
+					if (mr->start == dr->start && mr->end == dr->end) {
+						found = 1;
+						break;
+					}
+				}
+				if (!found)
+					same = 0;
+			}
+			/* if same is still set, each of the ranges in this node corresponds to one in the proposed node */
+			for (mr = mcr_list; same && mr; mr=mr->next) {
+				int found = 0;
+				for(dr = t->mcr; dr; dr=dr->next)
+				{
+					if (mr->start == dr->start && mr->end == dr->end) {
+						found = 1;
+						break;
+					}
+				}
+				if (!found)
+					same = 0;
+			}
+			if (same)
+				return t; /* the char set matches, and so do the ranges. Jackpot */
+			/* same chars but different ranges: keep looking, a later alternative may still match exactly */
+		}
 	}
 	return 0;
 }
 
-static struct match_char *add_pattern_node(struct ast_context *con, struct match_char *current, char *pattern, int is_pattern, int already, int specificity)
+static struct match_char *add_pattern_node(struct ast_context *con, struct match_char *current, struct match_char_range *mcr_list, ucs4_t *pattern, int is_pattern, int already, int specificity)
 {
 	struct match_char *m = ast_calloc(1,sizeof(struct match_char));
-	m->x = ast_strdup(pattern);
+	m->x = ucs4_strdup(pattern);
+	m->mcr = mcr_list;
 	m->is_pattern = is_pattern;
 	if (specificity == 1 && is_pattern && pattern[0] == 'N')
 		m->specificity = 98;
@@ -1111,11 +1264,16 @@
 	int specif;
 	int already;
 	int pattern = 0;
-	char buf[256];
 	char extenbuf[512];
+	ucs4_t buf[256];
+	ucs4_t extenbufu[512];
+	ucs4_t curr_char;
 	char *s1 = extenbuf;
+	char *s2;
+	char *next_char;
 	int l1 = strlen(e1->exten) + strlen(e1->cidmatch) + 2;
-	
+	int error = 0;
+	struct match_char_range *mcr_list = 0;
 
 	strncpy(extenbuf,e1->exten,sizeof(extenbuf));
 	if (e1->matchcid &&  l1 <= sizeof(extenbuf)) {
@@ -1130,90 +1288,107 @@
 #endif
 	m1 = con->pattern_tree; /* each pattern starts over at the root of the pattern tree */
 	already = 1;
-
-	if ( *s1 == '_') {
+	curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+	
+	if ( curr_char == '_') {
 		pattern = 1;
-		s1++;
+		s1 = next_char;
 	}
 	while( *s1 ) {
-		if (pattern && *s1 == '[' && *(s1-1) != '\\') {
-			char *s2 = buf;
+		curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+		if (pattern && curr_char == '[' && *(s1-1) != '\\') {
+			ucs4_t *s2 = buf;
+			char *s3u, *s4u;
+			ucs4_t this_char;
+			ucs4_t that_char;
+			
 			buf[0] = 0;
 			s1++; /* get past the '[' */
 			while (*s1 != ']' && *(s1-1) != '\\' ) {
-				if (*s1 == '\\') {
-					if (*(s1+1) == ']') {
+				this_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&s3u, &error);
+				that_char = ast_utf8_to_ucs4((unsigned char *)s3, (unsigned char **)&s4u, &error);
+				if (this_char == '\\') {
+					if (that_char == ']') {
 						*s2++ = ']';
-						s1++;s1++;
-					} else if (*(s1+1) == '\\') {
+						s1 = s4u;
+					} else if (that_char == '\\') {
 						*s2++ = '\\';
-						s1++;s1++;
-					} else if (*(s1+1) == '-') {
+						s1 = s4u;
+					} else if (that_char == '-') {
 						*s2++ = '-';
-						s1++; s1++;
-					} else if (*(s1+1) == '[') {
+						s1 = s4u;
+					} else if (that_char == '[') {
 						*s2++ = '[';
-						s1++; s1++;
+						s1 = s4u;
 					}
-				} else if (*s1 == '-') { /* remember to add some error checking to all this! */
-					char s3 = *(s1-1);
-					char s4 = *(s1+1);
-					for (s3++; s3 <= s4; s3++) {
-						*s2++ = s3;
-					}
-					s1++; s1++;
+				} else if (this_char == '-' ) { /* remember to add some error checking to all this! */
+					struct match_char_range *mcr1 = ast_calloc(sizeof(struct match_char_range));
+					mcr1->start = *(s2-1);
+					s2--; /* remove the begin char from the char class */
+					mcr1->end = that_char;
+					mcr1->next = mcr_list;
+					mcr_list = mcr1;
+					
+					s1 = s4u;
 				} else {
-					*s2++ = *s1++;
+					*s2++ = this_char;
+					s1 = s3u;
 				}
 			}
-			*s2 = 0; /* null terminate the exploded range */
+			*s2 = 0; /* null terminate the character class */
 			specif = strlen(buf);
 		} else {
 			
-			if (*s1 == '\\') {
-				s1++;
-				buf[0] = *s1;
+			if (curr_char == '\\') {
+				curr_char = ast_utf8_to_ucs4((unsigned char *)next_char, (unsigned char **)&next_char, &error);
+				buf[0] = curr_char;
+				s1 = next_char;
 			} else {
 				if (pattern) {
-					if (*s1 == 'n') /* make sure n,x,z patterns are canonicalized to N,X,Z */
-						*s1 = 'N';
-					else if (*s1 == 'x')
-						*s1 = 'X';
-					else if (*s1 == 'z')
-						*s1 = 'Z';
+					if (curr_char == 'n') /* make sure n,x,z patterns are canonicalized to N,X,Z */
+						curr_char = 'N';
+					else if (curr_char == 'x')
+						curr_char = 'X';
+					else if (curr_char == 'z')
+						curr_char = 'Z';
 				}
-				buf[0] = *s1;
+				buf[0] = curr_char;
 			}
 			buf[1] = 0;
 			specif = 1;
 		}
 		m2 = 0;
-		if (already && (m2=already_in_tree(m1,buf)) && m2->next_char) {
-			if (!(*(s1+1))) {  /* if this is the end of the pattern, but not the end of the tree, then mark this node with the exten...
-								a shorter pattern might win if the longer one doesn't match */
+		if (already && (m2=already_in_tree(m1,buf,mcr_list)) && m2->next_char) {
+			if (!(*next_char)) {  /* if this is the end of the pattern, but not the end of the tree, then mark this node with the exten...
+								a shorter pattern might match a shorter data string... */
 				m2->exten = e1;
 				m2->deleted = 0;
 			}
+			destroy_range_list(mcr_list);
+			mcr_list = 0;
 			m1 = m2->next_char; /* m1 points to the node to compare against */
 		} else {
 			if (m2) {
+				destroy_range_list(mcr_list);
+				mcr_list = 0;
 				if (findonly)
 					return m2;
 				m1 = m2;
 			} else {
 				if (findonly)
 					return m1;
-				m1 = add_pattern_node(con, m1, buf, pattern, already,specif); /* m1 is the node just added */
+				m1 = add_pattern_node(con, m1, mcr_list, buf, pattern, already,specif); /* m1 is the node just added */
+				mcr_list = 0; /* it's now hanging off the newly created node */
 			}
 			
-			if (!(*(s1+1))) {
+			if (!(*next_char)) {
 				m1->deleted = 0;
 				m1->exten = e1;
 			}
 			
 			already = 0;
 		}
-		s1++; /* advance to next char */
+		s1 = next_char; /* advance to next char */
 	}
 	return m1;
 }
@@ -1239,6 +1414,7 @@
 	}
 	ast_hashtab_end_traversal(t1);
 }
+
 
 static void destroy_pattern_tree(struct match_char *pattern_tree) /* pattern tree is a simple binary tree, sort of, so the proper way to destroy it is... recursively! */
 {
@@ -1328,6 +1504,7 @@
 
 	/* always return unless we have a set of chars */
 	switch (c) {
+
 	default:	/* ordinary character */
 		return 0x0000 | (c & 0xff);
 
@@ -7639,8 +7816,8 @@
 int ast_parseable_goto(struct ast_channel *chan, const char *goto_string)
 {
 	char *exten, *pri, *context;
-	char *stringp;
-	int ipri;
+	char *int;
+	stringp ipri;
 	int mode = 0;
 
 	if (ast_strlen_zero(goto_string)) {

Modified: team/murf/utf8-whatif/main/unicode.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/unicode.c?view=diff&rev=90037&r1=90036&r2=90037
==============================================================================
--- team/murf/utf8-whatif/main/unicode.c (original)
+++ team/murf/utf8-whatif/main/unicode.c Wed Nov 28 14:03:42 2007
@@ -174,13 +174,17 @@
    mainly just by turning it from 32 bits to 8 bits/char.
  */
 
-unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next)
+unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next, int *error)
 {
 	unsigned char *buf = out;
-	
+	*error = 0;
 	while (*ucs && outlen > 1)
 	{
-		*buf++ = *ucs++; /* this should lop off all but the lowest 8 bits */
+		if (*ucs > 0xff) {
+			*buf++ = '?';
+			*error = 1;
+		} else
+			*buf++ = *ucs++; /* this should lop off all but the lowest 8 bits */
 		outlen--;
 		/* this is pretty brutal, but should be sufficient for simple 8859-1 conversion */
 	}
@@ -228,5 +232,34 @@
 		return NULL;
 }
 
-
-
+/* Compare two 0-terminated ucs4_t strings element by element.
+   Returns -1, 0 or 1 as ustra sorts before, equal to, or after ustrb;
+   a NULL argument on either side yields 1. */
+int ucs4_strcmp(ucs4_t *ustra, ucs4_t *ustrb)
+{
+	if (!ustra || !ustrb)
+		return 1;
+
+	/* walk the common prefix; stop at the first difference or at a terminator */
+	for (; *ustra && *ustrb; ustra++, ustrb++) {
+		if (*ustra != *ustrb)
+			return (*ustra < *ustrb) ? -1 : 1;
+	}
+
+	/* at least one string has ended: the one with chars left is the greater */
+	if (*ustra)
+		return 1;
+	return *ustrb ? -1 : 0;
+}
+
+/* Like strdup() for 0-terminated ucs4_t strings: the copy keeps its terminator. Returns NULL if ustr is NULL or allocation fails. */
+ucs4_t *ucs4_strdup(ucs4_t *ustr)
+{
+	int size = 0, i;
+	ucs4_t *p = NULL;
+	while (ustr && ustr[size])
+		size++;
+	if (ustr && (p = ast_calloc(size + 1, sizeof(ucs4_t)))) /* +1: calloc leaves the terminator slot zeroed */
+		for (i = 0; i < size; i++) p[i] = ustr[i];
+	return p;
+}