[asterisk-commits] murf: branch murf/utf8-whatif r90151 - in /team/murf/utf8-whatif: include/ast...

Thu Nov 29 10:19:50 CST 2007

Author: murf
Date: Thu Nov 29 10:19:50 2007
New Revision: 90151

URL: http://svn.digium.com/view/asterisk?view=rev&rev=90151
Log:
Compiles. Tried a call, and it went through. Amazing. There is still a lot of testing and fixing to do, I bet.

Modified:
    team/murf/utf8-whatif/include/asterisk/unicode.h
    team/murf/utf8-whatif/main/pbx.c
    team/murf/utf8-whatif/main/unicode.c

Modified: team/murf/utf8-whatif/include/asterisk/unicode.h
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/include/asterisk/unicode.h?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================

--- team/murf/utf8-whatif/include/asterisk/unicode.h (original)
+++ team/murf/utf8-whatif/include/asterisk/unicode.h Thu Nov 29 10:19:50 2007
@@ -23,16 +23,20 @@
 /* convert a single (possibly multi-byte) utf8 char to UCS4; next pts to char following the utf-8 char. */
 /* error is set if a problem is found in the utf8 encoding; the first byte will be returned as-is */
 
-ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error);
+ucs4_t ast_utf8_to_ucs4(char *utf8, char **next, int *error);
 
 /* convert a string of ucs4 chars into a string of utf-8 chars */
 
-unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+char *ast_ucs4_to_utf8(ucs4_t *ucs, char *out, int outlen, ucs4_t **next);
 
 
 /* return a pointer the first place in ustr where theChar can be found, or 0 if nothing found */
 
 ucs4_t *ucs4_strchr(ucs4_t *ustr, ucs4_t theChar);
+
+/* return the number of ucs4_t chars before a null entry */
+
+int ucs4_strlen(const ucs4_t *ustr);
 
 
 /* convert a string of 8859-1 chars into a string of ucs4 chars --
@@ -41,14 +45,14 @@
    that char translates as '?'
  */
 
-ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next, int *error);
+ucs4_t *ast_8859_1_to_ucs4(char *in, ucs4_t *ucs, int outlen, char **next, int *error);
 
 
 /* convert a string of ucs4 chars into a string of 8859-1 chars --
    mainly just by turning it from 32 bits to 8 bits/char.
  */
 
-unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+char *ast_ucs4_to_8859_1(ucs4_t *ucs, char *out, int outlen, ucs4_t **next, int *err);
 
 /* returns -1, 0, or 1 if a is less than, equal to, or greater than b, respectively */
 int ucs4_strcmp(ucs4_t *ustra, ucs4_t *ustrb);

Modified: team/murf/utf8-whatif/main/pbx.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/pbx.c?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================
--- team/murf/utf8-whatif/main/pbx.c (original)
+++ team/murf/utf8-whatif/main/pbx.c Thu Nov 29 10:19:50 2007
@@ -870,7 +870,7 @@
 	   the char ranges with the same process */
 	ucs4_t *z;
 	struct match_char_range *w;
-	ast_ucs4_to_utf8(node->x, (unsigned char *)buf, buflen, &z);
+	ast_ucs4_to_utf8(node->x, buf, buflen, &z);
 	
 	if (*z) { 
 		if (strlen(buf) +3 < buflen) {
@@ -892,12 +892,12 @@
 		for(w=node->mcr;w;w=w->next)
 		{
 			ucs4buf[0] = w->start;
-			ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+			ast_ucs4_to_utf8(ucs4buf, mcrp, 1022-(mcrp-max_range_buffer), 0);
 			while (*mcrp)
 				mcrp++;
 			*mcrp++ = '-';
 			ucs4buf[0] = w->end;
-			ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+			ast_ucs4_to_utf8(ucs4buf, mcrp, 1022-(mcrp-max_range_buffer), 0);
 			while (*mcrp)
 				mcrp++;
 			*mcrp = 0;
@@ -1025,8 +1025,8 @@
 	struct match_char *p; /* note minimal stack storage requirements */
 	char *next, *next2;
 	int utf8_err = 0;
-	ucs4_t curr_char = ast_utf8_to_ucs4((unsigned char *)str, (unsigned char **)&next, &utf8_err);
-	ucs4_t next_char = ast_utf8_to_ucs4((unsigned char *)next, (unsigned char **)&next2, &utf8_err);
+	ucs4_t curr_char = ast_utf8_to_ucs4((char *)str, &next, &utf8_err);
+	ucs4_t next_char = ast_utf8_to_ucs4(next, &next2, &utf8_err);
 	
 #ifdef DEBUG_THIS
 	if (tree)
@@ -1266,10 +1266,8 @@
 	int pattern = 0;
 	char extenbuf[512];
 	ucs4_t buf[256];
-	ucs4_t extenbufu[512];
 	ucs4_t curr_char;
 	char *s1 = extenbuf;
-	char *s2;
 	char *next_char;
 	int l1 = strlen(e1->exten) + strlen(e1->cidmatch) + 2;
 	int error = 0;
@@ -1288,14 +1286,14 @@
 #endif
 	m1 = con->pattern_tree; /* each pattern starts over at the root of the pattern tree */
 	already = 1;
-	curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+	curr_char = ast_utf8_to_ucs4(s1, &next_char, &error);
 	
 	if ( curr_char == '_') {
 		pattern = 1;
 		s1 = next_char;
 	}
 	while( *s1 ) {
-		curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+		curr_char = ast_utf8_to_ucs4(s1, &next_char, &error);
 		if (pattern && curr_char == '[' && *(s1-1) != '\\') {
 			ucs4_t *s2 = buf;
 			char *s3u, *s4u;
@@ -1305,8 +1303,8 @@
 			buf[0] = 0;
 			s1++; /* get past the '[' */
 			while (*s1 != ']' && *(s1-1) != '\\' ) {
-				this_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&s3u, &error);
-				that_char = ast_utf8_to_ucs4((unsigned char *)s3, (unsigned char **)&s4u, &error);
+				this_char = ast_utf8_to_ucs4(s1, &s3u, &error);
+				that_char = ast_utf8_to_ucs4(s3u, &s4u, &error);
 				if (this_char == '\\') {
 					if (that_char == ']') {
 						*s2++ = ']';
@@ -1322,7 +1320,7 @@
 						s1 = s4u;
 					}
 				} else if (this_char == '-' ) { /* remember to add some error checking to all this! */
-					struct match_char_range *mcr1 = ast_calloc(sizeof(struct match_char_range));
+					struct match_char_range *mcr1 = ast_calloc(1,sizeof(struct match_char_range));
 					mcr1->start = *(s2-1);
 					s2--; /* remove the begin char from the char class */
 					mcr1->end = that_char;
@@ -1336,11 +1334,11 @@
 				}
 			}
 			*s2 = 0; /* null terminate the character class */
-			specif = strlen(buf);
+			specif = ucs4_strlen(buf);
 		} else {
 			
 			if (curr_char == '\\') {
-				curr_char = ast_utf8_to_ucs4((unsigned char *)next_char, (unsigned char **)&next_char, &error);
+				curr_char = ast_utf8_to_ucs4(next_char, &next_char, &error);
 				buf[0] = curr_char;
 				s1 = next_char;
 			} else {
@@ -7808,8 +7806,8 @@
 int ast_parseable_goto(struct ast_channel *chan, const char *goto_string)
 {
 	char *exten, *pri, *context;
-	char *int;
-	stringp ipri;
+	char *stringp;
+	int ipri;
 	int mode = 0;
 
 	if (ast_strlen_zero(goto_string)) {

Modified: team/murf/utf8-whatif/main/unicode.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/unicode.c?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================
--- team/murf/utf8-whatif/main/unicode.c (original)
+++ team/murf/utf8-whatif/main/unicode.c Thu Nov 29 10:19:50 2007
@@ -17,75 +17,78 @@
  * at the top of the source tree.
  */
 
+#include <stdio.h>
+#include <stdlib.h>
 #include "asterisk/unicode.h"
-#define NULL 0
 
 /* convert a single (possibly multi-byte) utf8 char to UCS4; next pts to char following the utf-8 char. */
 /* error is set if a problem is found in the utf8 encoding; the first byte will be returned as-is */
 
-ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error)
-{
-	if (((*utf8) & 0x80) == 0) { /* single byte value */
+ucs4_t ast_utf8_to_ucs4(char *utf8, char **next, int *error)
+{
+	unsigned char *utf88 = (unsigned char *)utf8;
+	
+	if (((*utf88) & 0x80) == 0) { /* single byte value */
 		if (next)
 			*next = utf8 + 1;
 		if (error)
 			*error = 0;
-		return (ucs4_t)(*utf8);
-	}
-	if (((*utf8) & 0xE0) == 0xC0) { /* two byte value */
-		if (((*utf8+1) & 0xC0) == 0x80) { /* well formed */
-			ucs4_t t = (ucs4_t)(((*utf8) & 0x1F) << 6);
+		return (ucs4_t)(*utf88);
+	}
+	if (((*utf88) & 0xE0) == 0xC0) { /* two byte value */
+		if ((*(utf88+1) & 0xC0) == 0x80) { /* well formed */
+			ucs4_t t = (ucs4_t)(((*utf88) & 0x1F) << 6);
 			if (next)
 				*next = utf8 + 2;
 			if (error)
 				*error = 0;
-			return t + ((*utf8+1) & 0x3F);
+			return t + (*(utf88+1) & 0x3F);
 		} else { /* something is wrong! */
 			if (next)
 				*next = utf8 + 1;
 			if (error)
 				*error = 1;
-			return (ucs4_t)(*utf8);
-		}
-	}
-	if (((*utf8) & 0xF0) == 0xE0) { /* three byte value */
-		if (((*utf8+1) & 0xC0) == 0x80
-			&& ((*utf8+2) & 0xC0) == 0x80) { /* well formed */
+			return (ucs4_t)(*utf88);
+		}
+	}
+	if (((*utf88) & 0xF0) == 0xE0) { /* three byte value */
+		if ((*(utf88+1) & 0xC0) == 0x80
+			&& (*(utf88+2) & 0xC0) == 0x80) { /* well formed */
 			
-			ucs4_t t1 = (ucs4_t)(((*utf8) & 0x0F) << 12);
-			ucs4_t t2 = (ucs4_t)(((*utf8+1) & 0x3F) << 6);
+			ucs4_t t1 = (ucs4_t)(((*utf88) & 0x0F) << 12);
+			ucs4_t t2 = (ucs4_t)((*(utf88+1) & 0x3F) << 6);
 			if (next)
 				*next = utf8 + 3;
 			if (error)
 				*error = 0;
-			return t1 + t2 + ((*utf8+2) & 0x3F);
+			return t1 + t2 + (*(utf88+2) & 0x3F);
 		} else {
 			if (next)
 				*next = utf8 + 1;
 			if (error)
 				*error = 1;
-			return (ucs4_t)(*utf8);
-		}
-	}
-	if (((*utf8) & 0xF8) == 0xF0) { /* four byte value */
-		if (((*utf8+1) & 0xC0) == 0x80
-			&& ((*utf8+2) & 0xC0) == 0x80
-			&& ((*utf8+3) & 0xC0) == 0x80) { /* well formed */
+			return (ucs4_t)(*utf88);
+		}
+	}
+	if (((*utf88) & 0xF8) == 0xF0) { /* four byte value */
+		if ((*(utf88+1) & 0xC0) == 0x80
+			&& (*(utf88+2) & 0xC0) == 0x80
+			&& (*(utf88+3) & 0xC0) == 0x80) { /* well formed */
 			
-			ucs4_t t1 = (ucs4_t)(((ucs4_t)(*utf8) & 0x07) << 18);
-			ucs4_t t2 = (ucs4_t)(((ucs4_t)(*utf8+1) & 0x3F) << 12); /*  */
-			ucs4_t t3 = (ucs4_t)(((ucs4_t)(*utf8+2) & 0x3F) << 6);
+			ucs4_t t1 = (ucs4_t)(((ucs4_t)(*utf88) & 0x07) << 18);
+			ucs4_t t2 = (ucs4_t)(((ucs4_t)(*(utf88+1) & 0x3F)) << 12); /*  */
+			ucs4_t t3 = (ucs4_t)(((ucs4_t)(*(utf88+2) & 0x3F)) << 6);
 			if (next)
 				*next = utf8 + 4;
 			if (error)
 				*error = 0;
-			return t1 + t2 + t3 + ((*utf8+3) & 0x3F);
+			return t1 + t2 + t3 + (*(utf88+3) & 0x3F);
 		} else {
 			if (next)
 				*next = utf8 + 1;
 			if (error)
 				*error = 1;
-			return (ucs4_t)(*utf8);
+			return (ucs4_t)(*utf88);
 		}
 	}
 	/* if we are here, something is terribly wrong! */
@@ -93,90 +96,93 @@
 		*next = utf8 + 1;
 	if (error)
 		*error = 1;
-	return (ucs4_t)(*utf8);
+	return (ucs4_t)(*utf88);
 }
 
 
 /* convert a string of ucs4 chars into a string of utf-8 chars */
 
-unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next)
-{
-	unsigned char *buf = out;
+char *ast_ucs4_to_utf8(ucs4_t *ucs, char *out, int outlen, ucs4_t **next)
+{
+	unsigned char *buf = (unsigned char *)out;
 	
 	while (*ucs && outlen > 0)
 	{
+		unsigned char t;
+		
 		if (*ucs < 0x80) { /* one byte out */
 			if (outlen < 2) {
 				/* no room left */
 				*next = ucs;
-				*out = 0;
-				return buf;
-			} else {
-				*out++ == (char)(*ucs);
+				*buf = 0;
+				return out;
+			} else {
+				t = *ucs;
+				*buf++ = t;
 				outlen -= 1;
 			}
 		} else if (*ucs < 0x800) { /* two bytes out */
 			if (outlen < 3) {
 				/* no room left */
 				*next = ucs;
-				*out = 0;
-				return buf;
+				*buf = 0;
+				return out;
 			} else {
 				ucs4_t b1,b2;
 				b1 = ((*ucs) >> 6);
 				b2 = ((*ucs) & 0x3F);
-				*out++ == 0xC0 + (char)(b1);
-				*out++ == 0x80 + (char)(b2);
+				*buf++ = 0xC0 + (char)(b1);
+				*buf++ = 0x80 + (char)(b2);
 				outlen -= 2;
 			}
 		} else if (*ucs < 0x10000) { /* three bytes out */
 			if (outlen < 4) {
 				/* no room left */
 				*next = ucs;
-				*out = 0;
-				return buf;
+				*buf = 0;
+				return out;
 			} else {
 				ucs4_t b1,b2,b3;
 				b1 = ((*ucs) >> 12);
 				b2 = (((*ucs) & 0xFFF) >> 6);
 				b3 = ((*ucs) & 0x3F);
-				*out++ == 0xE0 + (char)(b1);
-				*out++ == 0x80 + ((char)(b2));
-				*out++ == 0x80 + ((char)(b3));
+				*buf++ = 0xE0 + (char)(b1);
+				*buf++ = 0x80 + ((char)(b2));
+				*buf++ = 0x80 + ((char)(b3));
 				outlen -= 3;
 			}
 		} else { /* four bytes out */
 			if (outlen < 5) {
 				/* no room left */
 				*next = ucs;
-				*out = 0;
-				return buf;
+				*buf = 0;
+				return out;
 			} else {
 				ucs4_t b1,b2,b3,b4;
 				b1 = ((*ucs) >> 18);
 				b2 = (((*ucs) & 0x3FFFF) >> 12);
 				b3 = (((*ucs) & 0xFFF) >> 6);
 				b4 = ((*ucs) & 0x3F);
-				*out++ == 0xF0 + (char)(b1);
-				*out++ == 0x80 + (char)(b2);
-				*out++ == 0x80 + (char)(b3);
-				*out++ == 0x80 + (char)(b4);
+				*buf++ = 0xF0 + (char)(b1);
+				*buf++ = 0x80 + (char)(b2);
+				*buf++ = 0x80 + (char)(b3);
+				*buf++ = 0x80 + (char)(b4);
 				outlen -= 4;
 			}
 		}
 		ucs++;
 	}
-	*out = 0;
-	return buf;
+	*buf = 0;
+	return out;
 }
 
 /* convert a string of ucs4 chars into a string of 8859-1 chars --
    mainly just by turning it from 32 bits to 8 bits/char.
  */
 
-unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next, int *error)
-{
-	unsigned char *buf = out;
+char *ast_ucs4_to_8859_1(ucs4_t *ucs, char *out, int outlen, ucs4_t **next, int *error)
+{
+	unsigned char *buf = (unsigned char *)out;
 	*error = 0;
 	while (*ucs && outlen > 1)
 	{
@@ -198,7 +204,7 @@
    mainly just by turning it from 8 bits to 32 bits/char.
  */
 
-ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next)
+ucs4_t *ast_8859_1_to_ucs4(char *in, ucs4_t *ucs, int outlen, char **next, int *error)
 {
 	ucs4_t *buf = ucs;
 	
@@ -258,8 +264,19 @@
 	ucs4_t *p = ustr;
 	while (*p++)
 		size++;
-	p = ast_calloc(size,sizeof(ucs4_t));
+	p = calloc(size,sizeof(ucs4_t));
+	if (!p)
+		return 0;
 	for (i=0; i<size; i++)
 		p[i] = ustr[i];
 	return p;
 }
+
+int ucs4_strlen(const ucs4_t *ustr)
+{
+	int size = 0;
+	const ucs4_t *p = ustr;
+	while (*p++)
+		size++;
+	return size;
+}