[asterisk-commits] murf: branch murf/utf8-whatif r90151 - in /team/murf/utf8-whatif: include/ast...
SVN commits to the Asterisk project
asterisk-commits at lists.digium.com
Thu Nov 29 10:19:50 CST 2007
Author: murf
Date: Thu Nov 29 10:19:50 2007
New Revision: 90151
URL: http://svn.digium.com/view/asterisk?view=rev&rev=90151
Log:
Compiles. Tried a call, and it went through. Amazing. There is still a lot of testing and fixing to do, I bet.
Modified:
team/murf/utf8-whatif/include/asterisk/unicode.h
team/murf/utf8-whatif/main/pbx.c
team/murf/utf8-whatif/main/unicode.c
Modified: team/murf/utf8-whatif/include/asterisk/unicode.h
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/include/asterisk/unicode.h?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================
--- team/murf/utf8-whatif/include/asterisk/unicode.h (original)
+++ team/murf/utf8-whatif/include/asterisk/unicode.h Thu Nov 29 10:19:50 2007
@@ -23,16 +23,20 @@
/* convert a single (possibly multi-byte) utf8 char to UCS4; next pts to char following the utf-8 char. */
/* error is set if a problem is found in the utf8 encoding; the first byte will be returned as-is */
-ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error);
+ucs4_t ast_utf8_to_ucs4(char *utf8, char **next, int *error);
/* convert a string of ucs4 chars into a string of utf-8 chars */
-unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+char *ast_ucs4_to_utf8(ucs4_t *ucs, char *out, int outlen, ucs4_t **next);
/* return a pointer to the first place in ustr where theChar can be found, or 0 if nothing found */
ucs4_t *ucs4_strchr(ucs4_t *ustr, ucs4_t theChar);
+
+/* return the number of ucs4_t chars before a null entry */
+
+int ucs4_strlen(const ucs4_t *ustr);
/* convert a string of 8859-1 chars into a string of ucs4 chars --
@@ -41,14 +45,14 @@
that char translates as '?'
*/
-ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next, int *error);
+ucs4_t *ast_8859_1_to_ucs4(char *in, ucs4_t *ucs, int outlen, char **next, int *error);
/* convert a string of ucs4 chars into a string of 8859-1 chars --
mainly just by turning it from 32 bits to 8 bits/char.
*/
-unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+char *ast_ucs4_to_8859_1(ucs4_t *ucs, char *out, int outlen, ucs4_t **next, int *err);
/* returns -1, 0, or 1 if a is less than, equal to, or greater than b, respectively */
int ucs4_strcmp(ucs4_t *ustra, ucs4_t *ustrb);
Modified: team/murf/utf8-whatif/main/pbx.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/pbx.c?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================
--- team/murf/utf8-whatif/main/pbx.c (original)
+++ team/murf/utf8-whatif/main/pbx.c Thu Nov 29 10:19:50 2007
@@ -870,7 +870,7 @@
the char ranges with the same process */
ucs4_t *z;
struct match_char_range *w;
- ast_ucs4_to_utf8(node->x, (unsigned char *)buf, buflen, &z);
+ ast_ucs4_to_utf8(node->x, buf, buflen, &z);
if (*z) {
if (strlen(buf) +3 < buflen) {
@@ -892,12 +892,12 @@
for(w=node->mcr;w;w=w->next)
{
ucs4buf[0] = w->start;
- ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+ ast_ucs4_to_utf8(ucs4buf, mcrp, 1022-(mcrp-max_range_buffer), 0);
while (*mcrp)
mcrp++;
*mcrp++ = '-';
ucs4buf[0] = w->end;
- ast_ucs4_to_utf8(ucs4buf, (unsigned char *)mcrp, 1022-(mcrp-max_range_buffer), 0);
+ ast_ucs4_to_utf8(ucs4buf, mcrp, 1022-(mcrp-max_range_buffer), 0);
while (*mcrp)
mcrp++;
*mcrp = 0;
@@ -1025,8 +1025,8 @@
struct match_char *p; /* note minimal stack storage requirements */
char *next, *next2;
int utf8_err = 0;
- ucs4_t curr_char = ast_utf8_to_ucs4((unsigned char *)str, (unsigned char **)&next, &utf8_err);
- ucs4_t next_char = ast_utf8_to_ucs4((unsigned char *)next, (unsigned char **)&next2, &utf8_err);
+ ucs4_t curr_char = ast_utf8_to_ucs4((char *)str, &next, &utf8_err);
+ ucs4_t next_char = ast_utf8_to_ucs4(next, &next2, &utf8_err);
#ifdef DEBUG_THIS
if (tree)
@@ -1266,10 +1266,8 @@
int pattern = 0;
char extenbuf[512];
ucs4_t buf[256];
- ucs4_t extenbufu[512];
ucs4_t curr_char;
char *s1 = extenbuf;
- char *s2;
char *next_char;
int l1 = strlen(e1->exten) + strlen(e1->cidmatch) + 2;
int error = 0;
@@ -1288,14 +1286,14 @@
#endif
m1 = con->pattern_tree; /* each pattern starts over at the root of the pattern tree */
already = 1;
- curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+ curr_char = ast_utf8_to_ucs4(s1, &next_char, &error);
if ( curr_char == '_') {
pattern = 1;
s1 = next_char;
}
while( *s1 ) {
- curr_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&next_char, &error);
+ curr_char = ast_utf8_to_ucs4(s1, &next_char, &error);
if (pattern && curr_char == '[' && *(s1-1) != '\\') {
ucs4_t *s2 = buf;
char *s3u, *s4u;
@@ -1305,8 +1303,8 @@
buf[0] = 0;
s1++; /* get past the '[' */
while (*s1 != ']' && *(s1-1) != '\\' ) {
- this_char = ast_utf8_to_ucs4((unsigned char *)s1, (unsigned char **)&s3u, &error);
- that_char = ast_utf8_to_ucs4((unsigned char *)s3, (unsigned char **)&s4u, &error);
+ this_char = ast_utf8_to_ucs4(s1, &s3u, &error);
+ that_char = ast_utf8_to_ucs4(s3u, &s4u, &error);
if (this_char == '\\') {
if (that_char == ']') {
*s2++ = ']';
@@ -1322,7 +1320,7 @@
s1 = s4u;
}
} else if (this_char == '-' ) { /* remember to add some error checking to all this! */
- struct match_char_range *mcr1 = ast_calloc(sizeof(struct match_char_range));
+ struct match_char_range *mcr1 = ast_calloc(1,sizeof(struct match_char_range));
mcr1->start = *(s2-1);
s2--; /* remove the begin char from the char class */
mcr1->end = that_char;
@@ -1336,11 +1334,11 @@
}
}
*s2 = 0; /* null terminate the character class */
- specif = strlen(buf);
+ specif = ucs4_strlen(buf);
} else {
if (curr_char == '\\') {
- curr_char = ast_utf8_to_ucs4((unsigned char *)next_char, (unsigned char **)&next_char, &error);
+ curr_char = ast_utf8_to_ucs4(next_char, &next_char, &error);
buf[0] = curr_char;
s1 = next_char;
} else {
@@ -7808,8 +7806,8 @@
int ast_parseable_goto(struct ast_channel *chan, const char *goto_string)
{
char *exten, *pri, *context;
- char *int;
- stringp ipri;
+ char *stringp;
+ int ipri;
int mode = 0;
if (ast_strlen_zero(goto_string)) {
Modified: team/murf/utf8-whatif/main/unicode.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/unicode.c?view=diff&rev=90151&r1=90150&r2=90151
==============================================================================
--- team/murf/utf8-whatif/main/unicode.c (original)
+++ team/murf/utf8-whatif/main/unicode.c Thu Nov 29 10:19:50 2007
@@ -17,75 +17,78 @@
* at the top of the source tree.
*/
+#include <stdio.h>
+#include <stdlib.h>
#include "asterisk/unicode.h"
-#define NULL 0
/* convert a single (possibly multi-byte) utf8 char to UCS4; next pts to char following the utf-8 char. */
/* error is set if a problem is found in the utf8 encoding; the first byte will be returned as-is */
-ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error)
-{
- if (((*utf8) & 0x80) == 0) { /* single byte value */
+ucs4_t ast_utf8_to_ucs4(char *utf8, char **next, int *error)
+{
+ unsigned char *utf88 = (unsigned char *)utf8;
+
+ if (((*utf88) & 0x80) == 0) { /* single byte value */
if (next)
*next = utf8 + 1;
if (error)
*error = 0;
- return (ucs4_t)(*utf8);
- }
- if (((*utf8) & 0xE0) == 0xC0) { /* two byte value */
- if (((*utf8+1) & 0xC0) == 0x80) { /* well formed */
- ucs4_t t = (ucs4_t)(((*utf8) & 0x1F) << 6);
+ return (ucs4_t)(*utf88);
+ }
+ if (((*utf88) & 0xE0) == 0xC0) { /* two byte value */
+ if ((*(utf88+1) & 0xC0) == 0x80) { /* well formed */
+ ucs4_t t = (ucs4_t)(((*utf88) & 0x1F) << 6);
if (next)
*next = utf8 + 2;
if (error)
*error = 0;
- return t + ((*utf8+1) & 0x3F);
+ return t + (*(utf88+1) & 0x3F);
} else { /* something is wrong! */
if (next)
*next = utf8 + 1;
if (error)
*error = 1;
- return (ucs4_t)(*utf8);
- }
- }
- if (((*utf8) & 0xF0) == 0xE0) { /* three byte value */
- if (((*utf8+1) & 0xC0) == 0x80
- && ((*utf8+2) & 0xC0) == 0x80) { /* well formed */
+ return (ucs4_t)(*utf88);
+ }
+ }
+ if (((*utf88) & 0xF0) == 0xE0) { /* three byte value */
+ if ((*(utf88+1) & 0xC0) == 0x80
+ && (*(utf88+2) & 0xC0) == 0x80) { /* well formed */
- ucs4_t t1 = (ucs4_t)(((*utf8) & 0x0F) << 12);
- ucs4_t t2 = (ucs4_t)(((*utf8+1) & 0x3F) << 6);
+ ucs4_t t1 = (ucs4_t)(((*utf88) & 0x0F) << 12);
+ ucs4_t t2 = (ucs4_t)((*(utf88+1) & 0x3F) << 6);
if (next)
*next = utf8 + 3;
if (error)
*error = 0;
- return t1 + t2 + ((*utf8+2) & 0x3F);
+ return t1 + t2 + (*(utf88+2) & 0x3F);
} else {
if (next)
*next = utf8 + 1;
if (error)
*error = 1;
- return (ucs4_t)(*utf8);
- }
- }
- if (((*utf8) & 0xF8) == 0xF0) { /* four byte value */
- if (((*utf8+1) & 0xC0) == 0x80
- && ((*utf8+2) & 0xC0) == 0x80
- && ((*utf8+3) & 0xC0) == 0x80) { /* well formed */
+ return (ucs4_t)(*utf88);
+ }
+ }
+ if (((*utf88) & 0xF8) == 0xF0) { /* four byte value */
+ if ((*(utf88+1) & 0xC0) == 0x80
+ && (*(utf88+2) & 0xC0) == 0x80
+ && (*(utf88+3) & 0xC0) == 0x80) { /* well formed */
- ucs4_t t1 = (ucs4_t)(((ucs4_t)(*utf8) & 0x07) << 18);
- ucs4_t t2 = (ucs4_t)(((ucs4_t)(*utf8+1) & 0x3F) << 12); /* */
- ucs4_t t3 = (ucs4_t)(((ucs4_t)(*utf8+2) & 0x3F) << 6);
+ ucs4_t t1 = (ucs4_t)(((ucs4_t)(*utf88) & 0x07) << 18);
+ ucs4_t t2 = (ucs4_t)(((ucs4_t)(*(utf88+1) & 0x3F)) << 12); /* */
+ ucs4_t t3 = (ucs4_t)(((ucs4_t)(*(utf88+2) & 0x3F)) << 6);
if (next)
*next = utf8 + 4;
if (error)
*error = 0;
- return t1 + t2 + t3 + ((*utf8+3) & 0x3F);
+ return t1 + t2 + t3 + (*(utf88+3) & 0x3F);
} else {
if (next)
*next = utf8 + 1;
if (error)
*error = 1;
- return (ucs4_t)(*utf8);
+ return (ucs4_t)(*utf88);
}
}
/* if we are here, something is terribly wrong! */
@@ -93,90 +96,93 @@
*next = utf8 + 1;
if (error)
*error = 1;
- return (ucs4_t)(*utf8);
+ return (ucs4_t)(*utf88);
}
/* convert a string of ucs4 chars into a string of utf-8 chars */
-unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next)
-{
- unsigned char *buf = out;
+char *ast_ucs4_to_utf8(ucs4_t *ucs, char *out, int outlen, ucs4_t **next)
+{
+ unsigned char *buf = (unsigned char *)out;
while (*ucs && outlen > 0)
{
+ unsigned char t;
+
if (*ucs < 0x80) { /* one byte out */
if (outlen < 2) {
/* no room left */
*next = ucs;
- *out = 0;
- return buf;
- } else {
- *out++ == (char)(*ucs);
+ *buf = 0;
+ return out;
+ } else {
+ t = *ucs;
+ *buf++ = t;
outlen -= 1;
}
} else if (*ucs < 0x800) { /* two bytes out */
if (outlen < 3) {
/* no room left */
*next = ucs;
- *out = 0;
- return buf;
+ *buf = 0;
+ return out;
} else {
ucs4_t b1,b2;
b1 = ((*ucs) >> 6);
b2 = ((*ucs) & 0x3F);
- *out++ == 0xC0 + (char)(b1);
- *out++ == 0x80 + (char)(b2);
+ *buf++ = 0xC0 + (char)(b1);
+ *buf++ = 0x80 + (char)(b2);
outlen -= 2;
}
} else if (*ucs < 0x10000) { /* three bytes out */
if (outlen < 4) {
/* no room left */
*next = ucs;
- *out = 0;
- return buf;
+ *buf = 0;
+ return out;
} else {
ucs4_t b1,b2,b3;
b1 = ((*ucs) >> 12);
b2 = (((*ucs) & 0xFFF) >> 6);
b3 = ((*ucs) & 0x3F);
- *out++ == 0xE0 + (char)(b1);
- *out++ == 0x80 + ((char)(b2));
- *out++ == 0x80 + ((char)(b3));
+ *buf++ = 0xE0 + (char)(b1);
+ *buf++ = 0x80 + ((char)(b2));
+ *buf++ = 0x80 + ((char)(b3));
outlen -= 3;
}
} else { /* four bytes out */
if (outlen < 5) {
/* no room left */
*next = ucs;
- *out = 0;
- return buf;
+ *buf = 0;
+ return out;
} else {
ucs4_t b1,b2,b3,b4;
b1 = ((*ucs) >> 18);
b2 = (((*ucs) & 0x3FFFF) >> 12);
b3 = (((*ucs) & 0xFFF) >> 6);
b4 = ((*ucs) & 0x3F);
- *out++ == 0xF0 + (char)(b1);
- *out++ == 0x80 + (char)(b2);
- *out++ == 0x80 + (char)(b3);
- *out++ == 0x80 + (char)(b4);
+ *buf++ = 0xF0 + (char)(b1);
+ *buf++ = 0x80 + (char)(b2);
+ *buf++ = 0x80 + (char)(b3);
+ *buf++ = 0x80 + (char)(b4);
outlen -= 4;
}
}
ucs++;
}
- *out = 0;
- return buf;
+ *buf = 0;
+ return out;
}
/* convert a string of ucs4 chars into a string of 8859-1 chars --
mainly just by turning it from 32 bits to 8 bits/char.
*/
-unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next, int *error)
-{
- unsigned char *buf = out;
+char *ast_ucs4_to_8859_1(ucs4_t *ucs, char *out, int outlen, ucs4_t **next, int *error)
+{
+ unsigned char *buf = (unsigned char *)out;
*error = 0;
while (*ucs && outlen > 1)
{
@@ -198,7 +204,7 @@
mainly just by turning it from 8 bits to 32 bits/char.
*/
-ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next)
+ucs4_t *ast_8859_1_to_ucs4(char *in, ucs4_t *ucs, int outlen, char **next, int *error)
{
ucs4_t *buf = ucs;
@@ -258,8 +264,19 @@
ucs4_t *p = ustr;
while (*p++)
size++;
- p = ast_calloc(size,sizeof(ucs4_t));
+ p = calloc(size,sizeof(ucs4_t));
+ if (!p)
+ return 0;
for (i=0; i<size; i++)
p[i] = ustr[i];
return p;
}
+
+int ucs4_strlen(const ucs4_t *ustr)
+{
+ int size = 0;
+ const ucs4_t *p = ustr;
+ while (*p++)
+ size++;
+ return size;
+}
More information about the asterisk-commits mailing list