[svn-commits] murf: branch murf/utf8-whatif r89703 - in /team/murf/utf8-whatif: include/ast...
SVN commits to the Digium repositories
svn-commits at lists.digium.com
Tue Nov 27 13:45:41 CST 2007
Author: murf
Date: Tue Nov 27 13:45:40 2007
New Revision: 89703
URL: http://svn.digium.com/view/asterisk?view=rev&rev=89703
Log:
Eh, haven't tested any of this. Wrote it Saturday, I think
Added:
team/murf/utf8-whatif/include/asterisk/unicode.h (with props)
team/murf/utf8-whatif/main/unicode.c (with props)
Modified:
team/murf/utf8-whatif/main/Makefile
Added: team/murf/utf8-whatif/include/asterisk/unicode.h
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/include/asterisk/unicode.h?view=auto&rev=89703
==============================================================================
--- team/murf/utf8-whatif/include/asterisk/unicode.h (added)
+++ team/murf/utf8-whatif/include/asterisk/unicode.h Tue Nov 27 13:45:40 2007
@@ -1,0 +1,51 @@
+/*
+ * Asterisk -- An open source telephony toolkit.
+ *
+ * Copyright (C) 2007, Digium, Inc.
+ *
+ * Steve Murphy <murf at digium.com>
+ *
+ * See http://www.asterisk.org for more information about
+ * the Asterisk project. Please do not directly contact
+ * any of the maintainers of this project for assistance;
+ * the project provides a web site, mailing lists and IRC
+ * channels for your use.
+ *
+ * This program is free software, distributed under the terms of
+ * the GNU General Public License Version 2. See the LICENSE file
+ * at the top of the source tree.
+ */
+
+#ifndef ASTERISK_UNICODE_H
+#define ASTERISK_UNICODE_H
+
+#include <sys/types.h>
+
+/* One UCS-4 character, held in 32 bits. Strings of ucs4_t are terminated by a 0 element. */
+typedef u_int32_t ucs4_t;
+
+
+/* Convert a single (possibly multi-byte) utf-8 char to UCS4.
+ * If next is non-NULL it is pointed at the byte following the utf-8 char.
+ * If error is non-NULL it is set when a problem is found in the utf-8
+ * encoding; in that case the first byte is returned as-is.
+ */
+
+ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error);
+
+/* Convert a 0-terminated string of ucs4 chars into a string of utf-8 chars.
+ * out is the destination buffer and outlen its size in bytes.
+ */
+
+unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+
+
+/* Return a pointer to the first place in ustr where theChar can be found, or 0 if nothing is found. */
+
+ucs4_t *ucs4_strchr(ucs4_t *ustr, ucs4_t theChar);
+
+
+/* Convert a string of 8859-1 chars into a string of ucs4 chars --
+ * mainly just by turning it from 8 bits to 32 bits/char.
+ * ucs is the destination buffer and outlen its size in ucs4_t elements.
+ */
+
+ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next);
+
+
+/* Convert a string of ucs4 chars into a string of 8859-1 chars --
+ * mainly just by turning it from 32 bits to 8 bits/char.
+ * out is the destination buffer and outlen its size in bytes.
+ */
+
+unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next);
+
+#endif /* ASTERISK_UNICODE_H */
+
Propchange: team/murf/utf8-whatif/include/asterisk/unicode.h
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: team/murf/utf8-whatif/include/asterisk/unicode.h
------------------------------------------------------------------------------
svn:keywords = Author Id Date Revision
Propchange: team/murf/utf8-whatif/include/asterisk/unicode.h
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: team/murf/utf8-whatif/main/Makefile
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/Makefile?view=diff&rev=89703&r1=89702&r2=89703
==============================================================================
--- team/murf/utf8-whatif/main/Makefile (original)
+++ team/murf/utf8-whatif/main/Makefile Tue Nov 27 13:45:40 2007
@@ -27,7 +27,7 @@
netsock.o slinfactory.o ast_expr2.o ast_expr2f.o \
cryptostub.o sha1.o http.o fixedjitterbuf.o abstract_jb.o \
strcompat.o threadstorage.o dial.o event.o adsistub.o audiohook.o \
- astobj2.o hashtab.o
+ astobj2.o hashtab.o unicode.o
# we need to link in the objects statically, not as a library, because
# otherwise modules will not have them available if none of the static
Added: team/murf/utf8-whatif/main/unicode.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/unicode.c?view=auto&rev=89703
==============================================================================
--- team/murf/utf8-whatif/main/unicode.c (added)
+++ team/murf/utf8-whatif/main/unicode.c Tue Nov 27 13:45:40 2007
@@ -1,0 +1,232 @@
+
+/*
+ * Asterisk -- An open source telephony toolkit.
+ *
+ * Copyright (C) 2007, Digium, Inc.
+ *
+ * Steve Murphy <murf at digium.com>
+ *
+ * See http://www.asterisk.org for more information about
+ * the Asterisk project. Please do not directly contact
+ * any of the maintainers of this project for assistance;
+ * the project provides a web site, mailing lists and IRC
+ * channels for your use.
+ *
+ * This program is free software, distributed under the terms of
+ * the GNU General Public License Version 2. See the LICENSE file
+ * at the top of the source tree.
+ */
+
+#include "asterisk/unicode.h"
+#define NULL 0
+
+/*
+ * Decode a single (possibly multi-byte) UTF-8 character into a UCS-4 value.
+ *
+ * utf8  - points at the first byte of the character; must not be NULL.
+ * next  - if non-NULL, receives a pointer to the byte following the decoded
+ *         character, or utf8 + 1 if the encoding is malformed.
+ * error - if non-NULL, set to 0 on success and to 1 if a problem is found
+ *         in the encoding.
+ *
+ * Returns the decoded value. On error the first byte is returned as-is.
+ *
+ * Continuation bytes are checked in order and checking stops at the first bad
+ * one, so a 0 terminator inside a truncated sequence is never read past.
+ * Overlong encodings and surrogate values are not rejected.
+ */
+
+ucs4_t ast_utf8_to_ucs4(unsigned char *utf8, unsigned char **next, int *error)
+{
+	ucs4_t value;
+	int trailing;	/* continuation bytes expected after the lead byte */
+	int i;
+
+	if ((utf8[0] & 0x80) == 0) {		/* 0xxxxxxx: single byte value */
+		trailing = 0;
+		value = utf8[0];
+	} else if ((utf8[0] & 0xE0) == 0xC0) {	/* 110xxxxx: two byte value */
+		trailing = 1;
+		value = utf8[0] & 0x1F;
+	} else if ((utf8[0] & 0xF0) == 0xE0) {	/* 1110xxxx: three byte value */
+		trailing = 2;
+		value = utf8[0] & 0x0F;
+	} else if ((utf8[0] & 0xF8) == 0xF0) {	/* 11110xxx: four byte value */
+		trailing = 3;
+		value = utf8[0] & 0x07;
+	} else {
+		/* stray continuation byte or a lead byte we do not handle */
+		goto malformed;
+	}
+
+	for (i = 1; i <= trailing; i++) {
+		/* every continuation byte must look like 10xxxxxx */
+		if ((utf8[i] & 0xC0) != 0x80)
+			goto malformed;
+		value = (value << 6) | (ucs4_t)(utf8[i] & 0x3F);
+	}
+
+	if (next)
+		*next = utf8 + trailing + 1;
+	if (error)
+		*error = 0;
+	return value;
+
+malformed:
+	/* skip just the offending first byte and hand it back unchanged */
+	if (next)
+		*next = utf8 + 1;
+	if (error)
+		*error = 1;
+	return (ucs4_t)utf8[0];
+}
+
+
+/*
+ * Convert a 0-terminated string of UCS-4 chars into a 0-terminated string of
+ * UTF-8 chars.
+ *
+ * ucs    - source string; must not be NULL.
+ * out    - destination buffer.
+ * outlen - size of out in bytes, including room for the terminating 0.
+ * next   - if non-NULL, receives a pointer to the first source char that was
+ *          NOT converted (the terminator when everything fit).
+ *
+ * Returns out. A char is only emitted if all of its bytes plus the terminator
+ * still fit, so the output never ends in the middle of a multi-byte sequence.
+ * Values above 0x10FFFF cannot be encoded and are replaced by U+FFFD.
+ * If outlen is not positive, nothing is written at all.
+ */
+
+unsigned char *ast_ucs4_to_utf8(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next)
+{
+	unsigned char *buf = out;
+
+	if (outlen <= 0) {	/* not even room for the terminator */
+		if (next)
+			*next = ucs;
+		return buf;
+	}
+
+	while (*ucs) {
+		ucs4_t c = *ucs;
+		int need;	/* bytes this char occupies in UTF-8 */
+
+		if (c > 0x10FFFF)
+			c = 0xFFFD;	/* out of range: use the replacement character */
+
+		if (c < 0x80)
+			need = 1;
+		else if (c < 0x800)
+			need = 2;
+		else if (c < 0x10000)
+			need = 3;
+		else
+			need = 4;
+
+		if (outlen <= need)	/* one byte must stay free for the terminator */
+			break;
+
+		switch (need) {
+		case 1:
+			*out++ = (unsigned char)c;
+			break;
+		case 2:
+			*out++ = (unsigned char)(0xC0 | (c >> 6));
+			*out++ = (unsigned char)(0x80 | (c & 0x3F));
+			break;
+		case 3:
+			*out++ = (unsigned char)(0xE0 | (c >> 12));
+			*out++ = (unsigned char)(0x80 | ((c >> 6) & 0x3F));
+			*out++ = (unsigned char)(0x80 | (c & 0x3F));
+			break;
+		default:
+			*out++ = (unsigned char)(0xF0 | (c >> 18));
+			*out++ = (unsigned char)(0x80 | ((c >> 12) & 0x3F));
+			*out++ = (unsigned char)(0x80 | ((c >> 6) & 0x3F));
+			*out++ = (unsigned char)(0x80 | (c & 0x3F));
+			break;
+		}
+		outlen -= need;
+		ucs++;
+	}
+
+	if (next)
+		*next = ucs;
+	*out = 0;	/* end the output string */
+	return buf;
+}
+
+/*
+ * Convert a 0-terminated string of UCS-4 chars into a 0-terminated string of
+ * 8859-1 chars -- mainly just by turning it from 32 bits to 8 bits/char.
+ *
+ * ucs    - source string; must not be NULL.
+ * out    - destination buffer.
+ * outlen - size of out in bytes, including room for the terminating 0.
+ * next   - if non-NULL, receives a pointer to the first source char that was
+ *          NOT converted (the terminator when everything fit).
+ *
+ * Returns out. Values above 0xFF have no 8859-1 equivalent and are written as
+ * '?'; simply dropping the upper bits could produce an embedded 0 byte or an
+ * unrelated character. If outlen is not positive, nothing is written at all.
+ */
+
+unsigned char *ast_ucs4_to_8859_1(ucs4_t *ucs, unsigned char *out, int outlen, ucs4_t **next)
+{
+	unsigned char *buf = out;
+
+	if (outlen <= 0) {	/* not even room for the terminator */
+		if (next)
+			*next = ucs;
+		return out;
+	}
+
+	while (*ucs && outlen > 1)
+	{
+		*buf++ = (*ucs > 0xFF) ? (unsigned char)'?' : (unsigned char)*ucs;
+		ucs++;
+		outlen--;
+	}
+	if (next)
+		*next = ucs;
+	*buf = 0; /* end the output string */
+	return out;
+}
+
+/*
+ * Convert a 0-terminated string of 8859-1 chars into a 0-terminated string of
+ * UCS-4 chars -- mainly just by turning it from 8 bits to 32 bits/char.
+ *
+ * in     - source string; must not be NULL.
+ * ucs    - destination buffer.
+ * outlen - size of ucs in ucs4_t elements (not bytes), including room for the
+ *          terminating 0 element.
+ * next   - if non-NULL, receives a pointer to the first source byte that was
+ *          NOT converted (the terminator when everything fit).
+ *
+ * Returns ucs. If outlen is not positive, nothing is written at all.
+ */
+
+ucs4_t *ast_8859_1_to_ucs4(unsigned char *in, ucs4_t *ucs, int outlen, unsigned char **next)
+{
+	ucs4_t *buf = ucs;
+
+	if (outlen <= 0) {	/* not even room for the terminator */
+		if (next)
+			*next = in;
+		return ucs;
+	}
+
+	while (*in && outlen > 1)
+	{
+		*buf++ = *in++; /* each 8-bit value is widened to 32 bits unchanged */
+		outlen--;
+	}
+	if (next)
+		*next = in;
+	*buf = 0; /* end the output string */
+	return ucs;
+}
+
+
+
+/*
+ * Find the first occurrence of theChar in the 0-terminated UCS-4 string ustr.
+ *
+ * Returns a pointer to the matching element, or NULL when ustr is NULL or the
+ * terminator is reached without a match. The terminator itself never matches,
+ * so searching for 0 yields NULL.
+ */
+
+ucs4_t *ucs4_strchr(ucs4_t *ustr, ucs4_t theChar)
+{
+	ucs4_t *cursor;
+
+	if (ustr == NULL)
+		return NULL;
+
+	for (cursor = ustr; *cursor != 0; cursor++) {
+		if (*cursor == theChar)
+			return cursor; /* the first match to theChar */
+	}
+
+	return NULL;
+}
+
+
+
Propchange: team/murf/utf8-whatif/main/unicode.c
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: team/murf/utf8-whatif/main/unicode.c
------------------------------------------------------------------------------
svn:keywords = Author Id Date Revision
Propchange: team/murf/utf8-whatif/main/unicode.c
------------------------------------------------------------------------------
svn:mime-type = text/plain
More information about the svn-commits
mailing list