[asterisk-commits] murf: branch murf/utf8-whatif r93186 - /team/murf/utf8-whatif/main/editline/

SVN commits to the Asterisk project asterisk-commits at lists.digium.com
Mon Dec 17 07:18:48 CST 2007


Author: murf
Date: Mon Dec 17 07:18:48 2007
New Revision: 93186

URL: http://svn.digium.com/view/asterisk?view=rev&rev=93186
Log:
I feel a bit stupid, but... I went out to see if maybe, hoping against hope, the maintainers of libedit had upgraded it to handle UTF-8. They have not, it appears. So, after thinking a little further, I decided to proceed a little further down this path. I thought about basing this code on UCS-4 instead of char, but even in Unicode there is still a gap between 'visible characters' and 'chars in the string' (composite chars, overlays, etc.), so either way this code will have to account for it. At least in UTF-8, from any of the bytes in a sequence, you can determine exactly which bytes are involved in the visible character.

Modified:
    team/murf/utf8-whatif/main/editline/chared.c
    team/murf/utf8-whatif/main/editline/chared.h
    team/murf/utf8-whatif/main/editline/common.c
    team/murf/utf8-whatif/main/editline/refresh.c

Modified: team/murf/utf8-whatif/main/editline/chared.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/editline/chared.c?view=diff&rev=93186&r1=93185&r2=93186
==============================================================================
--- team/murf/utf8-whatif/main/editline/chared.c (original)
+++ team/murf/utf8-whatif/main/editline/chared.c Mon Dec 17 07:18:48 2007
@@ -91,6 +91,36 @@
 	el->el_line.lastchar += num;
 }
 
+/* Byte length of up to `vischars` UTF-8 sequences starting at the cursor; stops at lastchar. */
+static int realchars_after(EditLine *el, int vischars)
+{
+	char *seq_start = el->el_line.cursor, *seq_end;
+	int nbytes = 0;
+
+	for (; vischars > 0 && el->el_line.cursor + nbytes < el->el_line.lastchar; vischars--) {
+		/* c_utf8_seq() rewrites both pointers to the first/last byte of the sequence */
+		c_utf8_seq(el, &seq_start, &seq_end);
+		nbytes += seq_end - seq_start + 1;
+		seq_start = seq_end + 1;	/* step to the byte after this sequence */
+	}
+	return nbytes;
+}
+
+/* Byte length of up to `vischars` UTF-8 sequences ending just before the cursor. */
+static int realchars_before(EditLine *el, int vischars)
+{
+	int reallen=0;
+	char *b = el->el_line.cursor, *e;	/* b: start of the last sequence counted */
+
+	/* stepping back inside the loop also counts the sequence at buffer[0] */
+	while (vischars > 0  && b > el->el_line.buffer) {
+		b--;				/* last byte of the preceding sequence */
+		c_utf8_seq(el, &b, &e);	/* b/e become its first/last byte */
+		reallen += e-b+1;
+		vischars--;
+	}
+	return reallen;
+}
+
 
 /* c_delafter():
  *	Delete num characters after the cursor
@@ -98,7 +128,8 @@
 protected void
 c_delafter(EditLine *el, int num)
 {
-
+	num = realchars_after(el,num);
+	
 	if (el->el_line.cursor + num > el->el_line.lastchar)
 		num = el->el_line.lastchar - el->el_line.cursor;
 
@@ -122,7 +153,8 @@
 protected void
 c_delbefore(EditLine *el, int num)
 {
-
+	num = realchars_before(el,num);
+	
 	if (el->el_line.cursor - num < el->el_line.buffer)
 		num = el->el_line.cursor - el->el_line.buffer;
 

Modified: team/murf/utf8-whatif/main/editline/chared.h
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/editline/chared.h?view=diff&rev=93186&r1=93185&r2=93186
==============================================================================
--- team/murf/utf8-whatif/main/editline/chared.h (original)
+++ team/murf/utf8-whatif/main/editline/chared.h Mon Dec 17 07:18:48 2007
@@ -156,4 +156,5 @@
 protected int	 ch_enlargebufs(EditLine *, size_t);
 protected void	 ch_end(EditLine *);
 
+
 #endif /* _h_el_chared */

Modified: team/murf/utf8-whatif/main/editline/common.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/editline/common.c?view=diff&rev=93186&r1=93185&r2=93186
==============================================================================
--- team/murf/utf8-whatif/main/editline/common.c (original)
+++ team/murf/utf8-whatif/main/editline/common.c Mon Dec 17 07:18:48 2007
@@ -50,6 +50,49 @@
  */
 #include "el.h"
 
+/* c_utf8_seq():
+ *	Locate the UTF-8 byte sequence that contains the byte at *begin.
+ *	On return *begin and *end point at the first and last byte of that
+ *	sequence.  A byte that is not part of a well-formed sequence is
+ *	reported all by itself (*begin == *end == the pointer passed in).
+ *	NOTE(review): static here, yet this same commit calls it from chared.c
+ *	and refresh.c -- it presumably needs external linkage plus a shared
+ *	prototype; confirm.
+ */
+static void
+c_utf8_seq(EditLine *el, char **begin, char **end)
+{
+	char *cp = *begin;
+	char *b = cp;
+	unsigned char lead;
+	int count = 3; /* a sequence has at most 3 continuation bytes */
+	int len, i;
+
+	/* back up over continuation bytes (10xxxxxx) looking for the lead byte */
+	while (count > 0 && ((unsigned char)*b & 0xc0) == 0x80 && b > el->el_line.buffer) {
+		count--;
+		b--;
+	}
+
+	/* total length announced by the lead byte; 1 means "not a lead byte" */
+	lead = (unsigned char)*b;
+	len = (lead & 0xe0) == 0xc0 ? 2 : (lead & 0xf0) == 0xe0 ? 3 : (lead & 0xf8) == 0xf0 ? 4 : 1;
+
+	/* Must end before lastchar (one past the last byte) and must reach cp:
+	 * it is possible to have backed into a previous, shorter sequence. */
+	if (b + len > el->el_line.lastchar || b + len - 1 < cp) {
+		len = 1;
+	}
+	/* every byte after the lead must be a continuation byte */
+	for (i = 1; i < len && ((unsigned char)b[i] & 0xc0) == 0x80; i++)
+		;
+	if (len == 1 || i < len) {	/* something doesn't line up: report cp alone */
+		b = cp; len = 1;
+	}
+	*begin = b;
+	*end = b + len - 1;
+}
+
 /* ed_end_of_file():
  *	Indicate end of file
  *	[^D]
@@ -301,11 +344,16 @@
 /*ARGSUSED*/
 ed_next_char(EditLine *el, int c)
 {
-
+	char *b,*e;
+	
 	if (el->el_line.cursor >= el->el_line.lastchar)
 		return (CC_ERROR);
-
-	el->el_line.cursor += el->el_state.argument;
+	
+	b = el->el_line.cursor;
+	c_utf8_seq(el, &b, &e);
+	el->el_line.cursor = e + el->el_state.argument;
+	
+	/* el->el_line.cursor += el->el_state.argument; */
 	if (el->el_line.cursor > el->el_line.lastchar)
 		el->el_line.cursor = el->el_line.lastchar;
 
@@ -352,9 +400,16 @@
 /*ARGSUSED*/
 ed_prev_char(EditLine *el, int c)
 {
+	char *b,*e;
+	b = el->el_line.cursor;
+	
+	c_utf8_seq(el, &b, &e);
 
 	if (el->el_line.cursor > el->el_line.buffer) {
-		el->el_line.cursor -= el->el_state.argument;
+		el->el_line.cursor = e - el->el_state.argument;
+
+		/* el->el_line.cursor -= el->el_state.argument; */
+
 		if (el->el_line.cursor < el->el_line.buffer)
 			el->el_line.cursor = el->el_line.buffer;
 

Modified: team/murf/utf8-whatif/main/editline/refresh.c
URL: http://svn.digium.com/view/asterisk/team/murf/utf8-whatif/main/editline/refresh.c?view=diff&rev=93186&r1=93185&r2=93186
==============================================================================
--- team/murf/utf8-whatif/main/editline/refresh.c (original)
+++ team/murf/utf8-whatif/main/editline/refresh.c Mon Dec 17 07:18:48 2007
@@ -946,8 +946,13 @@
 
 	/* do input buffer to el->el_line.cursor */
 	for (cp = el->el_line.buffer; cp < el->el_line.cursor; cp++) {
+		char *b, *e;
+		
 		c = *cp;
-		h++;		/* all chars at least this long */
+		b = cp;
+		c_utf8_seq(el, &b, &e);
+		
+		h += e-b+1;		/* all chars at least this long */
 
 		if (c == '\n') {/* handle newline in data part too */
 			h = 0;




More information about the asterisk-commits mailing list