[asterisk-dev] patch for video display on chan_oss.c

Luigi Rizzo rizzo at icir.org
Wed Jul 25 13:28:51 CDT 2007


A student of mine, Sergio Fadda (in Cc), came up with a preliminary patch to
receive video on chan_oss (in fact the original code was developed
on chan_alsa so it should be relatively straightforward to port to it),
so i am attaching it in case people are interested to give it a try.

At the moment it only works with h263plus video (e.g. one of
the formats supported by linphone, and possibly ekiga as well).

In addition to chan_oss, you need to:

+ have SDL and ffmpeg installed (no autoconf support yet);
+ have a recent trunk (77023) or at least update main/rtp.c as in

	http://svn.digium.com/view/asterisk?view=rev&revision=77023

+ manually patch the top level Makefile as described in chan_oss.c
  to add these lines near the main 'all:' target:

	# GCC configuration flags for SDL library
	ASTCFLAGS+=`sdl-config --cflags`
	# Add library for ffmpeg and SDL lib.
	SOLINK+=-lavcodec -lz -lm -g `sdl-config --libs`

+ add the following to your sip.conf

        [general](+) 
                allow=h263p


+ apply this patch to main/rtp.c (there are probably ways to
  achieve the same through the config files but i have no idea how):

@@ -1509,5 +1511,6 @@
        [31] = {1, AST_FORMAT_H261},
        [34] = {1, AST_FORMAT_H263},
        [97] = {1, AST_FORMAT_ILBC},
+       [98] = {1, AST_FORMAT_H263_PLUS},
        [99] = {1, AST_FORMAT_H264},
        [101] = {0, AST_RTP_DTMF},
        [102] = {1, AST_FORMAT_T140},   /* Real time text chat */
        [103] = {1, AST_FORMAT_H263_PLUS},

+ have an entry in your dialplan that points to your console, e.g.

	exten => 999,1,Dial(Console/dsp)


Once this is done, answering an incoming call should create a window to
display the incoming video. Or you can call a video-enabled phone
and receive the video stream.
There are known issues: e.g. if you do a 'console hangup' while the
video is displayed, it won't shut down cleanly and will probably crash
Asterisk. It is better to close the call from the remote end.

We are working on webcam support to actually produce a video stream.

	cheers
	luigi
-------------- next part --------------
Index: channels/chan_oss.c
===================================================================
--- channels/chan_oss.c	(revision 77053)
+++ channels/chan_oss.c	(working copy)
@@ -38,6 +38,13 @@
 
 #include "asterisk.h"
 
+/*
+ * experimental support to decode a video session.
+ */
+//#define DROP_PACKETS	5	// if set, simulate this percentage of lost video packets
+#define HAVE_SDL	1
+#define HAVE_FFMPEG	1
+
 ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
 
 #include <stdio.h>
@@ -77,6 +84,13 @@
 #include "asterisk/musiconhold.h"
 #include "asterisk/app.h"
 
+#if HAVE_FFMPEG
+#include <ffmpeg/avcodec.h>
+#endif
+#if HAVE_SDL
+#include <SDL/SDL.h>
+#endif
+
 /* ringtones we use */
 #include "busy.h"
 #include "ringtone.h"
@@ -286,6 +300,384 @@
 
 static int oss_debug;
 
+#if HAVE_FFMPEG && HAVE_SDL
+/*
+ * In order to decode video you need the following patch to the
+ * main Makefile:
+
+@@ -269,6 +273,11 @@
+   SOLINK=-shared -fpic -L/usr/local/ssl/lib
+ endif
+ 
++# GCC configuration flags for SDL library
++ASTCFLAGS+=`sdl-config --cflags`
++# Add library for ffmpeg and SDL lib.
++SOLINK+=-lavcodec -lz -lm -g `sdl-config --libs`
++
+ # This is used when generating the doxygen documentation
+ ifneq ($(DOT),:)
+   HAVEDOT=yes
+
+Then you need to add to sip.conf:
+	[general](+)
+		allow=h263p
+
+and this one to main/rtp.c:
+
+@@ -1509,5 +1511,6 @@
+        [31] = {1, AST_FORMAT_H261},
+        [34] = {1, AST_FORMAT_H263},
+        [97] = {1, AST_FORMAT_ILBC},
++       [98] = {1, AST_FORMAT_H263_PLUS},
+        [99] = {1, AST_FORMAT_H264},
+        [101] = {0, AST_RTP_DTMF},
+
+ */
+
+/* Structures for ffmpeg processing */
+/*
+ * Information for decoding an incoming video stream.
+ * We need one of these for each incoming video stream.
+ */
+struct video_desc {
+	AVCodecContext          *context;	/* ffmpeg decoding context */
+	AVCodec                 *codec;		/* H.263 decoder (also handles H.263+ streams) */
+	AVFrame                 *frame;		/* destination for decoded pictures */
+	AVCodecParserContext    *parser;	/* reassembles the fragmented bitstream */
+	int                     completed;	/* nonzero when a full picture was decoded */
+	uint8_t                 *data;		/* reassembly buffer for the current frame */
+	int                     datalen;	/* bytes currently stored in 'data' */
+	SDL_Surface             *screen;	/* SDL output window, created lazily in show_frame() */
+	int                     initialized;	/* set to 1 by ffmpeg_init() on success */
+	SDL_Overlay             *bmp;		/* YUV overlay the frames are blitted into */
+	int                     lastrxframe;	/* RTP seqno of the last fragment, -1 before first */
+	int                     discard;	/* when set, drop fragments until the next marked packet */
+};
+
+/* Helper functions to process incoming video.
+ * For each incoming video call, invoke ffmpeg_init() to initialize
+ * the decoding structure; incoming video frames are then processed
+ * by write_video(), which in turn calls pre_process_data() to extract
+ * the bitstream and accumulates data into a buffer within video_desc.
+ * When a frame is complete (determined by the marker bit in the RTP
+ * header) call decode_video() to decode it and, if successful, call
+ * show_frame() to display the frame.
+ */
+/* Initialize the decoding structure */
+static void ffmpeg_init(struct video_desc *);
+/* Uninitialize the decoding structure */
+static void ffmpeg_uninit(struct video_desc *);
+/* Clean the bitstream in the RTP payload */
+static uint8_t *pre_process_data(uint8_t *, int *);
+/* Decode video frame once completed */
+static int decode_video(struct video_desc *);
+/* Display the decoded frame */
+static void show_frame(struct video_desc *);
+
+static struct video_desc *get_video_desc(struct ast_channel *c);
+
+/* Macros used as a wrapper around the actual video format we want to use */
+#define AST_FORMAT_CUSTOM (AST_FORMAT_H263_PLUS)
+#define CODEC_ID_CUSTOM CODEC_ID_H263
+static int write_video(struct ast_channel *chan, struct ast_frame *f);
+
+/*
+ * Initialize the video_desc struct which contains all the structures
+ * needed by the ffmpeg and SDL libraries:
+ * - register all codecs supported by ffmpeg;
+ * - locate the H.263 decoder (it can also decode H.263+ streams);
+ * - allocate and open the codec context;
+ * - initialize the codec parser (used to reconstruct complete
+ *   frames from an arbitrarily fragmented bitstream);
+ * - allocate the destination frame;
+ * - initialize the SDL video subsystem.
+ * On success env->initialized is set to 1; on any failure we return
+ * early with env->initialized still 0, so write_video() refuses to
+ * process frames (partially allocated resources are released later
+ * by ffmpeg_uninit()).
+ */
+static void ffmpeg_init(struct video_desc *env)
+{
+	env->codec              = NULL;
+	env->context            = NULL;
+	env->frame              = NULL;
+	env->parser             = NULL;
+	env->data               = NULL;
+	env->completed          = 0;
+	env->datalen            = 0;
+	env->screen             = NULL;
+	env->initialized        = 0;
+	env->bmp                = NULL;
+	env->lastrxframe        = -1;
+	env->discard            = 0;	/* was left uninitialized before */
+
+	avcodec_init();
+	/*
+	 * Register all codecs supported by the ffmpeg library.
+	 */
+	avcodec_register_all();
+
+	/*
+	 * Look for the H.263 decoder; for decoding purposes it is
+	 * compatible with an H.263+ stream.
+	 */
+	env->codec = avcodec_find_decoder(CODEC_ID_H263);
+	if(!env->codec) {
+		ast_log(LOG_WARNING, "Unable to find the H.263 decoder\n");
+		return;
+	}
+
+	/*
+	* Allocate and open the codec context.
+	*/
+	env->context = avcodec_alloc_context();
+	if(avcodec_open(env->context, env->codec) < 0) {
+		ast_log(LOG_WARNING, "Unable to open the codec context\n");
+		return;
+	}
+
+	env->parser = av_parser_init(CODEC_ID_H263);
+	if(!env->parser) {
+		ast_log(LOG_WARNING, "Unable to initialize the H.263 codec parser\n");
+		return;
+	}
+
+	env->frame = avcodec_alloc_frame();
+	if(!env->frame) {
+		ast_log(LOG_WARNING, "Unable to allocate the video frame\n");
+		return;
+	}
+
+	/* SDL specific initialization; log through asterisk, not stderr */
+	if(SDL_Init(SDL_INIT_VIDEO)) {
+		ast_log(LOG_ERROR, "Could not initialize SDL - %s\n", SDL_GetError());
+		return;
+	}
+
+	env->initialized = 1;
+}
+
+/*
+ * Free all resources allocated by ffmpeg_init() and shut down the
+ * ffmpeg and SDL environments. Safe to call on a partially
+ * initialized (or already cleaned) descriptor; the final memset
+ * clears every pointer and resets 'initialized' to 0.
+ */
+static void ffmpeg_uninit(struct video_desc *env)
+{
+	if (!env) {
+		ast_log(LOG_WARNING, "ffmpeg_uninit on null\n");
+		return;
+	}
+	if(env->context) {
+		avcodec_close(env->context);
+		av_free(env->context);
+	}
+	if(env->parser)
+		av_parser_close(env->parser);	/* was leaked before */
+	if(env->frame)
+		av_free(env->frame);
+	free(env->data);	/* free(NULL) is a no-op, no guard needed */
+	if(env->bmp)
+		SDL_FreeYUVOverlay(env->bmp);
+	SDL_Quit();
+	memset(env, 0, sizeof(struct video_desc));
+}
+
+#define MAKE_MASK(bits)                ( (1<<(bits)) -1 )	/* low 'bits' bits set */
+
+/*
+ * Get the P flag (picture/GOB start indicator) from the first octet
+ * of the H.263+ payload header in the RTP payload (see RFC 2429,
+ * Section 5.1: the first octet is RR(5 bits), P(1), V(1), ...).
+ */
+static inline unsigned int rfc2429_get_P(const uint8_t *header){
+	return (header[0]>>2) & 0x1;
+}
+
+/*
+ * Get the 6-bit PLEN field (length of the extra picture header)
+ * from the H.263+ payload header in the RTP payload (see RFC 2429,
+ * Section 5.1). The two header octets are read individually: the
+ * previous cast of the byte pointer to 'unsigned short *' was both
+ * a strict-aliasing violation and a potentially misaligned load.
+ */
+static inline unsigned int rfc2429_get_PLEN(const uint8_t *header){
+	unsigned int v = ((unsigned int)header[0] << 8) | header[1];	/* 16 bits, network order */
+	return (v >> 3) & MAKE_MASK(6);
+}
+
+/*
+ * Skip the extra header in the bitstream and reconstruct a valid
+ * H.263+ start code when the P bit is set (the two zero bytes
+ * elided on the wire are restored in place, see RFC 2429).
+ * On return *len holds the remaining payload length.
+ * Returns NULL when the payload is malformed or too short, in
+ * which case the packet should be discarded.
+ */
+static uint8_t *pre_process_data(uint8_t *data, int *len)
+{
+	int PLEN;
+	int P;
+
+	if(data == NULL)
+		return NULL;
+	if(*len < 2)
+		return NULL;
+
+	PLEN = rfc2429_get_PLEN(data);
+	P = rfc2429_get_P(data);
+
+	if(PLEN > 0) {
+		if(PLEN >= *len)	/* malformed: extra header longer than payload */
+			return NULL;
+		data += PLEN;
+		(*len) -= PLEN;
+	}
+	if(P) {
+		/* restore the two zero bytes of the start code */
+		data[0] = data[1] = 0;
+	} else {
+		if(*len < 2)	/* nothing left after skipping the payload header */
+			return NULL;
+		data += 2;
+		(*len) -= 2;
+	}
+
+	return data;
+}
+
+/*
+ * Decode the accumulated H.263 bitstream in env->data.
+ * av_parser_parse() merges a randomly chopped up stream into proper
+ * frames; each complete frame is then passed to
+ * avcodec_decode_video(), which sets env->completed once a picture
+ * is ready for display.
+ * Returns 1 on success, 0 on a decoding error.
+ */
+static int decode_video(struct video_desc *env)
+{
+	uint8_t *aux = env->data;
+	int len = env->datalen;
+	uint8_t *data;
+	int datalen;
+
+	while(len > 0) {
+		/* 'used' is the number of input bytes consumed by the parser;
+		 * the old code only advanced (and by the decoder's return
+		 * value) when a frame was produced, looping forever otherwise. */
+		int used = av_parser_parse(env->parser, env->context, &data, &datalen, aux, len, 0, 0);
+		if(datalen) {
+			int ret = avcodec_decode_video(env->context, env->frame, &(env->completed), data, datalen);
+			if(ret < 0) {
+				ast_log(LOG_NOTICE, "Error decoding video frame\n");
+				return 0;
+			}
+		}
+		if(used <= 0)	/* no progress: bail out instead of spinning */
+			break;
+		aux += used;
+		len -= used;
+	}
+
+	return 1;
+}
+
+/*
+ * Display the decoded video frame using the SDL library.
+ * - On the first frame, set the video mode to the resolution
+ *   reported by the codec context.
+ * - Lazily create a YUV overlay and copy the decoded frame into it
+ *   (U and V planes are swapped because the overlay is YV12 while
+ *   the decoder output is YUV420P).
+ * TODO: change the call to img_convert(): it is deprecated.
+ */
+static void show_frame(struct video_desc *env)
+{
+	AVPicture pict;
+	SDL_Rect rect;
+
+	if(env->screen == NULL) {
+		env->screen = SDL_SetVideoMode(env->context->width, env->context->height, 0, 0);
+		if(!env->screen) {
+			ast_log(LOG_ERROR, "SDL: could not set video mode - exiting\n");
+			return;
+		}
+		SDL_WM_SetCaption("Asterisk console Video Output", NULL);
+	}
+
+	if(!env->bmp)
+		env->bmp = SDL_CreateYUVOverlay(env->context->width, env->context->height,
+			SDL_YV12_OVERLAY, env->screen);
+	if(!env->bmp) {	/* overlay creation can fail; don't dereference NULL */
+		ast_log(LOG_ERROR, "SDL: could not create the YUV overlay\n");
+		return;
+	}
+
+	SDL_LockYUVOverlay(env->bmp);
+	/* YV12 stores the planes as Y, V, U, hence the swapped indexes */
+	pict.data[0] = env->bmp->pixels[0];
+	pict.data[1] = env->bmp->pixels[2];
+	pict.data[2] = env->bmp->pixels[1];
+	pict.linesize[0] = env->bmp->pitches[0];
+	pict.linesize[1] = env->bmp->pitches[2];
+	pict.linesize[2] = env->bmp->pitches[1];
+
+	img_convert(&pict, PIX_FMT_YUV420P,
+		(AVPicture *)env->frame, env->context->pix_fmt,
+		env->context->width, env->context->height);
+	SDL_UnlockYUVOverlay(env->bmp);
+
+	rect.x = 0; rect.y = 0;
+	rect.w = env->context->width;
+	rect.h = env->context->height;
+	SDL_DisplayYUVOverlay(env->bmp, &rect);
+}
+
+/*
+ * This function is called (by asterisk) for each video fragment that
+ * needs to be processed. We need to reconstruct the entire frame
+ * before we can decode it:
+ * - clean the bitstream with pre_process_data();
+ * - append the fragment to the per-call reassembly buffer;
+ * - if the fragment is the last one (RTP marker) decode the frame
+ *   with decode_video() and display it with show_frame().
+ * Out-of-order or malformed fragments switch the decoder into
+ * 'discard' mode until the next marked packet, where decoding can
+ * restart cleanly.
+ */
+static int write_video(struct ast_channel *chan, struct ast_frame *f)
+{
+	uint8_t *data;
+	int len;
+	struct video_desc *env = get_video_desc(chan);
+
+	if(!env || !env->initialized)
+		return -1;	/* error */
+
+#if defined(DROP_PACKETS) && DROP_PACKETS > 0
+	/*
+	* Fragment of code to simulate lost/delayed packets
+	*/
+	if((random() % 10000) <= 100*DROP_PACKETS) {
+		ast_log(LOG_NOTICE, "Packet lost [%d]\n", f->seqno);
+		return 0;
+	}
+#endif
+	/*
+	* If the discard flag is set, every packet must be discarded.
+	* When a marked packet arrives we can restart the decoding.
+	*/
+	if(env->discard) {
+		if(f->subclass & 0x01) {
+			free(env->data);
+			env->data = NULL;
+			env->datalen = 0;
+			env->lastrxframe = f->seqno;
+			env->discard = 0;
+		}
+		return 0;
+	}
+
+	/*
+	* Only ordered fragments will be accepted.
+	* seqno is a 16 bit number, so the expected successor is
+	* computed modulo 0x10000.
+	*/
+	if((env->lastrxframe+1)%0x10000 != f->seqno && env->lastrxframe != -1) {
+		env->discard = 1;
+		return 0;
+	}
+
+	len = f->datalen;
+	data = pre_process_data(f->data, &len);
+	if(data == NULL || len <= 0) {	/* malformed payload: resync on next marker */
+		env->discard = 1;
+		return 0;
+	}
+	if(env->data == NULL) {
+		env->data = malloc(len);
+		env->datalen = 0;
+	} else {
+		/* use a temporary so the buffer is not leaked if realloc fails */
+		uint8_t *tmp = realloc(env->data, env->datalen+len);
+		if(tmp == NULL) {
+			free(env->data);
+			env->data = NULL;
+		}
+		else
+			env->data = tmp;
+	}
+	if(env->data == NULL) {	/* allocation failure */
+		env->datalen = 0;
+		env->discard = 1;
+		return 0;
+	}
+	memcpy(env->data+env->datalen, data, len);
+	env->datalen += len;
+	if(f->subclass & 0x01) {	/* RTP Marker: last fragment of the frame */
+		if(decode_video(env)) {
+			show_frame(env);
+			env->completed = 0;
+			free(env->data);
+			env->data = NULL;
+			env->datalen = 0;
+		}
+	}
+	env->lastrxframe = f->seqno;
+
+	return 0;
+}
+
+#else
+#define	AST_FORMAT_CUSTOM 0
+#endif	/* FFMPEG */
+
 /*!
  * Each sound is made of 'datalen' samples of sound, repeated as needed to
  * generate 'samplen' samples of data, then followed by 'silencelen' samples
@@ -382,8 +774,20 @@
 	char oss_read_buf[FRAME_SIZE * 2 + AST_FRIENDLY_OFFSET];
 	int readpos;				/*!< read position above */
 	struct ast_frame read_f;	/*!< returned by oss_read */
+
+#if HAVE_FFMPEG
+	struct video_desc env;
+#endif
 };
 
+#if HAVE_FFMPEG
+/* Return the video descriptor embedded in the channel's console
+ * private data, or NULL if the channel has no tech_pvt attached. */
+static struct video_desc *get_video_desc(struct ast_channel *c)
+{
+	struct chan_oss_pvt *o = c->tech_pvt;
+	return o ? &(o->env) : NULL;
+}
+#endif
+
 static struct chan_oss_pvt oss_default = {
 	.cursound = -1,
 	.sounddev = -1,
@@ -420,7 +824,10 @@
 static const struct ast_channel_tech oss_tech = {
 	.type = "Console",
 	.description = tdesc,
-	.capabilities = AST_FORMAT_SLINEAR,
+	/* Format that we need to process.
+	 * This option is overriden by the configuration file
+	 */
+	.capabilities = AST_FORMAT_SLINEAR | AST_FORMAT_CUSTOM,
 	.requester = oss_request,
 	.send_digit_begin = oss_digit_begin,
 	.send_digit_end = oss_digit_end,
@@ -430,6 +837,8 @@
 	.read = oss_read,
 	.call = oss_call,
 	.write = oss_write,
+	/* We need this to declare the capabilities to process video frame */
+	.write_video = write_video,
 	.indicate = oss_indicate,
 	.fixup = oss_fixup,
 };
@@ -865,6 +1274,9 @@
 	c->tech_pvt = NULL;
 	o->owner = NULL;
 	ast_verbose(" << Hangup on console >> \n");
+#if HAVE_FFMPEG
+	ffmpeg_uninit(&o->env);
+#endif
 	ast_module_unref(ast_module_info->self);
 	if (o->hookstate) {
 		if (o->autoanswer || o->autohangup) {
@@ -1029,6 +1441,11 @@
 		setformat(o, O_RDWR);
 	c->fds[0] = o->sounddev;	/* -1 if device closed, override later */
 	c->nativeformats = AST_FORMAT_SLINEAR;
+
+	/* if the console makes the call, add video */
+	if (state == 5)
+		c->nativeformats |= AST_FORMAT_CUSTOM;
+
 	c->readformat = AST_FORMAT_SLINEAR;
 	c->writeformat = AST_FORMAT_SLINEAR;
 	c->tech_pvt = o;
@@ -1055,6 +1472,13 @@
 		}
 	}
 
+#if HAVE_FFMPEG
+	/* Let's initialize the environment only if a new call arrives */
+	/* Initializations for ffmpeg decoding */
+	/* XXX This should be allocated for each video session */
+	ffmpeg_init(&o->env);
+#endif
+
 	return c;
 }
 
@@ -1219,6 +1643,7 @@
 
 	if (a->argc != e->args)
 		return CLI_SHOWUSAGE;
+	/* XXX this is similar to what is done in oss_hangup */
 	o->cursound = -1;
 	o->nosound = 0;
 	if (!o->owner && !o->hookstate) { /* XXX maybe only one ? */
@@ -1229,6 +1654,9 @@
 	if (o->owner)
 		ast_queue_hangup(o->owner);
 	setformat(o, O_CLOSE);
+#if HAVE_FFMPEG
+	ffmpeg_uninit(&o->env);
+#endif
 	return CLI_SUCCESS;
 }
 


More information about the asterisk-dev mailing list