[asterisk-commits] rizzo: branch group/video_console r89604 - /team/group/video_console/channels/
SVN commits to the Asterisk project
asterisk-commits at lists.digium.com
Mon Nov 26 12:23:55 CST 2007
Author: rizzo
Date: Mon Nov 26 12:23:55 2007
New Revision: 89604
URL: http://svn.digium.com/view/asterisk?view=rev&rev=89604
Log:
add file for video support
Added:
team/group/video_console/channels/console_video.c (with props)
Added: team/group/video_console/channels/console_video.c
URL: http://svn.digium.com/view/asterisk/team/group/video_console/channels/console_video.c?view=auto&rev=89604
==============================================================================
--- team/group/video_console/channels/console_video.c (added)
+++ team/group/video_console/channels/console_video.c Mon Nov 26 12:23:55 2007
@@ -1,0 +1,2784 @@
+/*
+ * Experimental support for video sessions. We use SDL for rendering, ffmpeg
+ * as the codec library for encoding and decoding, and Video4Linux and X11
+ * to generate the local video stream.
+ *
+ * If one of these pieces is not available, either at compile time or at
+ * runtime, we do our best to run without it. Of course, no codec library
+ * means we can only deal with raw data, no SDL means we cannot do rendering,
+ * no V4L or X11 means we cannot generate data (but in principle we could
+ * stream from or record to a file).
+ *
+ * We need a recent (2007.07.12 or newer) version of ffmpeg to avoid warnings.
+ * Older versions might give 'deprecated' messages during compilation,
+ * thus not compiling in AST_DEVMODE, or don't have swscale, in which case
+ * you can try to compile #defining OLD_FFMPEG here.
+ *
+ * $Revision$
+ */
+
+//#define DROP_PACKETS 5 /* if set, drop this % of video packets */
+//#define OLD_FFMPEG 1 /* set for old ffmpeg with no swscale */
+
+/*
+ * Codecs are absolutely necessary or we cannot do anything.
+ * In principle SDL is optional too (used for rendering only, but we
+ * could still source data withouth it), however at the moment it is required.
+ */
+#if defined(HAVE_FFMPEG) && defined(HAVE_SDL)
+
+/*
+The code is structured as follows.
+
+When a new console channel is created, we call console_video_start()
+to initialize SDL, the source, and the encoder/ decoder for the
+formats in use (XXX the latter two should be done later, once the
+codec negotiation is complete). Also, a thread is created to handle
+the video source and generate frames.
+
+While communication is on, the local source is generated by the
+video thread, which wakes up periodically, generates frames and
+enqueues them in chan->readq. Incoming rtp frames are passed to
+console_write_video(), decoded and passed to SDL for display.
+
+For as unfortunate and confusing as it can be, we need to deal with a
+number of different video representations (size, codec/pixel format,
+codec parameters), as follows:
+
+ loc_src is the data coming from the camera/X11/etc.
+ The format is typically constrained by the video source.
+
+ enc_in is the input required by the encoder.
+ Typically constrained in size by the encoder type.
+
+ enc_out is the bitstream transmitted over RTP.
+ Typically negotiated while the call is established.
+
+ loc_dpy is the format used to display the local video source.
+ Depending on user preferences this can have the same size as
+ loc_src_fmt, or enc_in_fmt, or thumbnail size (e.g. PiP output)
+
+ dec_in is the incoming RTP bitstream. Negotiated
+ during call establishment, it is not necessarily the same as
+ enc_in_fmt
+
+ dec_out the output of the decoder.
+ The format is whatever the other side sends, and the
+ buffer is allocated by avcodec_decode_... so we only
+ copy the data here.
+
+ rem_dpy the format used to display the remote stream
+
+We store the format info together with the buffer storing the data.
+As a future optimization, a format/buffer may reference another one
+if the formats are equivalent. This will save some unnecessary format
+conversion.
+
+
+In order to handle video you need to add to sip.conf (and presumably
+iax.conf too) the following:
+
+ [general](+)
+ videosupport=yes
+ allow=h263 ; this or other video formats
+ allow=h263p ; this or other video formats
+
+ */
+
+#include <X11/Xlib.h> /* this should be conditional */
+
+#include <ffmpeg/avcodec.h>
+#ifndef OLD_FFMPEG
+#include <ffmpeg/swscale.h> /* requires a recent ffmpeg */
+#endif
+
+#include <SDL/SDL.h>
+#include <SDL/SDL_image.h> /* for loading images */
+
+/*
+ * In many places we use buffers to store the raw frames (but not only),
+ * so here is a structure to keep all the info. data = NULL means the
+ * structure is not initialized, so the other fields are invalid.
+ * size = 0 means the buffer is not malloc'ed so we don't have to free it.
+ */
+struct fbuf_t { /* frame buffers, dynamically allocated */
+ uint8_t *data; /* memory, malloced if size > 0, just reference
+ * otherwise */
+ int size; /* total size in bytes */
+ int used; /* space used so far */
+ int ebit; /* bits to ignore at the end */
+ int w;
+ int h;
+ int pix_fmt;
+};
+
+struct video_codec_desc; /* forward declaration */
+/*
+ * Descriptor of the local source, made of the following pieces:
+ * + configuration info (geometry, device name, fps...). These are read
+ * from the config file and copied here before calling video_out_init();
+ * + the frame buffer (buf) and source pixel format, allocated at init time;
+ * + the encoding and RTP info, including timestamps to generate
+ * frames at the correct rate;
+ * + source-specific info, i.e. fd for /dev/video, dpy-image for x11, etc,
+ * filled in by video_open
+ * NOTE: loc_src.data == NULL means the rest of the struct is invalid, and
+ * the video source is not available.
+ */
+struct video_out_desc {
+ /* video device support.
+ * videodevice and geometry are read from the config file.
+ * At the right time we try to open it and allocate a buffer.
+ * If we are successful, webcam_bufsize > 0 and we can read.
+ */
+ /* all the following is config file info copied from the parent */
+ char videodevice[64];
+ int fps;
+ int bitrate;
+ int qmin;
+
+ int sendvideo;
+
+ struct fbuf_t loc_src; /* local source buffer, allocated in video_open() */
+ struct fbuf_t enc_in; /* encoder input buffer, allocated in video_out_init() */
+ struct fbuf_t enc_out; /* encoder output buffer, allocated in video_out_init() */
+ struct fbuf_t loc_dpy; /* display source buffer, no buffer (managed by SDL in bmp[1]) */
+ struct fbuf_t keypad_dpy; /* keypad source buffer, XXX */
+
+ struct video_codec_desc *enc; /* encoder */
+ AVCodecContext *enc_ctx; /* encoding context */
+ AVCodec *codec;
+ AVFrame *frame; /* The initial part is an AVPicture */
+ int mtu;
+ struct timeval last_frame; /* when we read the last frame ? */
+
+ /* device specific info */
+ int fd; /* file descriptor, for webcam */
+ Display *dpy; /* x11 grabber info */
+ XImage *image;
+};
+
+/*
+ * Descriptor for the incoming stream, with a buffer for the bitstream
+ * extracted by the RTP packets, RTP reassembly info, and a frame buffer
+ * for the decoded frame (buf).
+ * and store the result in a suitable frame buffer for later display.
+ * NOTE: dec_ctx == NULL means the rest is invalid (e.g. because no
+ * codec, no memory, etc.) and we must drop all incoming frames.
+ *
+ * Incoming payload is stored in one of the dec_in[] buffers, which are
+ * emptied by the video thread. These buffers are organized in a circular
+ * queue, with dec_in_cur being the buffer in use by the incoming stream,
+ * and dec_in_dpy is the one being displayed. When the pointers need to
+ * be changed, we synchronize the access to them with dec_in_lock.
+ * When the list is full dec_in_cur = NULL (we cannot store new data),
+ * when the list is empty dec_in_dpy is NULL (we cannot display frames).
+ */
+struct video_in_desc {
+ struct video_codec_desc *dec; /* decoder */
+ AVCodecContext *dec_ctx; /* information about the codec in the stream */
+ AVCodec *codec; /* reference to the codec */
+ AVFrame *d_frame; /* place to store the decoded frame */
+ AVCodecParserContext *parser;
+ uint16_t next_seq; /* must be 16 bit */
+ int discard; /* flag for discard status */
+#define N_DEC_IN 3 /* number of incoming buffers */
+ struct fbuf_t *dec_in_cur; /* buffer being filled in */
+ struct fbuf_t *dec_in_dpy; /* buffer to display */
+ ast_mutex_t dec_in_lock;
+ struct fbuf_t dec_in[N_DEC_IN]; /* incoming bitstream, allocated/extended in fbuf_append() */
+ struct fbuf_t dec_out; /* decoded frame, no buffer (data is in AVFrame) */
+ struct fbuf_t rem_dpy; /* display remote image, no buffer (it is in win[WIN_REMOTE].bmp) */
+};
+
+/*
+ * Each codec is defined by a number of callbacks
+ */
+/*! \brief initialize the encoder */
+typedef int (*encoder_init_f)(struct video_out_desc *v);
+
+/*! \brief actually call the encoder */
+typedef int (*encoder_encode_f)(struct video_out_desc *v);
+
+/*! \brief encapsulate the bistream in RTP frames */
+typedef struct ast_frame *(*encoder_encap_f)(struct video_out_desc *out,
+ struct ast_frame **tail);
+
+/*! \brief inizialize the decoder */
+typedef int (*decoder_init_f)(struct video_in_desc *v);
+
+/*! \brief extract the bitstream from RTP frames and store in the fbuf.
+ * return 0 if ok, 1 on error
+ */
+typedef int (*decoder_decap_f)(struct fbuf_t *b, uint8_t *data, int len);
+
+/*! \brief actually call the decoder */
+typedef int (*decoder_decode_f)(struct video_in_desc *v, struct fbuf_t *b);
+
+struct video_codec_desc {
+ const char *name; /* format name */
+ int format; /* AST_FORMAT_* */
+ encoder_init_f enc_init;
+ encoder_encap_f enc_encap;
+ encoder_encode_f enc_run;
+ decoder_init_f dec_init;
+ decoder_decap_f dec_decap;
+ decoder_decode_f dec_run;
+};
+
+/* our representation of a displayed window. SDL can only do one main
+ * window so we map everything within that one
+ */
+enum { WIN_LOCAL, WIN_REMOTE, WIN_KEYPAD, WIN_ALL, WIN_MAX };
+
+struct display_window {
+ SDL_Overlay *bmp;
+ SDL_Rect rect; /* loc. of images */
+};
+
+/*
+ * The overall descriptor, with room for config info, video source and
+ * received data descriptors, SDL info, etc.
+ */
+struct video_desc {
+ char codec_name[64]; /* the codec we use */
+
+ pthread_t vthread; /* video thread */
+ int shutdown; /* set to shutdown vthread */
+ struct ast_channel *owner; /* owner channel */
+
+ struct video_in_desc in; /* remote video descriptor */
+ struct video_out_desc out; /* local video descriptor */
+
+ /* support for display. */
+ int sdl_ok;
+ SDL_Surface *screen; /* the main window */
+ SDL_Surface *keypad; /* the pixmap for the keypad */
+ char keypad_file[256]; /* image for the keypad */
+ SDL_Surface *keypad_bg; /* the keypad background, mapping locations to keys */
+ char keypad_mask[256]; /* background for the keypad */
+ char inbuf[256]; /* buffer for keypad input */
+ int inbuf_pos; /* next free position in inbuf */
+ int outfd; /* fd for output */
+ struct display_window win[WIN_MAX];
+};
+
+/*! The list of video formats we support. */
+#define CONSOLE_FORMAT_VIDEO ( \
+ AST_FORMAT_H263_PLUS | AST_FORMAT_H263 | \
+ AST_FORMAT_MP4_VIDEO | \
+ AST_FORMAT_H264 | AST_FORMAT_H261)
+
+static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p);
+
+static void fbuf_free(struct fbuf_t *b)
+{
+ struct fbuf_t x = *b;
+
+ if (b->data && b->size)
+ ast_free(b->data);
+ bzero(b, sizeof(*b));
+ /* restore some fields */
+ b->w = x.w;
+ b->h = x.h;
+ b->pix_fmt = x.pix_fmt;
+}
+
+/*
+ * Append a chunk of data to a buffer taking care of bit alignment
+ * Return 0 on success, != 0 on failure
+ */
+static int fbuf_append(struct fbuf_t *b, uint8_t *src, int len,
+ int sbit, int ebit)
+{
+ /*
+ * Allocate buffer. ffmpeg wants an extra FF_INPUT_BUFFER_PADDING_SIZE,
+ * and also wants 0 as a buffer terminator to prevent trouble.
+ */
+ int need = len + FF_INPUT_BUFFER_PADDING_SIZE;
+ int i;
+ uint8_t *dst, mask;
+
+ if (b->data == NULL) {
+ b->size = need;
+ b->used = 0;
+ b->ebit = 0;
+ b->data = ast_calloc(1, b->size);
+ } else if (b->used + need > b->size) {
+ b->size = b->used + need;
+ b->data = ast_realloc(b->data, b->size);
+ }
+ if (b->data == NULL) {
+ ast_log(LOG_WARNING, "alloc failure for %d, discard\n",
+ b->size);
+ return 1;
+ }
+ if (b->used == 0 && b->ebit != 0) {
+ ast_log(LOG_WARNING, "ebit not reset at start\n");
+ b->ebit = 0;
+ }
+ dst = b->data + b->used;
+ i = b->ebit + sbit; /* bits to ignore around */
+ if (i == 0) { /* easy case, just append */
+ /* do everything in the common block */
+ } else if (i == 8) { /* easy too, just handle the overlap byte */
+ mask = (1 << b->ebit) - 1;
+ /* update the last byte in the buffer */
+ dst[-1] &= ~mask; /* clear bits to ignore */
+ dst[-1] |= (*src & mask); /* append new bits */
+ src += 1; /* skip and prepare for common block */
+ len --;
+ } else { /* must shift the new block, not done yet */
+ ast_log(LOG_WARNING, "must handle shift %d %d at %d\n",
+ b->ebit, sbit, b->used);
+ return 1;
+ }
+ memcpy(dst, src, len);
+ b->used += len;
+ b->ebit = ebit;
+ b->data[b->used] = 0; /* padding */
+ return 0;
+}
+
+/*!
+ * Build an ast_frame for a given chunk of data, and link it into
+ * the queue, with possibly 'head' bytes at the beginning to
+ * fill in some fields later.
+ */
+static struct ast_frame *create_video_frame(uint8_t *start, uint8_t *end,
+ int format, int head, struct ast_frame *prev)
+{
+ int len = end-start;
+ uint8_t *data;
+ struct ast_frame *f;
+
+ data = ast_calloc(1, len+head);
+ f = ast_calloc(1, sizeof(*f));
+ if (f == NULL || data == NULL) {
+ ast_log(LOG_WARNING, "--- frame error f %p data %p len %d format %d\n",
+ f, data, len, format);
+ if (f)
+ ast_free(f);
+ if (data)
+ ast_free(data);
+ return NULL;
+ }
+ memcpy(data+head, start, len);
+ f->data = data;
+ f->mallocd = AST_MALLOCD_DATA | AST_MALLOCD_HDR;
+ //f->has_timing_info = 1;
+ //f->ts = ast_tvdiff_ms(ast_tvnow(), out->ts);
+ f->datalen = len+head;
+ f->frametype = AST_FRAME_VIDEO;
+ f->subclass = format;
+ f->samples = 0;
+ f->offset = 0;
+ f->src = "Console";
+ f->delivery.tv_sec = 0;
+ f->delivery.tv_usec = 0;
+ f->seqno = 0;
+ AST_LIST_NEXT(f, frame_list) = NULL;
+
+ if (prev)
+ AST_LIST_NEXT(prev, frame_list) = f;
+
+ return f;
+}
+
+/* some debugging code to check the bitstream:
+ * declare a bit buffer, initialize it, and fetch data from it.
+ */
+struct bitbuf {
+ const uint8_t *base;
+ int bitsize; /* total size in bits */
+ int ofs; /* next bit to read */
+};
+
+static struct bitbuf bitbuf_init(const uint8_t *base, int bitsize, int start_ofs)
+{
+ struct bitbuf a;
+ a.base = base;
+ a.bitsize = bitsize;
+ a.ofs = start_ofs;
+ return a;
+}
+
+static int bitbuf_left(struct bitbuf *b)
+{
+ return b->bitsize - b->ofs;
+}
+
+static uint32_t getbits(struct bitbuf *b, int n)
+{
+ int i, ofs;
+ const uint8_t *d;
+ uint8_t mask;
+ uint32_t retval = 0;
+ if (n> 31) {
+ ast_log(LOG_WARNING, "too many bits %d, max 32\n", n);
+ return 0;
+ }
+ if (n + b->ofs > b->bitsize) {
+ ast_log(LOG_WARNING, "bitbuf overflow %d of %d\n", n + b->ofs, b->bitsize);
+ n = b->bitsize - b->ofs;
+ }
+ ofs = 7 - b->ofs % 8; /* start from msb */
+ mask = 1 << ofs;
+ d = b->base + b->ofs / 8; /* current byte */
+ for (i=0 ; i < n; i++) {
+ retval += retval + (*d & mask ? 1 : 0); /* shift in new byte */
+ b->ofs++;
+ mask >>= 1;
+ if (mask == 0) {
+ d++;
+ mask = 0x80;
+ }
+ }
+ return retval;
+}
+
+static void check_h261(struct fbuf_t *b)
+{
+ struct bitbuf a = bitbuf_init(b->data, b->used * 8, 0);
+ uint32_t x, y;
+
+ x = getbits(&a, 20); /* PSC, 0000 0000 0000 0001 0000 */
+ if (x != 0x10) {
+ ast_log(LOG_WARNING, "bad PSC 0x%x\n", x);
+ return;
+ }
+ x = getbits(&a, 5); /* temporal reference */
+ y = getbits(&a, 6); /* ptype */
+ if (0)
+ ast_log(LOG_WARNING, "size %d TR %d PTY spl %d doc %d freeze %d %sCIF hi %d\n",
+ b->used,
+ x,
+ (y & 0x20) ? 1 : 0,
+ (y & 0x10) ? 1 : 0,
+ (y & 0x8) ? 1 : 0,
+ (y & 0x4) ? "" : "Q",
+ (y & 0x2) ? 1:0);
+ while ( (x = getbits(&a, 1)) == 1)
+ ast_log(LOG_WARNING, "PSPARE 0x%x\n", getbits(&a, 8));
+ // ast_log(LOG_WARNING, "PSPARE 0 - start GOB LAYER\n");
+ while ( (x = bitbuf_left(&a)) > 0) {
+ // ast_log(LOG_WARNING, "GBSC %d bits left\n", x);
+ x = getbits(&a, 16); /* GBSC 0000 0000 0000 0001 */
+ if (x != 0x1) {
+ ast_log(LOG_WARNING, "bad GBSC 0x%x\n", x);
+ break;
+ }
+ x = getbits(&a, 4); /* group number */
+ y = getbits(&a, 5); /* gquant */
+ if (x == 0) {
+ ast_log(LOG_WARNING, " bad GN %d\n", x);
+ break;
+ }
+ while ( (x = getbits(&a, 1)) == 1)
+ ast_log(LOG_WARNING, "GSPARE 0x%x\n", getbits(&a, 8));
+ while ( (x = bitbuf_left(&a)) > 0) { /* MB layer */
+ break;
+ }
+ }
+}
+
+void dump_buf(struct fbuf_t *b);
+void dump_buf(struct fbuf_t *b)
+{
+ int i, x, last2lines;
+ char buf[80];
+
+ last2lines = (b->used - 16) & ~0xf;
+ ast_log(LOG_WARNING, "buf size %d of %d\n", b->used, b->size);
+ for (i = 0; i < b->used; i++) {
+ x = i & 0xf;
+ if ( x == 0) { /* new line */
+ if (i != 0)
+ ast_log(LOG_WARNING, "%s\n", buf);
+ bzero(buf, sizeof(buf));
+ sprintf(buf, "%04x: ", i);
+ }
+ sprintf(buf + 6 + x*3, "%02x ", b->data[i]);
+ if (i > 31 && i < last2lines)
+ i = last2lines - 1;
+ }
+ if (buf[0])
+ ast_log(LOG_WARNING, "%s\n", buf);
+}
+/*
+ * Here starts the glue code for the various supported video codecs.
+ * For each of them, we need to provide routines for initialization,
+ * calling the encoder, encapsulating the bitstream in ast_frames,
+ * extracting payload from ast_frames, and calling the decoder.
+ */
+
+/*--- h263+ support --- */
+
+/*! \brief initialization of h263p */
+static int h263p_enc_init(struct video_out_desc *v)
+{
+ /* modes supported are
+ - Unrestricted Motion Vector (annex D)
+ - Advanced Prediction (annex F)
+ - Advanced Intra Coding (annex I)
+ - Deblocking Filter (annex J)
+ - Slice Structure (annex K)
+ - Alternative Inter VLC (annex S)
+ - Modified Quantization (annex T)
+ */
+ v->enc_ctx->flags |=CODEC_FLAG_H263P_UMV; /* annex D */
+ v->enc_ctx->flags |=CODEC_FLAG_AC_PRED; /* annex f ? */
+ v->enc_ctx->flags |=CODEC_FLAG_H263P_SLICE_STRUCT; /* annex k */
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_AIC; /* annex I */
+
+ v->enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds
+ return 0;
+}
+
+
+/*
+ * Create RTP/H.263 fragments to avoid IP fragmentation. We fragment on a
+ * PSC or a GBSC, but if we don't find a suitable place just break somewhere.
+ * Everything is byte-aligned.
+ */
+static struct ast_frame *h263p_encap(struct video_out_desc *out,
+ struct ast_frame **tail)
+{
+ struct ast_frame *cur = NULL, *first = NULL;
+ uint8_t *d = out->enc_out.data;
+ int len = out->enc_out.used;
+ int l = len; /* size of the current fragment. If 0, must look for a psc */
+
+ for (;len > 0; len -= l, d += l) {
+ uint8_t *data;
+ struct ast_frame *f;
+ int i, h;
+
+ if (len >= 3 && d[0] == 0 && d[1] == 0 && d[2] >= 0x80) {
+ /* we are starting a new block, so look for a PSC. */
+ for (i = 3; i < len - 3; i++) {
+ if (d[i] == 0 && d[i+1] == 0 && d[i+2] >= 0x80) {
+ l = i;
+ break;
+ }
+ }
+ }
+ if (l > out->mtu || l > len) { /* psc not found, split */
+ l = MIN(len, out->mtu);
+ }
+ if (l < 1 || l > out->mtu) {
+ ast_log(LOG_WARNING, "--- frame error l %d\n", l);
+ break;
+ }
+
+ if (d[0] == 0 && d[1] == 0) { /* we start with a psc */
+ h = 0;
+ } else { /* no psc, create a header */
+ h = 2;
+ }
+
+ f = create_video_frame(d, d+l, AST_FORMAT_H263_PLUS, h, cur);
+ if (!f)
+ break;
+
+ data = f->data;
+ if (h == 0) { /* we start with a psc */
+ data[0] |= 0x04; // set P == 1, and we are done
+ } else { /* no psc, create a header */
+ data[0] = data[1] = 0; // P == 0
+ }
+
+ if (!cur)
+ first = f;
+ cur = f;
+ }
+
+ if (cur)
+ cur->subclass |= 1; // RTP Marker
+
+ *tail = cur; /* end of the list */
+ return first;
+}
+
+/*! \brief extract the bitstreem from the RTP payload.
+ * This is format dependent.
+ * For h263+, the format is defined in RFC 2429
+ * and basically has a fixed 2-byte header as follows:
+ * 5 bits RR reserved, shall be 0
+ * 1 bit P indicate a start/end condition,
+ * in which case the payload should be prepended
+ * by two zero-valued bytes.
+ * 1 bit V there is an additional VRC header after this header
+ * 6 bits PLEN length in bytes of extra picture header
+ * 3 bits PEBIT how many bits to be ignored in the last byte
+ *
+ * XXX the code below is not complete.
+ */
+static int h263p_decap(struct fbuf_t *b, uint8_t *data, int len)
+{
+ int PLEN;
+
+ if (len < 2) {
+ ast_log(LOG_WARNING, "invalid framesize %d\n", len);
+ return 1;
+ }
+ PLEN = ( (data[0] & 1) << 5 ) | ( (data[1] & 0xf8) >> 3);
+
+ if (PLEN > 0) {
+ data += PLEN;
+ len -= PLEN;
+ }
+ if (data[0] & 4) /* bit P */
+ data[0] = data[1] = 0;
+ else {
+ data += 2;
+ len -= 2;
+ }
+ return fbuf_append(b, data, len, 0, 0); /* ignore trail bits */
+}
+
+
+/*
+ * generic encoder, used by the various protocols supported here.
+ * We assume that the buffer is empty at the beginning.
+ */
+static int ffmpeg_encode(struct video_out_desc *v)
+{
+ struct fbuf_t *b = &v->enc_out;
+ int i;
+
+ b->used = avcodec_encode_video(v->enc_ctx, b->data, b->size, v->frame);
+ i = avcodec_encode_video(v->enc_ctx, b->data + b->used, b->size - b->used, NULL); /* delayed frames ? */
+ if (i > 0) {
+ ast_log(LOG_WARNING, "have %d more bytes\n", i);
+ b->used += i;
+ }
+ return 0;
+}
+
+/*
+ * Generic decoder, which is used by h263p, h263 and h261 as it simply
+ * invokes ffmpeg's decoder.
+ * av_parser_parse should merge a randomly chopped up stream into
+ * proper frames. After that, if we have a valid frame, we decode it
+ * until the entire frame is processed.
+ */
+static int ffmpeg_decode(struct video_in_desc *v, struct fbuf_t *b)
+{
+ uint8_t *src = b->data;
+ int srclen = b->used;
+ int full_frame = 0;
+
+ if (srclen == 0) /* no data */
+ return 0;
+ if (0)
+ check_h261(b);
+ // ast_log(LOG_WARNING, "rx size %d\n", srclen);
+ while (srclen) {
+ uint8_t *data;
+ int datalen, ret;
+ int len = av_parser_parse(v->parser, v->dec_ctx, &data, &datalen, src, srclen, 0, 0);
+
+ src += len;
+ srclen -= len;
+ /* The parser might return something it cannot decode, so it skips
+ * the block returning no data
+ */
+ if (data == NULL || datalen == 0)
+ continue;
+ ret = avcodec_decode_video(v->dec_ctx, v->d_frame, &full_frame, data, datalen);
+ if (full_frame == 1) /* full frame */
+ break;
+ if (ret < 0) {
+ ast_log(LOG_NOTICE, "Error decoding\n");
+ break;
+ }
+ }
+ if (srclen != 0) /* update b with leftover data */
+ bcopy(src, b->data, srclen);
+ b->used = srclen;
+ b->ebit = 0;
+ return full_frame;
+}
+
+static struct video_codec_desc h263p_codec = {
+ .name = "h263p",
+ .format = AST_FORMAT_H263_PLUS,
+ .enc_init = h263p_enc_init,
+ .enc_encap = h263p_encap,
+ .enc_run = ffmpeg_encode,
+ .dec_init = NULL,
+ .dec_decap = h263p_decap,
+ .dec_run = ffmpeg_decode
+};
+
+/*--- Plain h263 support --------*/
+
+static int h263_enc_init(struct video_out_desc *v)
+{
+ /* XXX check whether these are supported */
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_UMV;
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_AIC;
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_SLICE_STRUCT;
+ v->enc_ctx->flags |= CODEC_FLAG_AC_PRED;
+
+ v->enc_ctx->gop_size = v->fps*5;
+
+ return 0;
+}
+
+/*
+ * h263 encapsulation is specified in RFC2190. There are three modes
+ * defined (A, B, C), with 4, 8 and 12 bytes of header, respectively.
+ * The header is made as follows
+ * 0.....................|.......................|.............|....31
+ * F:1 P:1 SBIT:3 EBIT:3 SRC:3 I:1 U:1 S:1 A:1 R:4 DBQ:2 TRB:3 TR:8
+ * FP = 0- mode A, (only one word of header)
+ * FP = 10 mode B, and also means this is an I or P frame
+ * FP = 11 mode C, and also means this is a PB frame.
+ * SBIT, EBIT nuber of bits to ignore at beginning (msbits) and end (lsbits)
+ * SRC bits 6,7,8 from the h263 PTYPE field
+ * I = 0 intra-coded, 1 = inter-coded (bit 9 from PTYPE)
+ * U = 1 for Unrestricted Motion Vector (bit 10 from PTYPE)
+ * S = 1 for Syntax Based Arith coding (bit 11 from PTYPE)
+ * A = 1 for Advanced Prediction (bit 12 from PTYPE)
+ * R = reserved, must be 0
+ * DBQ = differential quantization, DBQUANT from h263, 0 unless we are using
+ * PB frames
+ * TRB = temporal reference for bframes, also 0 unless this is a PB frame
+ * TR = temporal reference for P frames, also 0 unless PB frame.
+ *
+ * Mode B and mode C description omitted.
+ *
+ * An RTP frame can start with a PSC 0000 0000 0000 0000 1000 0
+ * or with a GBSC, which also has the first 17 bits as a PSC.
+ * Note - PSC are byte-aligned, GOB not necessarily. PSC start with
+ * PSC:22 0000 0000 0000 0000 1000 00 picture start code
+ * TR:8 .... .... temporal reference
+ * PTYPE:13 or more ptype...
+ * If we don't fragment a GOB SBIT and EBIT = 0.
+ * reference, 8 bit)
+ *
+ * The assumption below is that we start with a PSC.
+ */
+static struct ast_frame *h263_encap(struct video_out_desc *out,
+ struct ast_frame **tail)
+{
+ uint8_t *d = out->enc_out.data;
+ int start = 0, i, len = out->enc_out.used;
+ struct ast_frame *f, *cur = NULL, *first = NULL;
+ const int pheader_len = 4; /* Use RFC-2190 Mode A */
+ uint8_t h263_hdr[12]; /* worst case, room for a type c header */
+ uint8_t *h = h263_hdr; /* shorthand */
+
+#define H263_MIN_LEN 6
+ if (len < H263_MIN_LEN) /* unreasonably small */
+ return NULL;
+
+ bzero(h263_hdr, sizeof(h263_hdr));
+ /* Now set the header bytes. Only type A by now,
+ * and h[0] = h[2] = h[3] = 0 by default.
+ * PTYPE starts 30 bits in the picture, so the first useful
+ * bit for us is bit 36 i.e. within d[4] (0 is the msbit).
+ * SRC = d[4] & 0x1c goes into data[1] & 0xe0
+ * I = d[4] & 0x02 goes into data[1] & 0x10
+ * U = d[4] & 0x01 goes into data[1] & 0x08
+ * S = d[5] & 0x80 goes into data[1] & 0x04
+ * A = d[5] & 0x40 goes into data[1] & 0x02
+ * R = 0 goes into data[1] & 0x01
+ * Optimizing it, we have
+ */
+ h[1] = ( (d[4] & 0x1f) << 3 ) | /* SRC, I, U */
+ ( (d[5] & 0xc0) >> 5 ); /* S, A, R */
+
+ /* now look for the next PSC or GOB header. First try to hit
+ * a '0' byte then look around for the 0000 0000 0000 0000 1 pattern
+ * which is both in the PSC and the GBSC.
+ */
+ for (i = H263_MIN_LEN, start = 0; start < len; start = i, i += 3) {
+ //ast_log(LOG_WARNING, "search at %d of %d/%d\n", i, start, len);
+ for (; i < len ; i++) {
+ uint8_t x, rpos, lpos;
+ int rpos_i; /* index corresponding to rpos */
+ if (d[i] != 0) /* cannot be in a GBSC */
+ continue;
+ if (i > len - 1)
+ break;
+ x = d[i+1];
+ if (x == 0) /* next is equally good */
+ continue;
+ /* see if around us we can make 16 '0' bits for the GBSC.
+ * Look for the first bit set on the right, and then
+ * see if we have enough 0 on the left.
+ * We are guaranteed to end before rpos == 0
+ */
+ for (rpos = 0x80, rpos_i = 8; rpos; rpos >>= 1, rpos_i--)
+ if (x & rpos) /* found the '1' bit in GBSC */
+ break;
+ x = d[i-1]; /* now look behind */
+ for (lpos = rpos; lpos ; lpos >>= 1)
+ if (x & lpos) /* too early, not a GBSC */
+ break;
+ if (lpos) /* as i said... */
+ continue;
+ /* now we have a GBSC starting somewhere in d[i-1],
+ * but it might be not byte-aligned
+ */
+ if (rpos == 0x80) { /* lucky case */
+ i = i - 1;
+ } else { /* XXX to be completed */
+ ast_log(LOG_WARNING, "unaligned GBSC 0x%x %d\n",
+ rpos, rpos_i);
+ }
+ break;
+ }
+ /* This frame is up to offset i (not inclusive).
+ * We do not split it yet even if larger than MTU.
+ */
+ f = create_video_frame(d + start, d+i, AST_FORMAT_H263,
+ pheader_len, cur);
+
+ if (!f)
+ break;
+ bcopy(h, f->data, 4); /* copy the h263 header */
+ /* XXX to do: if not aligned, fix sbit and ebit,
+ * then move i back by 1 for the next frame
+ */
+ if (!cur)
+ first = f;
+ cur = f;
+ }
+
+ if (cur)
+ cur->subclass |= 1; // RTP Marker
+
+ *tail = cur;
+ return first;
+}
+
+/* XXX We only drop the header here, but maybe we need more. */
+static int h263_decap(struct fbuf_t *b, uint8_t *data, int len)
+{
+ if (len < 4) {
+ ast_log(LOG_WARNING, "invalid framesize %d\n", len);
+ return 1; /* error */
+ }
+
+ if ( (data[0] & 0x80) == 0) {
+ len -= 4;
+ data += 4;
+ } else {
+ ast_log(LOG_WARNING, "unsupported mode 0x%x\n",
+ data[0]);
+ return 1;
+ }
+ return fbuf_append(b, data, len, 0, 0); /* XXX no bit alignment support yet */
+}
+
+static struct video_codec_desc h263_codec = {
+ .name = "h263",
+ .format = AST_FORMAT_H263,
+ .enc_init = h263_enc_init,
+ .enc_encap = h263_encap,
+ .enc_run = ffmpeg_encode,
+ .dec_init = NULL,
+ .dec_decap = h263_decap,
+ .dec_run = ffmpeg_decode
+
+};
+
+/*---- h261 support -----*/
+static int h261_enc_init(struct video_out_desc *v)
+{
+ /* It is important to set rtp_payload_size = 0, otherwise
+ * ffmpeg in h261 mode will produce output that it cannot parse.
+ * Also try to send I frames more frequently than with other codecs.
+ */
+ v->enc_ctx->rtp_payload_size = 0; /* important - ffmpeg fails otherwise */
+ v->enc_ctx->gop_size = v->fps*2; /* be more responsive */
+
+ return 0;
+}
+
+/*
+ * The encapsulation of H261 is defined in RFC4587 which obsoletes RFC2032
+ * The bitstream is preceded by a 32-bit header word:
+ * SBIT:3 EBIT:3 I:1 V:1 GOBN:4 MBAP:5 QUANT:5 HMVD:5 VMVD:5
+ * SBIT and EBIT are the bits to be ignored at beginning and end,
+ * I=1 if the stream has only INTRA frames - cannot change during the stream.
+ * V=0 if motion vector is not used. Cannot change.
+ * GOBN is the GOB number in effect at the start of packet, 0 if we
+ * start with a GOB header
+ * QUANT is the quantizer in effect, 0 if we start with GOB header
+ * HMVD reference horizontal motion vector. 10000 is forbidden
+ * VMVD reference vertical motion vector, as above.
+ * Packetization should occur at GOB boundaries, and if not possible
+ * with MacroBlock fragmentation. However it is likely that blocks
+ * are not bit-aligned so we must take care of this.
+ */
+static struct ast_frame *h261_encap(struct video_out_desc *out,
+ struct ast_frame **tail)
+{
+ uint8_t *d = out->enc_out.data;
+ int start = 0, i, len = out->enc_out.used;
+ struct ast_frame *f, *cur = NULL, *first = NULL;
+ const int pheader_len = 4;
+ uint8_t h261_hdr[4];
+ uint8_t *h = h261_hdr; /* shorthand */
+ int sbit = 0, ebit = 0;
+
+#define H261_MIN_LEN 10
+ if (len < H261_MIN_LEN) /* unreasonably small */
+ return NULL;
+
+ bzero(h261_hdr, sizeof(h261_hdr));
+
+ /* Similar to the code in h263_encap, but the marker there is longer.
+ * Start a few bytes within the bitstream to avoid hitting the marker
+ * twice. Note we might access the buffer at len, but this is ok because
+ * the caller has it oversized.
+ */
+ for (i = H261_MIN_LEN, start = 0; start < len - 1; start = i, i += 4) {
+#if 0 /* test - disable packetization */
+ i = len; /* wrong... */
+#else
+ int found = 0, found_ebit = 0; /* last GBSC position found */
+ for (; i < len ; i++) {
+ uint8_t x, rpos, lpos;
+ if (d[i] != 0) /* cannot be in a GBSC */
+ continue;
+ x = d[i+1];
+ if (x == 0) /* next is equally good */
+ continue;
+ /* See if around us we find 15 '0' bits for the GBSC.
+ * Look for the first bit set on the right, and then
+ * see if we have enough 0 on the left.
+ * We are guaranteed to end before rpos == 0
+ */
+ for (rpos = 0x80, ebit = 7; rpos; ebit--, rpos >>= 1)
+ if (x & rpos) /* found the '1' bit in GBSC */
+ break;
+ x = d[i-1]; /* now look behind */
+ for (lpos = (rpos >> 1); lpos ; lpos >>= 1)
+ if (x & lpos) /* too early, not a GBSC */
+ break;
+ if (lpos) /* as i said... */
+ continue;
+ /* now we have a GBSC starting somewhere in d[i-1],
+ * but it might be not byte-aligned. Just remember it.
+ */
+ if (i - start > out->mtu) /* too large, stop now */
+ break;
+ found_ebit = ebit;
+ found = i;
+ i += 4; /* continue forward */
+ }
+ if (i >= len) { /* trim if we went too forward */
+ i = len;
+ ebit = 0; /* hopefully... should ask the bitstream ? */
+ }
+ if (i - start > out->mtu && found) {
+ /* use the previous GBSC, hope is within the mtu */
+ i = found;
+ ebit = found_ebit;
+ }
+#endif /* test */
+ if (i - start < 4) /* XXX too short ? */
+ continue;
+ /* This frame is up to offset i (not inclusive).
+ * We do not split it yet even if larger than MTU.
+ */
+ f = create_video_frame(d + start, d+i, AST_FORMAT_H261,
+ pheader_len, cur);
+
+ if (!f)
+ break;
+ /* recompute header with I=0, V=1 */
+ h[0] = ( (sbit & 7) << 5 ) | ( (ebit & 7) << 2 ) | 1;
+ bcopy(h, f->data, 4); /* copy the h261 header */
+ if (ebit) /* not aligned, restart from previous byte */
+ i--;
+ sbit = (8 - ebit) & 7;
+ ebit = 0;
+ if (!cur)
+ first = f;
+ cur = f;
+ }
+ if (cur)
+ cur->subclass |= 1; // RTP Marker
+
+ *tail = cur;
+ return first;
+}
+
+/*
+ * Pieces might be unaligned so we really need to put them together.
+ */
+static int h261_decap(struct fbuf_t *b, uint8_t *data, int len)
+{
+ int ebit, sbit;
+
+ if (len < 8) {
+ ast_log(LOG_WARNING, "invalid framesize %d\n", len);
+ return 1;
+ }
+ sbit = (data[0] >> 5) & 7;
+ ebit = (data[0] >> 2) & 7;
+ len -= 4;
+ data += 4;
+ return fbuf_append(b, data, len, sbit, ebit);
+}
+
+static struct video_codec_desc h261_codec = {
+ .name = "h261",
+ .format = AST_FORMAT_H261,
+ .enc_init = h261_enc_init,
+ .enc_encap = h261_encap,
+ .enc_run = ffmpeg_encode,
+ .dec_init = NULL,
+ .dec_decap = h261_decap,
+ .dec_run = ffmpeg_decode
+};
+
+/* mpeg4 support */
+static int mpeg4_enc_init(struct video_out_desc *v)
+{
+#if 0
+ //v->enc_ctx->flags |= CODEC_FLAG_LOW_DELAY; /*don't use b frames ?*/
+ v->enc_ctx->flags |= CODEC_FLAG_AC_PRED;
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_UMV;
+ v->enc_ctx->flags |= CODEC_FLAG_QPEL;
+ v->enc_ctx->flags |= CODEC_FLAG_4MV;
+ v->enc_ctx->flags |= CODEC_FLAG_GMC;
+ v->enc_ctx->flags |= CODEC_FLAG_LOOP_FILTER;
+ v->enc_ctx->flags |= CODEC_FLAG_H263P_SLICE_STRUCT;
+#endif
+ v->enc_ctx->gop_size = v->fps*5;
+ v->enc_ctx->rtp_payload_size = 0; /* important - ffmpeg fails otherwise */
+ return 0;
+}
+
+/* simplistic encapsulation - just split frames in mtu-size units */
+static struct ast_frame *mpeg4_encap(struct video_out_desc *out,
+ struct ast_frame **tail)
+{
+ struct ast_frame *f, *cur = NULL, *first = NULL;
+ uint8_t *d = out->enc_out.data;
+ uint8_t *end = d+out->enc_out.used;
+ int len;
+
+ for (;d < end; d += len, cur = f) {
+ len = MIN(out->mtu, end-d);
+ f = create_video_frame(d, d+len, AST_FORMAT_MP4_VIDEO, 0, cur);
+ if (!f)
+ break;
+ if (!first)
+ first = f;
+ }
+ if (cur)
+ cur->subclass |= 1;
+ *tail = cur;
+ return first;
+}
+
+static int mpeg4_decap(struct fbuf_t *b, uint8_t *data, int len)
+{
+ return fbuf_append(b, data, len, 0, 0);
+}
+
+static int mpeg4_decode(struct video_in_desc *v, struct fbuf_t *b)
+{
+ int full_frame = 0, datalen = b->used;
+ int ret = avcodec_decode_video(v->dec_ctx, v->d_frame, &full_frame,
+ b->data, datalen);
+ if (ret < 0) {
+ ast_log(LOG_NOTICE, "Error decoding\n");
+ ret = datalen; /* assume we used everything. */
+ }
+ datalen -= ret;
+ if (datalen > 0) /* update b with leftover bytes */
+ bcopy(b->data + ret, b->data, datalen);
+ b->used = datalen;
+ b->ebit = 0;
+ return full_frame;
+}
+
+static struct video_codec_desc mpeg4_codec = {
+ .name = "mpeg4",
+ .format = AST_FORMAT_MP4_VIDEO,
+ .enc_init = mpeg4_enc_init,
+ .enc_encap = mpeg4_encap,
+ .enc_run = ffmpeg_encode,
+ .dec_init = NULL,
+ .dec_decap = mpeg4_decap,
+ .dec_run = mpeg4_decode
+};
+
+static int h264_enc_init(struct video_out_desc *v)
+{
+ v->enc_ctx->flags |= CODEC_FLAG_TRUNCATED;
+ //v->enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
+ //v->enc_ctx->flags2 |= CODEC_FLAG2_FASTPSKIP;
+ /* TODO: Maybe we need to add some other flags */
+ v->enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds
+ v->enc_ctx->rtp_mode = 0;
+ v->enc_ctx->rtp_payload_size = 0;
+ v->enc_ctx->bit_rate_tolerance = v->enc_ctx->bit_rate;
+ return 0;
+}
+
+static int h264_dec_init(struct video_in_desc *v)
+{
+ v->dec_ctx->flags |= CODEC_FLAG_TRUNCATED;
+
+ return 0;
+}
+
+/*
+ * The structure of a generic H.264 stream is:
+ * - 0..n 0-byte(s), unused, optional. one zero-byte is always present
+ * in the first NAL before the start code prefix.
+ * - start code prefix (3 bytes): 0x000001
+ * (the first bytestream has a
+ * like these 0x00000001!)
+ * - NAL header byte ( F[1] | NRI[2] | Type[5] ) where type != 0
+ * - byte-stream
+ * - 0..n 0-byte(s) (padding, unused).
+ * Segmentation in RTP only needs to be done on start code prefixes.
+ * If fragments are too long... we don't support it yet.
+ * - encapsulate (or fragment) the byte-stream (with NAL header included)
+ */
+static struct ast_frame *h264_encap(struct video_out_desc *out,
+ struct ast_frame **tail)
+{
+ struct ast_frame *f = NULL, *cur = NULL, *first = NULL;
+ uint8_t *d, *start = out->enc_out.data;
+ uint8_t *end = start + out->enc_out.used;
+
+ /* Search the first start code prefix - ITU-T H.264 sec. B.2,
+ * and move start right after that, on the NAL header byte.
+ */
+#define HAVE_NAL(x) (x[-4] == 0 && x[-3] == 0 && x[-2] == 0 && x[-1] == 1)
+ for (start += 4; start < end; start++) {
+ int ty = start[0] & 0x1f;
+ if (HAVE_NAL(start) && ty != 0 && ty != 31)
+ break;
+ }
+ /* if not found, or too short, we just skip the next loop and are done. */
+
+ /* Here follows the main loop to create frames. Search subsequent start
+ * codes, and then possibly fragment the unit into smaller fragments.
+ */
+ for (;start < end - 4; start = d) {
+ int size; /* size of current block */
+ uint8_t hdr[2]; /* add-on header when fragmenting */
+ int ty = 0;
+
+ /* now search next nal */
+ for (d = start + 4; d < end; d++) {
+ ty = d[0] & 0x1f;
+ if (HAVE_NAL(d))
+ break; /* found NAL */
+ }
+ /* have a block to send. d past the start code unless we overflow */
+ if (d >= end) { /* NAL not found */
+ d = end + 4;
+ } else if (ty == 0 || ty == 31) { /* found but invalid type, skip */
+ ast_log(LOG_WARNING, "skip invalid nal type %d at %d of %d\n",
+ ty, d - out->enc_out.data, out->enc_out.used);
+ continue;
+ }
+
+ size = d - start - 4; /* don't count the end */
+
+ if (size < out->mtu) { // test - don't fragment
+ // Single NAL Unit
+ f = create_video_frame(start, d - 4, AST_FORMAT_H264, 0, cur);
+ if (!f)
+ break;
+ if (!first)
+ first = f;
+
+ cur = f;
+ continue;
+ }
+
+ // Fragmented Unit (Mode A: no DON, very weak)
+ hdr[0] = (*start & 0xe0) | 28; /* mark as a fragmentation unit */
+ hdr[1] = (*start++ & 0x1f) | 0x80 ; /* keep type and set START bit */
+ size--; /* skip the NAL header */
+ while (size) {
+ uint8_t *data;
+ int frag_size = MIN(size, out->mtu);
+
+ f = create_video_frame(start, start+frag_size, AST_FORMAT_H264, 2, cur);
+ if (!f)
+ break;
+ size -= frag_size; /* skip this data block */
+ start += frag_size;
+
+ data = f->data;
+ data[0] = hdr[0];
+ data[1] = hdr[1] | (size == 0 ? 0x40 : 0); /* end bit if we are done */
+ hdr[1] &= ~0x80; /* clear start bit for subsequent frames */
+ if (!first)
+ first = f;
+ cur = f;
+ }
+ }
+
+ if (cur)
+ cur->subclass |= 1; // RTP Marker
+
+ *tail = cur;
+
+ return first;
+}
+
+static int h264_decap(struct fbuf_t *b, uint8_t *data, int len)
+{
+ /* Start Code Prefix (Annex B in specification) */
+ uint8_t scp[] = { 0x00, 0x00, 0x00, 0x01 };
+ int retval = 0;
+ int type, ofs = 0;
+
+ if (len < 2) {
+ ast_log(LOG_WARNING, "--- invalid len %d\n", len);
+ return 1;
+ }
+ /* first of all, check if the packet has F == 0 */
+ if (data[0] & 0x80) {
+ ast_log(LOG_WARNING, "--- forbidden packet; nal: %02x\n",
+ data[0]);
+ return 1;
+ }
+
[... 1542 lines stripped ...]
More information about the asterisk-commits
mailing list