Hi,
here follows my patch. Please excuse the long delay.
The original code uses explicitly programmed loop unrolling. I did not do that
because I assume that the compiler can do this much more efficiently.
BTW: Has anyone tried to use Intel's compilers? This would give a boost in
execution speed.
Michael
On Thursday 19 February 2004 18.35, you wrote:
> >This is what I did. The luminance channel of the matte is used in a
> > convex combination of the input sources thus using the [16,235]/219
> > range.
>
> Well, post your patch, already. You fixed a bug! :)
>
> -matt m.
--
/*
* matteblend - reads three frame-interlaced YUV4MPEG streams from stdin
* and blends the second over the first using the third's
* luminance channel as a matte
*
* Copyright (C) 2001, pHilipp Zabel <[EMAIL PROTECTED]>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "yuv4mpeg.h"
static void usage (void) {
fprintf (stderr, "usage: matteblend.flt\n"
"no params at the moment - color saturation falloff or such has to be implemented\n");
}
static void blend (unsigned char *src0[3], unsigned char *src1[3], unsigned char *matte[3],
unsigned int width, unsigned int height,
unsigned char *dst[3])
{
register unsigned int i,j;
register unsigned int len = width * height;
for (i=0; i<len; i+=4) {
dst[0][i] = ((235 - matte[0][i] ) * src0[0][i] + (matte[0][i] - 16 ) * src1[0][i]) / 219;
dst[0][i+1] = ((235 - matte[0][i+1] ) * src0[0][i+1] + (matte[0][i+1] - 16 ) * src1[0][i+1]) / 219;
dst[0][i+2] = ((235 - matte[0][i+2] ) * src0[0][i+2] + (matte[0][i+2] - 16 ) * src1[0][i+2]) / 219;
dst[0][i+3] = ((235 - matte[0][i+3] ) * src0[0][i+3] + (matte[0][i+3] - 16 ) * src1[0][i+3]) / 219;
}
len>>=2; /* len = len / 4 */
/* do we really have to "downscale" matte here? */
for (i=0,j=0; i<len; i++, j+=2) {
int m = (matte[0][j] + matte[0][j+1] + matte[0][j+width] + matte[0][j+width+1]) >> 2;
if ((j % width) == (width - 2)) j += width;
dst[1][i] = ((235-m) * src0[1][i] + (m-16) * src1[1][i]) / 219;
dst[2][i] = ((235-m) * src0[2][i] + (m-16) * src1[2][i]) / 219;
}
}
int main (int argc, char *argv[])
{
int in_fd = 0; /* stdin */
int out_fd = 1; /* stdout */
unsigned char *yuv0[3]; /* input 0 */
unsigned char *yuv1[3]; /* input 1 */
unsigned char *yuv2[3]; /* input 2 */
unsigned char *yuv[3]; /* output */
y4m_stream_info_t streaminfo;
y4m_frame_info_t frameinfo;
int i;
int w, h;
if (argc > 1) {
usage ();
exit (1);
}
y4m_init_stream_info (&streaminfo);
y4m_init_frame_info (&frameinfo);
i = y4m_read_stream_header (in_fd, &streaminfo);
if (i != Y4M_OK) {
fprintf (stderr, "%s: input stream error - %s\n",
argv[0], y4m_strerr(i));
exit (1);
}
w = y4m_si_get_width(&streaminfo);
h = y4m_si_get_height(&streaminfo);
yuv[0] = malloc (w * h);
yuv0[0] = malloc (w * h);
yuv1[0] = malloc (w * h);
yuv2[0] = malloc (w * h);
yuv[1] = malloc (w * h / 4);
yuv0[1] = malloc (w * h / 4);
yuv1[1] = malloc (w * h / 4);
yuv2[1] = malloc (w * h / 4);
yuv[2] = malloc (w * h / 4);
yuv0[2] = malloc (w * h / 4);
yuv1[2] = malloc (w * h / 4);
yuv2[2] = malloc (w * h / 4);
y4m_write_stream_header (out_fd, &streaminfo);
while (1) {
i = y4m_read_frame(in_fd, &streaminfo, &frameinfo, yuv0);
if (i == Y4M_ERR_EOF)
exit (0);
else if (i != Y4M_OK)
exit (1);
i = y4m_read_frame(in_fd, &streaminfo, &frameinfo, yuv1);
if (i != Y4M_OK)
exit (1);
i = y4m_read_frame(in_fd, &streaminfo, &frameinfo, yuv2);
if (i != Y4M_OK)
exit (1);
/* constrain matte luma */
for (i = 0; i < w*h; i++) {
if (yuv2[0][i] < 16) yuv2[0][i] = 16;
else
if (yuv2[0][i] > 235) yuv2[0][i] = 235;
}
blend (yuv0, yuv1, yuv2, w, h, yuv);
y4m_write_frame (out_fd, &streaminfo, &frameinfo, yuv);
}
}