Hi. The attached patch adds back OpenMP support to the VNG interpolation method. This slightly modified patch should fix the flaws that were discovered in my original patch.
As far as I know, this should be the last performance-critical part of UFRaw that was lacking OpenMP support: interpolation, color smoothing, and development all have OpenMP support now. Are there any other areas I should be looking at? -- Bruce Guenter <[email protected]> http://untroubled.org/
diff --git a/dcraw_indi.c b/dcraw_indi.c
index e6505dc..74eeebf 100644
--- a/dcraw_indi.c
+++ b/dcraw_indi.c
@@ -440,16 +440,17 @@ void CLASS vng_interpolate_INDI(ushort (*image)[4], const unsigned filters,
+1,-1,+1,+1,0,0x88, +1,+0,+1,+2,0,0x08, +1,+0,+2,-1,0,0x40,
+1,+0,+2,+1,0,0x10
}, chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 };
- ushort (*brow[5])[4], *pix;
+ ushort (*brow[4])[4], *pix;
int prow=7, pcol=1, *ip, *code[16][16], gval[8], gmin, gmax, sum[4];
int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag;
int g, diff, thold, num, c;
+ ushort rowtmp[4][width*4];
lin_interpolate_INDI(image, filters, width, height, colors, dcraw); /*UF*/
dcraw_message (dcraw, DCRAW_VERBOSE,_("VNG interpolation...\n")); /*UF*/
if (filters == 1) prow = pcol = 15;
- ip = (int *) calloc ((prow+1)*(pcol+1), 1280);
+ int *ipalloc = ip = (int *) calloc ((prow+1)*(pcol+1), 1280);
merror (ip, "vng_interpolate()");
for (row=0; row <= prow; row++) /* Precalculate for VNG */
for (col=0; col <= pcol; col++) {
@@ -481,11 +482,19 @@ void CLASS vng_interpolate_INDI(ushort (*image)[4], const unsigned filters,
*ip++ = 0;
}
}
- brow[4] = (ushort (*)[4]) calloc (width*3, sizeof **brow);
- merror (brow[4], "vng_interpolate()");
- for (row=0; row < 3; row++)
- brow[row] = brow[4] + row*width;
- for (row=2; row < height-2; row++) { /* Do VNG interpolation */
+#ifdef _OPENMP
+#pragma omp parallel \
+ default(none) \
+ shared(image,code,prow,pcol) \
+ private(row,col,g,brow,rowtmp,pix,ip,gval,diff,gmin,gmax,thold,sum,color,num,c,t)
+#endif
+ {
+ int slice = (height - 4) / uf_omp_get_num_threads();
+ int start_row = 2 + slice * uf_omp_get_thread_num();
+ int end_row = MIN(start_row + slice, height - 2);
+ for (row=start_row; row < end_row; row++) { /* Do VNG interpolation */
+ for (g = 0; g < 4; g++)
+ brow[g] = &rowtmp[(row + g - 2) % 4];
for (col=2; col < width-2; col++) {
pix = image[row*width+col];
ip = code[row & prow][col & pcol];
@@ -529,15 +538,15 @@ void CLASS vng_interpolate_INDI(ushort (*image)[4], const unsigned filters,
brow[2][col][c] = CLIP(t);
}
}
- if (row > 3) /* Write buffer to image */
+ if (row > start_row+1) /* Write buffer to image */
memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image);
- for (g=0; g < 4; g++)
- brow[(g-1) & 3] = brow[g];
}
- memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image);
- memcpy (image[(row-1)*width+2], brow[1]+2, (width-4)*sizeof *image);
- free (brow[4]);
- free (code[0][0]);
+ if (row == height - 2) {
+ memcpy (image[(row-2)*width+2], brow[0]+2, (width-4)*sizeof *image);
+ memcpy (image[(row-1)*width+2], brow[1]+2, (width-4)*sizeof *image);
+ }
+ }
+ free(ipalloc);
}
/*
pgp7XEchdS4pz.pgp
Description: PGP signature
------------------------------------------------------------------------------ Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day trial. Simplify your report design, integration and deployment - and focus on what you do best, core application coding. Discover what's new with Crystal Reports now. http://p.sf.net/sfu/bobj-july
_______________________________________________ ufraw-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/ufraw-devel
