commit 4df594b3f48679f594e6f738981cb3baca8a42d9
Author:     Mattias Andrée <maand...@kth.se>
AuthorDate: Sun May 7 16:11:31 2017 +0200
Commit:     Mattias Andrée <maand...@kth.se>
CommitDate: Sun May 7 16:11:31 2017 +0200

    Add support for floats
    
    Signed-off-by: Mattias Andrée <maand...@kth.se>

diff --git a/README b/README
index 1437aa6..5155331 100644
--- a/README
+++ b/README
@@ -24,6 +24,9 @@ UTILITIES
        blind-concat(1)
               Concatenate videos
 
+       blind-convert(1)
+              Change pixel format of a video
+
        blind-crop(1)
               Extract subframes for all frames
 
diff --git a/man/blind-convert.1 b/man/blind-convert.1
new file mode 100644
index 0000000..dd87b78
--- /dev/null
+++ b/man/blind-convert.1
@@ -0,0 +1,47 @@
+.TH BLIND-CONVERT 1 blind
+.SH NAME
+blind-convert - Change pixel format of a video
+.SH SYNOPSIS
+.B blind-convert
+.IR pixel-format " ..."
+.SH DESCRIPTION
+.B blind-convert
+converts a video to use the pixel format
+.I pixel-format
+(all arguments are joined together with
+a blank space). Available formats are:
+.TP
+.B xyza f
+Unscaled CIE XYZ with unscaled alpha channel, in the
+order X, Y, Z, alpha. Each subpixel value is stored
+as a single-precision floating-point number using the
+local machine's endianness.
+.TP
+.B xyza !f
+Unscaled CIE XYZ with unscaled alpha channel, in the
+order X, Y, Z, alpha. Each subpixel value is stored
+as a double-precision floating-point number using the
+local machine's endianness. This is the default format
+in other commands.
+.TP
+.B xyza
+Unscaled CIE XYZ with unscaled alpha channel, in the
+order X, Y, Z, alpha. Each subpixel value is stored
+in the same format as in the input stream.
+.TP
+.B f
+The same colour space as the input stream, but with
+each subpixel value stored as a single-precision
+floating-point number using the local machine's
+endianness.
+.TP
+.B !f
+The same colour space as the input stream, but with
+each subpixel value stored as a double-precision
+floating-point number using the local machine's
+endianness.
+.SH SEE ALSO
+.BR blind (7)
+.SH AUTHORS
+Mattias Andrée
+.RI < maand...@kth.se >
diff --git a/man/blind-from-image.1 b/man/blind-from-image.1
index 9759ab8..7e6d057 100644
--- a/man/blind-from-image.1
+++ b/man/blind-from-image.1
@@ -30,7 +30,8 @@ Arbitrary Map image.
 .BR blind-write-head (1),
 .BR blind-next-frame (1),
 .BR blind-from-text (1),
-.BR blind-repeat (1)
+.BR blind-repeat (1),
+.BR blind-convert (1)
 .SH AUTHORS
 Mattias Andrée
 .RI < maand...@kth.se >
diff --git a/man/blind-from-video.1 b/man/blind-from-video.1
index 652639e..9172af2 100644
--- a/man/blind-from-video.1
+++ b/man/blind-from-video.1
@@ -3,6 +3,8 @@
 blind-from-video - Converts a regular, cooked video to a blind video
 .SH SYNOPSIS
 .B blind-from-video
+[-F
+.IR pixel-format ]
 [-r
 .IR frame-rate ]
 [-w
@@ -43,6 +45,11 @@ flag, you should also use it in
 .BR blind-to-video (1),
 otherwise the colours will be modified.
 .TP
+.BR -F " "\fIpixel-format\fP
+Select pixel format, see
+.BR blind-convert (1)
+for more information.
+.TP
 .BR -h " "\fIheight\fP
 Change the height of the video to
 .IR height .
diff --git a/man/blind-single-colour.1 b/man/blind-single-colour.1
index a10ea24..2e3d675 100644
--- a/man/blind-single-colour.1
+++ b/man/blind-single-colour.1
@@ -7,6 +7,8 @@ blind-single-colour - Generate a single-colour video
 .I frames
 | -f
 .RB ' inf ']
+[-F
+.IR pixel-format ]
 -w
 .I width
 -h
@@ -50,6 +52,11 @@ head (works fine with most
 tools) but the video will be printed to stdout until there are
 no processes with an open read end to this process's stdout.
 .TP
+.BR -F " "\fIpixel-format\fP
+Select pixel format, see
+.BR blind-convert (1)
+for more information.
+.TP
 .BR -w " "\fIwidth\fP
 The width of the video, in pixels.
 .TP
diff --git a/man/blind.7 b/man/blind.7
index e878e78..cc3256b 100644
--- a/man/blind.7
+++ b/man/blind.7
@@ -34,6 +34,9 @@ Compress a video for network transmission
 .BR blind-concat (1)
 Concatenate videos
 .TP
+.BR blind-convert (1)
+Change pixel format of a video
+.TP
 .BR blind-crop (1)
 Extract subframes for all frames
 .TP
diff --git a/src/blind-arithm.c b/src/blind-arithm.c
index e8c1f95..4b540a6 100644
--- a/src/blind-arithm.c
+++ b/src/blind-arithm.c
@@ -18,40 +18,56 @@ static int skip_z = 0;
 /* Because the syntax for a function returning a function pointer is 
disgusting. */
 typedef void (*process_func)(struct stream *left, struct stream *right, size_t 
n);
 
-#define LIST_OPERATORS\
-       X(add, *lh += rh)\
-       X(sub, *lh -= rh)\
-       X(mul, *lh *= rh)\
-       X(div, *lh /= rh)\
-       X(exp, *lh = pow(*lh, rh))\
-       X(log, *lh = log(*lh) / log(rh))\
-       X(min, *lh = MIN(*lh, rh))\
-       X(max, *lh = MAX(*lh, rh))\
-       X(abs, *lh = fabs(*lh - rh) + rh)
+#define LIST_OPERATORS(PIXFMT, TYPE, SUFFIX)\
+       X(add, *lh += rh,                                 PIXFMT, TYPE)\
+       X(sub, *lh -= rh,                                 PIXFMT, TYPE)\
+       X(mul, *lh *= rh,                                 PIXFMT, TYPE)\
+       X(div, *lh /= rh,                                 PIXFMT, TYPE)\
+       X(exp, *lh = pow##SUFFIX(*lh, rh),                PIXFMT, TYPE)\
+       X(log, *lh = log##SUFFIX(*lh) / log##SUFFIX(rh),  PIXFMT, TYPE)\
+       X(min, *lh = MIN(*lh, rh),                        PIXFMT, TYPE)\
+       X(max, *lh = MAX(*lh, rh),                        PIXFMT, TYPE)\
+       X(abs, *lh = fabs##SUFFIX(*lh - rh) + rh,         PIXFMT, TYPE)
 
-#define C(CH, CHI, ALGO)\
-       (!skip_##CH ? ((lh = ((double *)(left->buf + i)) + (CHI),\
-                       rh = ((double *)(right->buf + i))[CHI],\
+#define C(CH, CHI, ALGO, TYPE)\
+       (!skip_##CH ? ((lh = ((TYPE *)(left->buf + i)) + (CHI),\
+                       rh = ((TYPE *)(right->buf + i))[CHI],\
                        (ALGO)), 0) : 0)
 
-#define X(NAME, ALGO)\
+#define X(NAME, ALGO, PIXFMT, TYPE)\
        static void\
-       process_lf_##NAME(struct stream *left, struct stream *right, size_t n)\
+       process_##PIXFMT##_##NAME(struct stream *left, struct stream *right, 
size_t n)\
        {\
                size_t i;\
-               double *lh, rh;\
-               for (i = 0; i < n; i += 4 * sizeof(double))\
-                       C(x, 0, ALGO), C(y, 1, ALGO), C(z, 2, ALGO), C(a, 3, 
ALGO);\
+               TYPE *lh, rh;\
+               for (i = 0; i < n; i += 4 * sizeof(TYPE)) {\
+                       C(x, 0, ALGO, TYPE);\
+                       C(y, 1, ALGO, TYPE);\
+                       C(z, 2, ALGO, TYPE);\
+                       C(a, 3, ALGO, TYPE);\
+               }\
        }
-LIST_OPERATORS
+LIST_OPERATORS(xyza, double,)
+LIST_OPERATORS(xyzaf, float, f)
 #undef X
 
 static process_func
-get_lf_process(const char *operation)
+get_process_xyza(const char *operation)
 {
-#define X(NAME, ALGO)\
-       if (!strcmp(operation, #NAME)) return process_lf_##NAME;
-LIST_OPERATORS
+#define X(NAME, ALGO, PIXFMT, TYPE)\
+       if (!strcmp(operation, #NAME)) return process_##PIXFMT##_##NAME;
+       LIST_OPERATORS(xyza, double,)
+#undef X
+       eprintf("algorithm not recognised: %s\n", operation);
+       return NULL;
+}
+
+static process_func
+get_process_xyzaf(const char *operation)
+{
+#define X(NAME, ALGO, PIXFMT, TYPE)\
+       if (!strcmp(operation, #NAME)) return process_##PIXFMT##_##NAME;
+       LIST_OPERATORS(xyzaf, float, f)
 #undef X
        eprintf("algorithm not recognised: %s\n", operation);
        return NULL;
@@ -87,7 +103,9 @@ main(int argc, char *argv[])
        eopen_stream(&right, argv[1]);
 
        if (!strcmp(left.pixfmt, "xyza"))
-               process = get_lf_process(argv[0]);
+               process = get_process_xyza(argv[0]);
+       else if (!strcmp(left.pixfmt, "xyza f"))
+               process = get_process_xyzaf(argv[0]);
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
left.pixfmt);
 
diff --git a/src/blind-dissolve.c b/src/blind-dissolve.c
index 719cd28..1efe2a3 100644
--- a/src/blind-dissolve.c
+++ b/src/blind-dissolve.c
@@ -8,31 +8,24 @@
 USAGE("[-r]")
 
 static size_t fm;
-static double fmd;
-
-static void
-process_xyza(struct stream *stream, size_t n, size_t f)
-{
-       size_t i;
-       double a;
-       for (i = 0; i < n; i += stream->pixel_size) {
-               a = ((double *)(stream->buf + i))[3];
-               a = a * (double)(fm - f) / fmd;
-               ((double *)(stream->buf + i))[3] = a;
-       }
-}
-
-static void
-process_xyza_r(struct stream *stream, size_t n, size_t f)
-{
-       size_t i;
-       double a;
-       for (i = 0; i < n; i += stream->pixel_size) {
-               a = ((double *)(stream->buf + i))[3];
-               a = a * (double)f / fmd;
-               ((double *)(stream->buf + i))[3] = a;
-       }
-}
+static double fm_double;
+static float fm_float;
+
+#define PROCESS(TYPE, NREV)\
+       do {\
+               size_t i;\
+               TYPE a;\
+               for (i = 0; i < n; i += stream->pixel_size) {\
+                       a = ((TYPE *)(stream->buf + i))[3];\
+                       a = a * (TYPE)(NREV f) / fm_##TYPE;\
+                       ((TYPE *)(stream->buf + i))[3] = a;\
+               }\
+       } while (0)
+
+static void process_xyza   (struct stream *stream, size_t n, size_t f) 
{PROCESS(double, fm -);}
+static void process_xyza_r (struct stream *stream, size_t n, size_t f) 
{PROCESS(double,);}
+static void process_xyzaf  (struct stream *stream, size_t n, size_t f) 
{PROCESS(float, fm -);}
+static void process_xyzaf_r(struct stream *stream, size_t n, size_t f) 
{PROCESS(float,);}
 
 int
 main(int argc, char *argv[])
@@ -56,12 +49,15 @@ main(int argc, char *argv[])
 
        if (!strcmp(stream.pixfmt, "xyza"))
                process = reverse ? process_xyza_r : process_xyza;
+       else if (!strcmp(stream.pixfmt, "xyza f"))
+               process = reverse ? process_xyzaf_r : process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
stream.pixfmt);
 
        fprint_stream_head(stdout, &stream);
        efflush(stdout, "<stdout>");
-       fmd = fm = stream.frames - 1;
+       fm_double = fm = stream.frames - 1;
+       fm_float = (float)fm_double;
        process_each_frame_segmented(&stream, STDOUT_FILENO, "<stdout>", 
process);
        return 0;
 }
diff --git a/src/blind-from-text.c b/src/blind-from-text.c
index 6bd5921..37a0799 100644
--- a/src/blind-from-text.c
+++ b/src/blind-from-text.c
@@ -9,24 +9,25 @@
 
 USAGE("")
 
-static void
-process_xyza(void)
-{
-       double buf[BUFSIZ / sizeof(double)];
-       size_t i;
-       int r, done = 0;
+#define PROCESS(TYPE, FMT)\
+       do {\
+               TYPE buf[BUFSIZ / sizeof(TYPE)];\
+               size_t i;\
+               int r, done = 0;\
+               while (!done) {\
+                       for (i = 0; i < ELEMENTSOF(buf); i += (size_t)r) {\
+                               r = scanf("%"FMT, buf + i);\
+                               if (r == EOF) {\
+                                       done = 1;\
+                                       break;\
+                               }\
+                       }\
+                       ewriteall(STDOUT_FILENO, buf, i * sizeof(*buf), 
"<stdout>");\
+               }\
+       } while (0)
 
-       while (!done) {
-               for (i = 0; i < ELEMENTSOF(buf); i += (size_t)r) {
-                       r = scanf("%lf", buf + i);
-                       if (r == EOF) {
-                               done = 1;
-                               break;
-                       }
-               }
-               ewriteall(STDOUT_FILENO, buf, i * sizeof(*buf), "<stdout>");
-       }
-}
+static void process_xyza (void) {PROCESS(double, "lf");}
+static void process_xyzaf(void) {PROCESS(float, "f");}
 
 int
 main(int argc, char *argv[])
@@ -61,6 +62,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(stream.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(stream.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
stream.pixfmt);
 
diff --git a/src/blind-from-video.c b/src/blind-from-video.c
index d34e2aa..4c35e66 100644
--- a/src/blind-from-video.c
+++ b/src/blind-from-video.c
@@ -9,9 +9,10 @@
 #include <stdlib.h>
 #include <string.h>
 
-USAGE("[-r frame-rate] [-w width -h height] [-dL] input-file output-file")
+USAGE("[-F pixel-format] [-r frame-rate] [-w width -h height] [-dL] input-file 
output-file")
 
 static int draft = 0;
+static void (*convert_segment)(char *buf, size_t n, int fd, const char *file);
 
 static void
 read_metadata(FILE *fp, char *fname, size_t *width, size_t *height)
@@ -78,45 +79,57 @@ get_metadata(char *file, size_t *width, size_t *height)
                exit(1);
 }
 
+#define CONVERT_SEGMENT(TYPE, SUFFIX)\
+       do {\
+               typedef TYPE pixel_t[4];\
+               size_t i, ptr;\
+               TYPE y, u, v, max = (TYPE)UINT16_MAX;\
+               TYPE r, g, b;\
+               pixel_t pixels[1024];\
+               if (draft) {\
+                       for (ptr = i = 0; ptr < n; ptr += 8) {\
+                               pixels[i][3] = 1;\
+                               y = (long int)(le16toh(((uint16_t *)(buf + 
ptr))[1])) -  16L * 256L;\
+                               u = (long int)(le16toh(((uint16_t *)(buf + 
ptr))[2])) - 128L * 256L;\
+                               v = (long int)(le16toh(((uint16_t *)(buf + 
ptr))[3])) - 128L * 256L;\
+                               scaled_yuv_to_ciexyz##SUFFIX(y, u, v, pixels[i] 
+ 0,\
+                                                            pixels[i] + 1, 
pixels[i] + 2);\
+                               if (++i == 1024) {\
+                                       i = 0;\
+                                       ewriteall(fd, pixels, sizeof(pixels), 
file);\
+                               }\
+                       }\
+               } else {\
+                       for (ptr = i = 0; ptr < n; ptr += 8) {\
+                               pixels[i][3] = le16toh(((uint16_t *)(buf + 
ptr))[0]) / max;\
+                               y = ((long int)le16toh(((uint16_t *)(buf + 
ptr))[1]) -  16L * 256L) / max;\
+                               u = ((long int)le16toh(((uint16_t *)(buf + 
ptr))[2]) - 128L * 256L) / max;\
+                               v = ((long int)le16toh(((uint16_t *)(buf + 
ptr))[3]) - 128L * 256L) / max;\
+                               yuv_to_srgb##SUFFIX(y, u, v, &r, &g, &b);\
+                               r = srgb_decode##SUFFIX(r);\
+                               g = srgb_decode##SUFFIX(g);\
+                               b = srgb_decode##SUFFIX(b);\
+                               srgb_to_ciexyz##SUFFIX(r, g, b, pixels[i] + 0, 
pixels[i] + 1, pixels[i] + 2);\
+                               if (++i == 1024) {\
+                                       i = 0;\
+                                       ewriteall(fd, pixels, sizeof(pixels), 
file);\
+                               }\
+                       }\
+               }\
+               if (i)\
+                       ewriteall(fd, pixels, i * sizeof(*pixels), file);\
+       } while (0)
+
 static void
-convert_segment(char *buf, size_t n, int fd, const char *file)
+convert_segment_xyza(char *buf, size_t n, int fd, const char *file)
 {
-       typedef double pixel_t[4];
-       size_t i, ptr;
-       double y, u, v, max = (double)UINT16_MAX;
-       double r, g, b;
-       pixel_t pixels[1024];
-       if (draft) {
-               for (ptr = i = 0; ptr < n; ptr += 8) {
-                       pixels[i][3] = 1;
-                       y = (long int)(le16toh(((uint16_t *)(buf + ptr))[1])) - 
 16L * 256L;
-                       u = (long int)(le16toh(((uint16_t *)(buf + ptr))[2])) - 
128L * 256L;
-                       v = (long int)(le16toh(((uint16_t *)(buf + ptr))[3])) - 
128L * 256L;
-                       scaled_yuv_to_ciexyz(y, u, v, pixels[i] + 0, pixels[i] 
+ 1, pixels[i] + 2);
-                       if (++i == 1024) {
-                               i = 0;
-                               ewriteall(fd, pixels, sizeof(pixels), file);
-                       }
-               }
-       } else {
-               for (ptr = i = 0; ptr < n; ptr += 8) {
-                       pixels[i][3] = le16toh(((uint16_t *)(buf + ptr))[0]) / 
max;
-                       y = ((long int)le16toh(((uint16_t *)(buf + ptr))[1]) -  
16L * 256L) / max;
-                       u = ((long int)le16toh(((uint16_t *)(buf + ptr))[2]) - 
128L * 256L) / max;
-                       v = ((long int)le16toh(((uint16_t *)(buf + ptr))[3]) - 
128L * 256L) / max;
-                       yuv_to_srgb(y, u, v, &r, &g, &b);
-                       r = srgb_decode(r);
-                       g = srgb_decode(g);
-                       b = srgb_decode(b);
-                       srgb_to_ciexyz(r, g, b, pixels[i] + 0, pixels[i] + 1, 
pixels[i] + 2);
-                       if (++i == 1024) {
-                               i = 0;
-                               ewriteall(fd, pixels, sizeof(pixels), file);
-                       }
-               }
-       }
-       if (i)
-               ewriteall(fd, pixels, i * sizeof(*pixels), file);
+       CONVERT_SEGMENT(double,);
+}
+
+static void
+convert_segment_xyzaf(char *buf, size_t n, int fd, const char *file)
+{
+       CONVERT_SEGMENT(float, _f);
 }
 
 static void
@@ -181,6 +194,7 @@ main(int argc, char *argv[])
        char *infile;
        const char *outfile;
        char *data;
+       const char *pixfmt = "xyza";
        ssize_t headlen;
        size_t length, frame_size;
        int outfd, skip_length = 0;
@@ -193,6 +207,9 @@ main(int argc, char *argv[])
        case 'L':
                skip_length = 1;
                break;
+       case 'F':
+               pixfmt = UARGF();
+               break;
        case 'r':
                frame_rate = UARGF();
                break;
@@ -212,6 +229,14 @@ main(int argc, char *argv[])
        infile = argv[0];
        outfile = argv[1];
 
+       pixfmt = get_pixel_format(pixfmt, "xyza");
+       if (!strcmp(pixfmt, "xyza"))
+               convert_segment = convert_segment_xyza;
+       else if (!strcmp(pixfmt, "xyza f"))
+               convert_segment = convert_segment_xyzaf;
+       else
+               eprintf("pixel format %s is not supported, try xyza\n", pixfmt);
+
        if (!width)
                get_metadata(infile, &width, &height);
        if (width > SIZE_MAX / height)
@@ -231,7 +256,7 @@ main(int argc, char *argv[])
        }
 
        if (skip_length) {
-               SPRINTF_HEAD_ZN(head, frames, width, height, "xyza", &headlen);
+               SPRINTF_HEAD_ZN(head, frames, width, height, pixfmt, &headlen);
                ewriteall(outfd, head, (size_t)headlen, outfile);
        }
 
@@ -246,7 +271,7 @@ main(int argc, char *argv[])
        frames = length / frame_size;
 
        if (!skip_length) {
-               SPRINTF_HEAD_ZN(head, frames, width, height, "xyza", &headlen);
+               SPRINTF_HEAD_ZN(head, frames, width, height, pixfmt, &headlen);
                ewriteall(outfd, head, (size_t)headlen, outfile);
                data = mmap(0, length + (size_t)headlen, PROT_READ | 
PROT_WRITE, MAP_SHARED, outfd, 0);
                memmove(data + headlen, data, length);
diff --git a/src/blind-gauss-blur.c b/src/blind-gauss-blur.c
index 3796fa4..f4c99c0 100644
--- a/src/blind-gauss-blur.c
+++ b/src/blind-gauss-blur.c
@@ -20,111 +20,14 @@ static int auto_spread = 0;
 static size_t jobs = 1;
 static size_t spread = 0;
 
-static void
-process_xyza(char *restrict output, char *restrict cbuf, char *restrict sbuf,
-            struct stream *colour, struct stream *sigma, size_t cn, size_t sn)
-{
-       typedef double pixel_t[4];
-
-       pixel_t *restrict clr = (pixel_t *)cbuf;
-       pixel_t *restrict sig = (pixel_t *)sbuf;
-       pixel_t *img = (pixel_t *)output;
-       pixel_t c, k;
-       size_t x1, y1, i1, x2, y2, i2;
-       double d, m;
-       int i, blurred, blur[3] = {0, 0, 0};
-       size_t start, end, x2start, x2end, y2start, y2end;
-       int is_master;
-       pid_t *children;
-
-       y2start = x2start = 0;
-       x2end = colour->width;
-       y2end = colour->height;
-
-       if (chroma || !noalpha) {
-               start = 0, end = colour->height;
-               is_master = efork_jobs(&start, &end, jobs, &children);
-
-               /* premultiply alpha channel */
-               if (!noalpha) {
-                       i1 = start * colour->width;
-                       for (y1 = start; y1 < end; y1++) {
-                               for (x1 = 0; x1 < colour->width; x1++, i1++) {
-                                       clr[i1][0] *= clr[i1][3];
-                                       clr[i1][1] *= clr[i1][3];
-                                       clr[i1][2] *= clr[i1][3];
-                               }
-                       }
-               }
-
-               /* convert colour model */
-               if (chroma) {
-                       i1 = start * colour->width;
-                       for (y1 = start; y1 < end; y1++) {
-                               for (x1 = 0; x1 < colour->width; x1++, i1++) {
-                                       clr[i1][0] = clr[i1][0] / D65_XYZ_X - 
clr[i1][1];
-                                       clr[i1][2] = clr[i1][2] / D65_XYZ_Z - 
clr[i1][1];
-                                       /*
-                                        * Explaination:
-                                        *   Y is the luma and ((X / Xn - Y / 
Yn), (Z / Zn - Y / Yn))
-                                        *   is the chroma (according to 
CIELAB), where (Xn, Yn, Zn)
-                                        *   is the white point.
-                                        */
-                               }
-                       }
-               }
-               /* Conversion makes no difference if blur is applied to all
-                * parameters:
-                * 
-                * Gaussian blur:
-                * 
-                *                  ∞ ∞
-                *                  ⌠ ⌠ V(x,y)  −((x−x₀)² + 
(y−y₀)²)/(2σ²)
-                *     V′ (x₀,y₀) = │ │ ────── e          
                  dxdy
-                *       σ          ⌡ ⌡  2πσ²
-                *                −∞ −∞
-                * 
-                * With linear transformation, F:
-                * 
-                *                      ∞ ∞
-                *                      ⌠ ⌠ F(V(x,y))  −((x−x₀)² + 
(y−y₀)²)/(2σ²)
-                *     V′ (x₀,y₀) = F⁻¹ │ │ 
───────── e                           dxdy
-                *       σ              ⌡ ⌡    2πσ²
-                *                     −∞ −∞
-                * 
-                *                      ∞ ∞
-                *                      ⌠ ⌠  ⎛V(x,y)  −((x−x₀)² + 
(y−y₀)²)/(2σ²)⎞
-                *     V′ (x₀,y₀) = F⁻¹ │ │ F⎜────── 
e                          ⎟ dxdy
-                *       σ              ⌡ ⌡  ⎝ 2πσ²                   
          ⎠
-                *                     −∞ −∞
-                * 
-                *                            ∞ ∞
-                *                            ⌠ ⌠ V(x,y)  −((x−x₀)² 
+ (y−y₀)²)/(2σ²)
-                *     V′ (x₀,y₀) = (F⁻¹ ∘ F) │ │ 
────── e                           dxdy
-                *       σ                    ⌡ ⌡  2πσ²
-                *                           −∞ −∞
-                * 
-                *                  ∞ ∞
-                *                  ⌠ ⌠ V(x,y)  −((x−x₀)² + 
(y−y₀)²)/(2σ²)
-                *     V′ (x₀,y₀) = │ │ ────── e          
                 dxdy
-                *       σ          ⌡ ⌡  2πσ²
-                *                 −∞ −∞
-                * 
-                * Just like expected, the colour space should not affect the
-                * result of guassian blur as long as it is linear.
-                */
-
-               ejoin_jobs(is_master, children);
-       }
-
-       /*
-        * This is not a regular simple gaussian blur implementation.
-        * This implementation is able to apply different levels of
-        * blur on different pixels. It's therefore written a bit
-        * oldly. Instead of going through each pixel and calculate
-        * the new value for each pixel, it goes through each pixel
-        * and smears it out to the other pixels.
-        */
+/*
+ * This is not a regular simple gaussian blur implementation.
+ * This implementation is able to apply different levels of
+ * blur on different pixels. It's therefore written a bit
+ * oddly. Instead of going through each pixel and calculating
+ * the new value for each pixel, it goes through each pixel
+ * and smears it out to the other pixels.
+ */
 
 #define BLUR_PIXEL_PROLOGUE(DIR)\
        if (sig[i1][3] == 0)\
@@ -163,13 +66,13 @@ process_xyza(char *restrict output, char *restrict cbuf, 
char *restrict sbuf,
        if (auto_spread && spread < 1)\
                spread = 1;
 
-#define BLUR_PIXEL(START, LOOP, DISTANCE)\
+#define BLUR_PIXEL(START, LOOP, DISTANCE, SUFFIX)\
        if (k[0] == k[1] && k[1] == k[2]) {\
                START;\
                for (LOOP) {\
                        d = (DISTANCE);\
                        d *= d;\
-                       m = c[0] * exp(d * k[0]);\
+                       m = c[0] * exp##SUFFIX(d * k[0]);\
                        img[i2][0] += clr[i1][0] * m;\
                        img[i2][1] += clr[i1][1] * m;\
                        img[i2][2] += clr[i1][2] * m;\
@@ -196,7 +99,7 @@ process_xyza(char *restrict output, char *restrict cbuf, 
char *restrict sbuf,
                        }\
                }\
        }
-       
+
 #define BLUR_PIXEL_EPILOGUE(DIR)\
        continue;\
        no_blur_##DIR:\
@@ -205,7 +108,7 @@ process_xyza(char *restrict output, char *restrict cbuf, 
char *restrict sbuf,
        img[i1][2] = clr[i1][2];\
        img[i1][3] = clr[i1][3];
 
-#define BLUR(DIR, SETSTART, SETEND, START, LOOP, DISTANCE)\
+#define BLUR(DIR, SETSTART, SETEND, START, LOOP, DISTANCE, SUFFIX)\
        do {\
                memset(img, 0, cn);\
                start = 0, end = colour->height;\
@@ -218,69 +121,180 @@ process_xyza(char *restrict output, char *restrict cbuf, 
char *restrict sbuf,
                                        SETSTART;\
                                        SETEND;\
                                }\
-                               BLUR_PIXEL(START, LOOP, DISTANCE);\
+                               BLUR_PIXEL(START, LOOP, DISTANCE, SUFFIX);\
                                BLUR_PIXEL_EPILOGUE(DIR);\
                        }\
                }\
                ejoin_jobs(is_master, children);\
        } while (0)
 
-       /* blur */
-       if (horizontal)
-               BLUR(horizontal,
-                    x2start = spread > x1 ? 0 : x1 - spread,
-                    x2end = spread + 1 > colour->width - x1 ? colour->width : 
x1 + spread + 1,
-                    i2 = y1 * colour->width + x2start,
-                    x2 = x2start; x2 < x2end; (x2++, i2++),
-                    (ssize_t)x1 - (ssize_t)x2);
-       if (horizontal && vertical)
-               memcpy(clr, img, cn);
-       if (vertical)
-               BLUR(vertical,
-                    y2start = spread > y1 ? 0 : y1 - spread,
-                    y2end = spread + 1 > colour->height - y1 ? colour->height 
: y1 + spread + 1,
-                    i2 = y2start * colour->width + x1,
-                    y2 = y2start; y2 < y2end; (y2++, i2 += colour->width),
-                    (ssize_t)y1 - (ssize_t)y2);
-
-       start = 0, end = colour->height;
-       is_master = efork_jobs(&start, &end, jobs, &children);
-
-       /* convert back to CIE XYZ */
-       if (chroma) {
-               i1 = start * colour->width;
-               for (y1 = start; y1 < end; y1++) {
-                       for (x1 = 0; x1 < colour->width; x1++, i1++) {
-                               img[i1][0] = (img[i1][0] + img[i1][1]) * 
D65_XYZ_X;
-                               img[i1][2] = (img[i1][2] + img[i1][1]) * 
D65_XYZ_Z;
-                       }
-               }
-       }
-
-       /* unpremultiply alpha channel */
-       i1 = start * colour->width;
-       for (y1 = start; y1 < end; y1++) {
-               for (x1 = 0; x1 < colour->width; x1++, i1++) {
-                       if (!img[i1][3])
-                               continue;
-                       img[i1][0] /= img[i1][3];
-                       img[i1][1] /= img[i1][3];
-                       img[i1][2] /= img[i1][3];
-               }
-       }
-
-       /* ensure the video if opaque if -a was used */
-       if (noalpha) {
-               i1 = start * colour->width;
-               for (y1 = start; y1 < end; y1++)
-                       for (x1 = 0; x1 < colour->width; x1++, i1++)
-                               img[i1][3] = 1;
-       }
+#define PROCESS(TYPE, SUFFIX)\
+       do {\
+               typedef TYPE pixel_t[4];\
+               \
+               pixel_t *restrict clr = (pixel_t *)cbuf;\
+               pixel_t *restrict sig = (pixel_t *)sbuf;\
+               pixel_t *img = (pixel_t *)output;\
+               pixel_t c, k;\
+               size_t x1, y1, i1, x2, y2, i2;\
+               TYPE d, m;\
+               int i, blurred, blur[3] = {0, 0, 0};\
+               size_t start, end, x2start, x2end, y2start, y2end;\
+               int is_master;\
+               pid_t *children;\
+               \
+               y2start = x2start = 0;\
+               x2end = colour->width;\
+               y2end = colour->height;\
+               \
+               if (chroma || !noalpha) {\
+                       start = 0, end = colour->height;\
+                       is_master = efork_jobs(&start, &end, jobs, &children);\
+                       \
+                       /* premultiply alpha channel */\
+                       if (!noalpha) {\
+                               i1 = start * colour->width;\
+                               for (y1 = start; y1 < end; y1++) {\
+                                       for (x1 = 0; x1 < colour->width; x1++, 
i1++) {\
+                                               clr[i1][0] *= clr[i1][3];\
+                                               clr[i1][1] *= clr[i1][3];\
+                                               clr[i1][2] *= clr[i1][3];\
+                                       }\
+                               }\
+                       }\
+                       \
+                       /* convert colour model */\
+                       if (chroma) {\
+                               i1 = start * colour->width;\
+                               for (y1 = start; y1 < end; y1++) {\
+                                       for (x1 = 0; x1 < colour->width; x1++, 
i1++) {\
+                                               clr[i1][0] = clr[i1][0] / 
D65_XYZ_X - clr[i1][1];\
+                                               clr[i1][2] = clr[i1][2] / 
D65_XYZ_Z - clr[i1][1];\
+                                               /*
+                                                * Explanation:
+                                                *   Y is the luma and ((X / Xn - Y / Yn), (Z / Zn - Y / Yn))
+                                                *   is the chroma (according to CIELAB), where (Xn, Yn, Zn)
+                                                *   is the white point.
+                                                */\
+                                       }\
+                               }\
+                       }\
+                       /* Conversion makes no difference if blur is applied to 
all
+                        * parameters:
+                        * 
+                        * Gaussian blur:
+                        * 
+                        *                  ∞ ∞
+                        *                  ⌠ ⌠ V(x,y)  −((x−x₀)² + (y−y₀)²)/(2σ²)
+                        *     V′ (x₀,y₀) = │ │ ────── e                           dxdy
+                        *       σ          ⌡ ⌡  2πσ²
+                        *                −∞ −∞
+                        * 
+                        * With linear transformation, F:
+                        * 
+                        *                      ∞ ∞
+                        *                      ⌠ ⌠ F(V(x,y))  −((x−x₀)² + (y−y₀)²)/(2σ²)
+                        *     V′ (x₀,y₀) = F⁻¹ │ │ ───────── e                           dxdy
+                        *       σ              ⌡ ⌡    2πσ²
+                        *                     −∞ −∞
+                        * 
+                        *                      ∞ ∞
+                        *                      ⌠ ⌠  ⎛V(x,y)  −((x−x₀)² + (y−y₀)²)/(2σ²)⎞
+                        *     V′ (x₀,y₀) = F⁻¹ │ │ F⎜────── e                          ⎟ dxdy
+                        *       σ              ⌡ ⌡  ⎝ 2πσ²                             ⎠
+                        *                     −∞ −∞
+                        * 
+                        *                            ∞ ∞
+                        *                            ⌠ ⌠ V(x,y)  −((x−x₀)² + (y−y₀)²)/(2σ²)
+                        *     V′ (x₀,y₀) = (F⁻¹ ∘ F) │ │ ────── e                           dxdy
+                        *       σ                    ⌡ ⌡  2πσ²
+                        *                           −∞ −∞
+                        * 
+                        *                  ∞ ∞
+                        *                  ⌠ ⌠ V(x,y)  −((x−x₀)² + (y−y₀)²)/(2σ²)
+                        *     V′ (x₀,y₀) = │ │ ────── e                           dxdy
+                        *       σ          ⌡ ⌡  2πσ²
+                        *                 −∞ −∞
+                        * 
+                        * Just as expected, the colour space should not affect the
+                        * result of Gaussian blur as long as it is linear.
+                        */\
+                       \
+                       ejoin_jobs(is_master, children);\
+               }\
+               \
+               /* blur */\
+               if (horizontal)\
+                       BLUR(horizontal,\
+                            x2start = spread > x1 ? 0 : x1 - spread,\
+                            x2end = spread + 1 > colour->width - x1 ? 
colour->width : x1 + spread + 1,\
+                            i2 = y1 * colour->width + x2start,\
+                            x2 = x2start; x2 < x2end; (x2++, i2++),\
+                            (ssize_t)x1 - (ssize_t)x2,\
+                            SUFFIX);\
+               if (horizontal && vertical)\
+                       memcpy(clr, img, cn);\
+               if (vertical)\
+                       BLUR(vertical,\
+                            y2start = spread > y1 ? 0 : y1 - spread,\
+                            y2end = spread + 1 > colour->height - y1 ? 
colour->height : y1 + spread + 1,\
+                            i2 = y2start * colour->width + x1,\
+                            y2 = y2start; y2 < y2end; (y2++, i2 += 
colour->width),\
+                            (ssize_t)y1 - (ssize_t)y2,\
+                            SUFFIX);\
+               \
+               start = 0, end = colour->height;\
+               is_master = efork_jobs(&start, &end, jobs, &children);\
+               \
+               /* convert back to CIE XYZ */\
+               if (chroma) {\
+                       i1 = start * colour->width;\
+                       for (y1 = start; y1 < end; y1++) {\
+                               for (x1 = 0; x1 < colour->width; x1++, i1++) {\
+                                       img[i1][0] = (img[i1][0] + img[i1][1]) 
* D65_XYZ_X;\
+                                       img[i1][2] = (img[i1][2] + img[i1][1]) 
* D65_XYZ_Z;\
+                               }\
+                       }\
+               }\
+               \
+               /* unpremultiply alpha channel */\
+               i1 = start * colour->width;\
+               for (y1 = start; y1 < end; y1++) {\
+                       for (x1 = 0; x1 < colour->width; x1++, i1++) {\
+                               if (!img[i1][3])\
+                                       continue;\
+                               img[i1][0] /= img[i1][3];\
+                               img[i1][1] /= img[i1][3];\
+                               img[i1][2] /= img[i1][3];\
+                       }\
+               }\
+               \
+               /* ensure the video is opaque if -a was used */\
+               if (noalpha) {\
+                       i1 = start * colour->width;\
+                       for (y1 = start; y1 < end; y1++)\
+                               for (x1 = 0; x1 < colour->width; x1++, i1++)\
+                                       img[i1][3] = 1;\
+               }\
+               \
+               ejoin_jobs(is_master, children);\
+               \
+               (void) sigma;\
+               (void) sn;\
+       } while (0)
 
-       ejoin_jobs(is_master, children);
+static void
+process_xyza(char *restrict output, char *restrict cbuf, char *restrict sbuf,
+            struct stream *colour, struct stream *sigma, size_t cn, size_t sn)
+{
+       PROCESS(double,);
+}
 
-       (void) sigma;
-       (void) sn;
+static void
+process_xyzaf(char *restrict output, char *restrict cbuf, char *restrict sbuf,
+            struct stream *colour, struct stream *sigma, size_t cn, size_t sn)
+{
+       PROCESS(float, f);
 }
 
 int
@@ -332,6 +346,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(colour.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-invert-luma.c b/src/blind-invert-luma.c
index 9a8ac51..c169fd6 100644
--- a/src/blind-invert-luma.c
+++ b/src/blind-invert-luma.c
@@ -9,79 +9,52 @@
 
 USAGE("[-iw] mask-stream")
 
-static void
-process_xyza(struct stream *colour, struct stream *mask, size_t n)
-{
-       size_t i;
-       double w, y, yo;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               w = ((double *)(mask->buf + i))[1];
-               w *= ((double *)(mask->buf + i))[3];
-               yo = ((double *)(colour->buf + i))[1];
-               y = (1 - yo) * w + yo * (1 - w);
-               ((double *)(colour->buf + i))[0] += (y - yo) * D65_XYZ_X;
-               ((double *)(colour->buf + i))[1] = y;
-               ((double *)(colour->buf + i))[2] += (y - yo) * D65_XYZ_Z;
-               /*
-                * Explaination:
-                *   Y is the luma and ((X / Xn - Y / Yn), (Z / Zn - Y / Yn))
-                *   is the chroma (according to CIELAB), where (Xn, Yn, Zn)
-                *   is the white point.
-                */
-       }
-}
+#define PROCESS(TYPE, INV)\
+       do {\
+               size_t i;\
+               TYPE w, y, yo;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       w = INV ((TYPE *)(mask->buf + i))[1];\
+                       w *= ((TYPE *)(mask->buf + i))[3];\
+                       yo = ((TYPE *)(colour->buf + i))[1];\
+                       y = (1 - yo) * w + yo * (1 - w);\
+                       ((TYPE *)(colour->buf + i))[0] += (y - yo) * 
(TYPE)D65_XYZ_X;\
+                       ((TYPE *)(colour->buf + i))[1] = y;\
+                       ((TYPE *)(colour->buf + i))[2] += (y - yo) * 
(TYPE)D65_XYZ_Z;\
+                       /*
+                        * Explanation:
+                        *   Y is the luma and ((X / Xn - Y / Yn), (Z / Zn - Y / Yn))
+                        *   is the chroma (according to CIELAB), where (Xn, Yn, Zn)
+                        *   is the white point.
+                        */\
+               }\
+       } while (0)
 
-static void
-process_xyza_i(struct stream *colour, struct stream *mask, size_t n)
-{
-       size_t i;
-       double w, y, yo;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               w = 1 - ((double *)(mask->buf + i))[1];
-               w *= ((double *)(mask->buf + i))[3];
-               yo = ((double *)(colour->buf + i))[1];
-               y = (1 - yo) * w + yo * (1 - w);
-               ((double *)(colour->buf + i))[0] += (y - yo) * D65_XYZ_X;
-               ((double *)(colour->buf + i))[1] = y;
-               ((double *)(colour->buf + i))[2] += (y - yo) * D65_XYZ_Z;
-       }
-}
+#define PROCESS_W(TYPE, INV)\
+       do {\
+               size_t i;\
+               TYPE w, y, yo, X, Z;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       X = ((TYPE *)(mask->buf + i))[0];\
+                       Z = ((TYPE *)(mask->buf + i))[2];\
+                       w = INV ((TYPE *)(mask->buf + i))[1];\
+                       w *= ((TYPE *)(mask->buf + i))[3];\
+                       yo = ((TYPE *)(colour->buf + i))[1];\
+                       y = (1 - yo) * w + yo * (1 - w);\
+                       ((TYPE *)(colour->buf + i))[0] += (y - yo) * X;\
+                       ((TYPE *)(colour->buf + i))[1] = y;\
+                       ((TYPE *)(colour->buf + i))[2] += (y - yo) * Z;\
+               }\
+       } while (0)
 
-static void
-process_xyza_w(struct stream *colour, struct stream *mask, size_t n)
-{
-       size_t i;
-       double w, y, yo, X, Z;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               X = ((double *)(mask->buf + i))[0];
-               Z = ((double *)(mask->buf + i))[2];
-               w = ((double *)(mask->buf + i))[1];
-               w *= ((double *)(mask->buf + i))[3];
-               yo = ((double *)(colour->buf + i))[1];
-               y = (1 - yo) * w + yo * (1 - w);
-               ((double *)(colour->buf + i))[0] += (y - yo) * X;
-               ((double *)(colour->buf + i))[1] = y;
-               ((double *)(colour->buf + i))[2] += (y - yo) * Z;
-       }
-}
-
-static void
-process_xyza_iw(struct stream *colour, struct stream *mask, size_t n)
-{
-       size_t i;
-       double w, y, yo, X, Z;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               X = ((double *)(mask->buf + i))[0];
-               Z = ((double *)(mask->buf + i))[2];
-               w = 1 - ((double *)(mask->buf + i))[1];
-               w *= ((double *)(mask->buf + i))[3];
-               yo = ((double *)(colour->buf + i))[1];
-               y = (1 - yo) * w + yo * (1 - w);
-               ((double *)(colour->buf + i))[0] += (y - yo) * X;
-               ((double *)(colour->buf + i))[1] = y;
-               ((double *)(colour->buf + i))[2] += (y - yo) * Z;
-       }
-}
+static void process_xyza    (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS(double,);}
+static void process_xyza_i  (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS(double, 1 -);}
+static void process_xyza_w  (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS_W(double,);}
+static void process_xyza_iw (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS_W(double, 1 -);}
+static void process_xyzaf   (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS(float,);}
+static void process_xyzaf_i (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS(float, 1 -);}
+static void process_xyzaf_w (struct stream *colour, struct stream *mask, 
size_t n) {PROCESS_W(float,);}
+static void process_xyzaf_iw(struct stream *colour, struct stream *mask, 
size_t n) {PROCESS_W(float, 1 -);}
 
 int
 main(int argc, char *argv[])
@@ -110,6 +83,9 @@ main(int argc, char *argv[])
        if (!strcmp(colour.pixfmt, "xyza"))
                process = invert ? whitepoint ? process_xyza_iw : process_xyza_i
                                 : whitepoint ? process_xyza_w  : process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = invert ? whitepoint ? process_xyzaf_iw : 
process_xyzaf_i
+                                : whitepoint ? process_xyzaf_w  : 
process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-set-alpha.c b/src/blind-set-alpha.c
index 23be8ab..40f1a9e 100644
--- a/src/blind-set-alpha.c
+++ b/src/blind-set-alpha.c
@@ -9,29 +9,21 @@
 
 USAGE("[-i] alpha-stream")
 
-static void
-process_xyza(struct stream *colour, struct stream *alpha, size_t n)
-{
-       size_t i;
-       double a;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               a = ((double *)(alpha->buf + i))[1];
-               a *= ((double *)(alpha->buf + i))[3];
-               ((double *)(colour->buf + i))[3] *= a;
-       }
-}
-
-static void
-process_xyza_i(struct stream *colour, struct stream *alpha, size_t n)
-{
-       size_t i;
-       double a;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               a = 1 - ((double *)(alpha->buf + i))[1];
-               a *= ((double *)(alpha->buf + i))[3];
-               ((double *)(colour->buf + i))[3] *= a;
-       }
-}
+#define PROCESS(TYPE, INV)\
+       do {\
+               size_t i;\
+               TYPE a;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       a = INV ((TYPE *)(alpha->buf + i))[1];\
+                       a *= ((TYPE *)(alpha->buf + i))[3];\
+                       ((TYPE *)(colour->buf + i))[3] *= a;\
+               }\
+       } while (0)
+
+static void process_xyza   (struct stream *colour, struct stream *alpha, 
size_t n) {PROCESS(double,);}
+static void process_xyza_i (struct stream *colour, struct stream *alpha, 
size_t n) {PROCESS(double, 1 -);}
+static void process_xyzaf  (struct stream *colour, struct stream *alpha, 
size_t n) {PROCESS(float,);}
+static void process_xyzaf_i(struct stream *colour, struct stream *alpha, 
size_t n) {PROCESS(float, 1 -);}
 
 int
 main(int argc, char *argv[])
@@ -56,6 +48,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(colour.pixfmt, "xyza"))
                process = invert ? process_xyza_i : process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = invert ? process_xyzaf_i : process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-set-luma.c b/src/blind-set-luma.c
index 751bc6b..2c2b885 100644
--- a/src/blind-set-luma.c
+++ b/src/blind-set-luma.c
@@ -9,82 +9,84 @@
 
 USAGE("luma-stream")
 
-static void
-process_xyza(struct stream *colour, struct stream *luma, size_t n)
-{
-       size_t i;
-       double a, y;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               a = ((double *)(luma->buf + i))[1];
-               a *= ((double *)(luma->buf + i))[3];
-               y = ((double *)(colour->buf + i))[1];
-               ((double *)(colour->buf + i))[0] += y * a - y;
-               ((double *)(colour->buf + i))[1]  = y * a;
-               ((double *)(colour->buf + i))[2] += y * a - y;
-               /*
-                * Note, this changes the luma only, not the saturation,
-                * so the result may look a bit weird. To change both
-                * you can use `blind-arithm mul`.
-                * 
-                * Explaination of algorithm:
-                * 
-                *   Y is the luma, but (X, Z) is not the chroma,
-                *   but in CIELAB, L* is the luma and (a*, *b) is
-                *   the chroma. Multiplying
-                *   
-                *      ⎛0 1   0⎞
-                *      ⎜1 −1  0⎟
-                *      ⎝0  1 −1⎠
-                *   
-                *   (X Y Z)' gives a colour model similar to
-                *   CIE L*a*b*: a model where each parameter is
-                *   a linear transformation of the corresponding
-                *   parameter in CIE L*a*b*. The inverse of that
-                *   matrix is
-                *   
-                *      ⎛1 1  0⎞
-                *      ⎜1 0  0⎟
-                *      ⎝0 0 −1⎠
-                *   
-                *   and
-                *   
-                *      ⎛1 1  0⎞⎛a 0 0⎞⎛0 1   0⎞   ⎛1 a−1 0⎞
-                *      ⎜1 0  0⎟⎜0 1 0⎟⎜1 −1  0⎟ = ⎜0  a  0⎟.
-                *      ⎝0 0 −1⎠⎝0 0 1⎠⎝0  1 −1⎠   ⎝0 a−1 
1⎠
-                * 
-                * Explanation of why changing only the luma looks weird:
-                * 
-                *   Consider when you are workings with colours,
-                *   when you want to change the brightness of a
-                *   colour, you multiply all parameters: red, green,
-                *   and blue, with the same value (this is however
-                *   only an approximation in most cases, since you
-                *   are usually usally working with colours that
-                *   have the sRGB transfer function applied to their
-                *   parameters). This action is the same in all
-                *   colour models and colour spaces that are a
-                *   linear transformation of the sRGB colour spaces
-                *   (sans transfer function); this is simply because
-                *   of the properties of linear transformations.
-                * 
-                *   The reason you change brightness this way can
-                *   be explained by how objects reflect colour.
-                *   Objects can only reject colours that are present
-                *   in the light source. A ideal white object will look
-                *   pure red if the light sources is ideal red, and a
-                *   a ideal blue object will pure black in the same
-                *   light source. An object can also not reflect
-                *   colours brighter than the source. When the brightness
-                *   of a light source is changed, the intensity of all
-                *   colours (by wavelength) it emits is multiplied by
-                *   one value. Therefore, when changing the brightness
-                *   it looks most natural when all primaries (red, green,
-                *   and blue) are multiplied by one value, or all
-                *   parameters of the used colour spaces is a linear
-                *   transformation of sRGB, such as CIE XYZ.
-                */
-       }
-}
+#define PROCESS(TYPE)\
+       do {\
+               size_t i;\
+               TYPE a, y;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       a = ((TYPE *)(luma->buf + i))[1];\
+                       a *= ((TYPE *)(luma->buf + i))[3];\
+                       y = ((TYPE *)(colour->buf + i))[1];\
+                       ((TYPE *)(colour->buf + i))[0] += y * a - y;\
+                       ((TYPE *)(colour->buf + i))[1]  = y * a;\
+                       ((TYPE *)(colour->buf + i))[2] += y * a - y;\
+                       /*
+                        * Note, this changes the luma only, not the saturation,
+                        * so the result may look a bit weird. To change both
+                        * you can use `blind-arithm mul`.
+                        * 
+                        * Explanation of algorithm:
+                        * 
+                        *   Y is the luma, but (X, Z) is not the chroma,
+                        *   but in CIELAB, L* is the luma and (a*, b*) is
+                        *   the chroma. Multiplying
+                        *   
+                        *      ⎛0 1   0⎞
+                        *      ⎜1 −1  0⎟
+                        *      ⎝0  1 −1⎠
+                        *   
+                        *   (X Y Z)' gives a colour model similar to
+                        *   CIE L*a*b*: a model where each parameter is
+                        *   a linear transformation of the corresponding
+                        *   parameter in CIE L*a*b*. The inverse of that
+                        *   matrix is
+                        *   
+                        *      ⎛1 1  0⎞
+                        *      ⎜1 0  0⎟
+                        *      ⎝1 0 −1⎠
+                        *   
+                        *   and
+                        *   
+                        *      ⎛1 1  0⎞⎛a 0 0⎞⎛0 1   0⎞   ⎛1 a−1 0⎞
+                        *      ⎜1 0  0⎟⎜0 1 0⎟⎜1 −1  0⎟ = ⎜0  a  0⎟.
+                        *      ⎝1 0 −1⎠⎝0 0 1⎠⎝0  1 −1⎠   ⎝0 a−1 1⎠
+                        * 
+                        * Explanation of why changing only the luma looks weird:
+                        * 
+                        *   When you are working with colours and you want
+                        *   to change the brightness of a colour, you
+                        *   multiply all parameters: red, green, and blue,
+                        *   with the same value (this is however only an
+                        *   approximation in most cases, since you are
+                        *   usually working with colours that
+                        *   have the sRGB transfer function applied to their
+                        *   parameters). This action is the same in all
+                        *   colour models and colour spaces that are a
+                        *   linear transformation of the sRGB colour space
+                        *   (sans transfer function); this is simply because
+                        *   of the properties of linear transformations.
+                        * 
+                        *   The reason you change brightness this way can
+                        *   be explained by how objects reflect colour.
+                        *   Objects can only reflect colours that are present
+                        *   in the light source. An ideal white object will look
+                        *   pure red if the light source is ideal red, and an
+                        *   ideal blue object will look pure black in the same
+                        *   light source. An object also cannot reflect
+                        *   colours brighter than the source. When the brightness
+                        *   of a light source is changed, the intensity of all
+                        *   colours (by wavelength) it emits is multiplied by
+                        *   one value. Therefore, when changing the brightness
+                        *   it looks most natural when all primaries (red, green,
+                        *   and blue) are multiplied by one value, or all
+                        *   parameters if the colour space used is a linear
+                        *   transformation of sRGB, such as CIE XYZ.
+                        */\
+               }\
+       } while (0)
+
+static void process_xyza (struct stream *colour, struct stream *luma, size_t 
n) {PROCESS(double);}
+static void process_xyzaf(struct stream *colour, struct stream *luma, size_t 
n) {PROCESS(float);}
 
 int
 main(int argc, char *argv[])
@@ -99,6 +101,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(colour.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-set-saturation.c b/src/blind-set-saturation.c
index ce0dd40..472863f 100644
--- a/src/blind-set-saturation.c
+++ b/src/blind-set-saturation.c
@@ -9,45 +9,48 @@
 
 USAGE("[-w] saturation-stream")
 
-static void
-process_xyza(struct stream *colour, struct stream *satur, size_t n)
-{
-       size_t i;
-       double s, *x, y, *z;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               s = ((double *)(satur->buf + i))[1];
-               s *= ((double *)(satur->buf + i))[3];
-               x = ((double *)(colour->buf + i)) + 0;
-               y = ((double *)(colour->buf + i))[1];
-               z = ((double *)(colour->buf + i)) + 2;
-               *x = ((*x / D65_XYZ_X - y) * s + y) * D65_XYZ_X;
-               *z = ((*z / D65_XYZ_Z - y) * s + y) * D65_XYZ_Z;
-               /*
-                * Explaination:
-                *   Y is the luma and ((X / Xn - Y / Yn), (Z / Zn - Y / Yn))
-                *   is the chroma (according to CIELAB), where (Xn, Yn, Zn)
-                *   is the white point.
-                */
-       }
-}
+#define PROCESS(TYPE)\
+       do {\
+               size_t i;\
+               TYPE s, *x, y, *z;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       s = ((TYPE *)(satur->buf + i))[1];\
+                       s *= ((TYPE *)(satur->buf + i))[3];\
+                       x = ((TYPE *)(colour->buf + i)) + 0;\
+                       y = ((TYPE *)(colour->buf + i))[1];\
+                       z = ((TYPE *)(colour->buf + i)) + 2;\
+                       *x = ((*x / (TYPE)D65_XYZ_X - y) * s + y) * 
(TYPE)D65_XYZ_X;\
+                       *z = ((*z / (TYPE)D65_XYZ_Z - y) * s + y) * 
(TYPE)D65_XYZ_Z;\
+                       /*
+                        * Explanation:
+                        *   Y is the luma and ((X / Xn - Y / Yn), (Z / Zn - Y / Yn))
+                        *   is the chroma (according to CIELAB), where (Xn, Yn, Zn)
+                        *   is the white point.
+                        */\
+               }\
+       } while (0)
 
-static void
-process_xyza_w(struct stream *colour, struct stream *satur, size_t n)
-{
-       size_t i;
-       double s, *x, y, *z, X, Z;
-       for (i = 0; i < n; i += colour->pixel_size) {
-               X = ((double *)(satur->buf + i))[0];
-               Z = ((double *)(satur->buf + i))[2];
-               s = ((double *)(satur->buf + i))[1];
-               s *= ((double *)(satur->buf + i))[3];
-               x = ((double *)(colour->buf + i)) + 0;
-               y = ((double *)(colour->buf + i))[1];
-               z = ((double *)(colour->buf + i)) + 2;
-               *x = ((*x / X - y) * s + y) * X;
-               *z = ((*z / Z - y) * s + y) * Z;
-       }
-}
+#define PROCESS_W(TYPE)\
+       do {\
+               size_t i;\
+               TYPE s, *x, y, *z, X, Z;\
+               for (i = 0; i < n; i += colour->pixel_size) {\
+                       X = ((TYPE *)(satur->buf + i))[0];\
+                       Z = ((TYPE *)(satur->buf + i))[2];\
+                       s = ((TYPE *)(satur->buf + i))[1];\
+                       s *= ((TYPE *)(satur->buf + i))[3];\
+                       x = ((TYPE *)(colour->buf + i)) + 0;\
+                       y = ((TYPE *)(colour->buf + i))[1];\
+                       z = ((TYPE *)(colour->buf + i)) + 2;\
+                       *x = ((*x / X - y) * s + y) * X;\
+                       *z = ((*z / Z - y) * s + y) * Z;\
+               }\
+       } while (0)
+
+static void process_xyza   (struct stream *colour, struct stream *satur, 
size_t n) {PROCESS(double);}
+static void process_xyza_w (struct stream *colour, struct stream *satur, 
size_t n) {PROCESS_W(double);}
+static void process_xyzaf  (struct stream *colour, struct stream *satur, 
size_t n) {PROCESS(float);}
+static void process_xyzaf_w(struct stream *colour, struct stream *satur, 
size_t n) {PROCESS_W(float);}
 
 int
 main(int argc, char *argv[])
@@ -72,6 +75,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(colour.pixfmt, "xyza"))
                process = whitepoint ? process_xyza_w : process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = whitepoint ? process_xyzaf_w : process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-single-colour.c b/src/blind-single-colour.c
index 8eb25b9..f7a1afe 100644
--- a/src/blind-single-colour.c
+++ b/src/blind-single-colour.c
@@ -6,20 +6,42 @@
 #include <string.h>
 #include <unistd.h>
 
-USAGE("[-f frames | -f 'inf'] -w width -h height (X Y Z | Y) [alpha]")
+USAGE("[-f frames | -f 'inf'] [-F pixel-format] -w width -h height (X Y Z | Y) 
[alpha]")
 
-typedef double pixel_t[4];
+static struct stream stream = { .width = 0, .height = 0, .frames = 1 };
+static double X, Y, Z, alpha = 1;
+static int inf = 0;
+
+#define PROCESS(TYPE) /* body of process_xyza*(): write every frame as copies of one pixel with TYPE subpixels */\
+       do {\
+               typedef TYPE pixel_t[4];\
+               pixel_t buf[BUFSIZ / 4];\
+               size_t x, y, n, off;\
+               ssize_t r;\
+               \
+               for (x = 0; x < ELEMENTSOF(buf); x++) { /* fill buf with the requested colour */\
+                       buf[x][0] = (TYPE)X;\
+                       buf[x][1] = (TYPE)Y;\
+                       buf[x][2] = (TYPE)Z;\
+                       buf[x][3] = (TYPE)alpha;\
+               }\
+               while (inf || stream.frames--)\
+                       for (y = stream.height; y--;)\
+                               for (x = stream.width * sizeof(*buf); x;) /* x = bytes still to emit for this row */\
+                                       for (x -= n = MIN(sizeof(buf), x), off = 0; n; n -= (size_t)r, off += (size_t)r) /* off: resume a short write where it stopped so pixels stay aligned */\
+                                               if ((r = write(STDOUT_FILENO, (char *)buf + off, n)) < 0)\
+                                                       eprintf("write <stdout>:");\
+       } while (0)
+
+static void process_xyza(void)  {PROCESS(double);} /* "xyza": double subpixels */
+static void process_xyzaf(void) {PROCESS(float);} /* "xyza f": float subpixels */
 
 int
 main(int argc, char *argv[])
 {
-       struct stream stream = { .width = 0, .height = 0, .frames = 1 };
-       double X, Y, Z, alpha = 1;
-       size_t x, y, n;
-       pixel_t buf[BUFSIZ / 4];
-       ssize_t r;
-       int inf = 0;
        char *arg;
+       const char *pixfmt = "xyza";
+       void (*process)(void) = NULL;
 
        ARGBEGIN {
        case 'f':
@@ -29,6 +51,9 @@ main(int argc, char *argv[])
                else
                        stream.frames = etozu_flag('f', arg, 1, SIZE_MAX);
                break;
+       case 'F':
+               pixfmt = UARGF();
+               break;
        case 'w':
                stream.width = etozu_flag('w', UARGF(), 1, SIZE_MAX);
                break;
@@ -57,22 +82,18 @@ main(int argc, char *argv[])
        if (inf)
                einf_check_fd(STDOUT_FILENO, "<stdout>");
 
-       strcpy(stream.pixfmt, "xyza");
+       pixfmt = get_pixel_format(pixfmt, "xyza");
+       if (!strcmp(pixfmt, "xyza"))
+               process = process_xyza;
+       else if (!strcmp(pixfmt, "xyza f"))
+               process = process_xyzaf;
+       else
+               eprintf("pixel format %s is not supported, try xyza\n", pixfmt);
+
+       strcpy(stream.pixfmt, pixfmt);
        fprint_stream_head(stdout, &stream);
        efflush(stdout, "<stdout>");
 
-       for (x = 0; x < ELEMENTSOF(buf); x++) {
-               buf[x][0] = X;
-               buf[x][1] = Y;
-               buf[x][2] = Z;
-               buf[x][3] = alpha;
-       }
-       while (inf || stream.frames--)
-               for (y = stream.height; y--;)
-                       for (x = stream.width * sizeof(*buf); x;)
-                               for (x -= n = MIN(sizeof(buf), x); n; n -= 
(size_t)r)
-                                       if ((r = write(STDOUT_FILENO, buf, n)) 
< 0)
-                                               eprintf("write <stdout>:");
-
+       process();
        return 0;
 }
diff --git a/src/blind-stack.c b/src/blind-stack.c
index d409cc6..72233a2 100644
--- a/src/blind-stack.c
+++ b/src/blind-stack.c
@@ -9,7 +9,7 @@
 
 USAGE("[-b] bottom-stream ... top-stream")
 
-#define PROCESS_LINEAR_3CH_ALPHA(TYPE, BLEND)\
+#define PROCESS(TYPE, BLEND)\
        do {\
                TYPE x1, y1, z1, a1;\
                TYPE x2, y2, z2, a2;\
@@ -38,8 +38,10 @@ USAGE("[-b] bottom-stream ... top-stream")
                }\
        } while (0)
 
-static void process_xyza  (struct stream *streams, size_t n_streams, size_t n) 
{ PROCESS_LINEAR_3CH_ALPHA(double, 0); }
-static void process_xyza_b(struct stream *streams, size_t n_streams, size_t n) 
{ PROCESS_LINEAR_3CH_ALPHA(double, 1); }
+static void process_xyza   (struct stream *streams, size_t n_streams, size_t 
n) { PROCESS(double, 0); } /* "xyza" streams, `blend` unset */
+static void process_xyza_b (struct stream *streams, size_t n_streams, size_t 
n) { PROCESS(double, 1); } /* "xyza" streams, `blend` set */
+static void process_xyzaf  (struct stream *streams, size_t n_streams, size_t 
n) { PROCESS(float, 0); } /* "xyza f" streams, `blend` unset */
+static void process_xyzaf_b(struct stream *streams, size_t n_streams, size_t 
n) { PROCESS(float, 1); } /* "xyza f" streams, `blend` set */
 
 int
 main(int argc, char *argv[])
@@ -71,6 +73,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(streams->pixfmt, "xyza"))
                process = blend ? process_xyza_b : process_xyza;
+       else if (!strcmp(streams->pixfmt, "xyza f"))
+               process = blend ? process_xyzaf_b : process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
streams->pixfmt);
 
diff --git a/src/blind-time-blur.c b/src/blind-time-blur.c
index c5d9ee5..702b280 100644
--- a/src/blind-time-blur.c
+++ b/src/blind-time-blur.c
@@ -9,36 +9,48 @@ USAGE("alpha-stream")
 
 static int first = 1;
 
+#define PROCESS(TYPE) /* body of process_xyza*(): blend the frame in cbuf into the accumulated image in output */\
+       do {\
+               typedef TYPE pixel_t[4];\
+               pixel_t *restrict clr = (pixel_t *)cbuf;\
+               pixel_t *restrict alf = (pixel_t *)abuf;\
+               pixel_t *img = (pixel_t *)output;\
+               size_t i, n = cn / sizeof(pixel_t); /* n = number of whole pixels in cbuf */\
+               TYPE a1, a2;\
+               \
+               if (first) { /* first call: the frame is copied to output unchanged */\
+                       memcpy(output, cbuf, cn);\
+                       first = 0;\
+                       return; /* returns from the function this macro is expanded in */\
+               }\
+               \
+               for (i = 0; i < n; i++, clr++, alf++, img++) {\
+                       a1 = (*img)[3];\
+                       a2 = (*clr)[3] * (*alf)[1] * (*alf)[3]; /* weight of the new frame: its alpha times abuf's channels 1 and 3 */\
+                       a1 *= (1 - a2); /* weight left for the accumulated image */\
+                       (*img)[0] = (*img)[0] * a1 + (*clr)[0] * a2;\
+                       (*img)[1] = (*img)[1] * a1 + (*clr)[1] * a2;\
+                       (*img)[2] = (*img)[2] * a1 + (*clr)[2] * a2;\
+                       (*img)[3] = a1 + a2;\
+               }\
+               \
+               (void) colour; /* these three parameters are not used by the body */\
+               (void) alpha;\
+               (void) an;\
+       } while (0)
+
 static void
 process_xyza(char *output, char *restrict cbuf, char *restrict abuf,
             struct stream *colour, struct stream *alpha, size_t cn, size_t an)
 {
-       typedef double pixel_t[4];
-       pixel_t *restrict clr = (pixel_t *)cbuf;
-       pixel_t *restrict alf = (pixel_t *)abuf;
-       pixel_t *img = (pixel_t *)output;
-       size_t i, n = cn / sizeof(pixel_t);
-       double a1, a2;
-
-       if (first) {
-               memcpy(output, cbuf, cn);
-               first = 0;
-               return;
-       }
-
-       for (i = 0; i < n; i++, clr++, alf++, img++) {
-               a1 = (*img)[3];
-               a2 = (*clr)[3] * (*alf)[1] * (*alf)[3];
-               a1 *= (1 - a2);
-               (*img)[0] = (*img)[0] * a1 + (*clr)[0] * a2;
-               (*img)[1] = (*img)[1] * a1 + (*clr)[1] * a2;
-               (*img)[2] = (*img)[2] * a1 + (*clr)[2] * a2;
-               (*img)[3] = a1 + a2;
-       }
+       PROCESS(double);
+}
 
-       (void) colour;
-       (void) alpha;
-       (void) an;
+static void
+process_xyzaf(char *output, char *restrict cbuf, char *restrict abuf,
+             struct stream *colour, struct stream *alpha, size_t cn, size_t an)
+{ /* chosen by main() for "xyza f" streams: PROCESS with float subpixels */
+       PROCESS(float);
 }
 
 int
@@ -61,6 +73,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(colour.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(colour.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
colour.pixfmt);
 
diff --git a/src/blind-to-image.c b/src/blind-to-image.c
index 534eef3..c7cf37d 100644
--- a/src/blind-to-image.c
+++ b/src/blind-to-image.c
@@ -17,70 +17,74 @@ static int alpha_warning_triggered = 0;
 static unsigned long long int max;
 static int bytes;
 
-static void
-write_pixel(double R, double G, double B, double A)
-{
-       unsigned long long int colours[4];
-       unsigned char buf[4 * 8];
-       int i, j, k, bm = bytes - 1;
-
-       if (R < 0 || G < 0 || B < 0 || R > 1 || G > 1 || B > 1) {
-               if (gamut_warning_triggered) {
-                       gamut_warning_triggered = 1;
-                       weprintf("warning: out-of-gamut colour detected\n");
-               }
-               ; /* TODO gamut */
-               R = CLIP(0, R, 1);
-               G = CLIP(0, G, 1);
-               B = CLIP(0, B, 1);
-       }
-
-       if (A < 0 || A > 1) {
-               if (alpha_warning_triggered) {
-                       alpha_warning_triggered = 1;
-                       weprintf("warning: alpha values truncated\n");
-               }
-               A = A < 0 ? 0 : 1;
-       }
-
-       colours[0] = srgb_encode(R) * max + 0.5;
-       colours[1] = srgb_encode(G) * max + 0.5;
-       colours[2] = srgb_encode(B) * max + 0.5;
-       colours[3] = A * max + 0.5;
-
-       for (i = k = 0; i < 4; i++, k += bytes) {
-               for (j = 0; j < bytes; j++) {
-                       buf[k + bm - j] = (unsigned char)(colours[i]);
-                       colours[i] >>= 8;
-               }
-       }
-
-       ewriteall(STDOUT_FILENO, buf, k, "<stdout>");
-}
-
-static void
-process_xyza(struct stream *stream, size_t n)
-{
-       size_t i;
-       double X, Y, Z, A, R, G, B;
-       for (i = 0; i < n; i += stream->pixel_size) {
-               X = ((double *)(stream->buf + i))[0];
-               Y = ((double *)(stream->buf + i))[1];
-               Z = ((double *)(stream->buf + i))[2];
-               A = ((double *)(stream->buf + i))[3];
-
-               if (Y < 0 || Y > 1) {
-                       if (luma_warning_triggered) {
-                               luma_warning_triggered = 1;
-                               weprintf("warning: %s colour detected\n",
-                                        Y < 0 ? "subblack" : "superwhite");
-                       }
-               }
-
-               ciexyz_to_srgb(X, Y, Z, &R, &G, &B);
-               write_pixel(R, G, B, A);
-       }
-}
+#define WRITE_PIXEL(TYPE, SUFFIX) /* body of write_pixel##SUFFIX(): clip R, G, B, A to [0, 1], scale to `max` and write to stdout */\
+       do {\
+               unsigned long long int colours[4];\
+               unsigned char buf[4 * 8];\
+               int i, j, k, bm = bytes - 1;\
+               \
+               if (R < 0 || G < 0 || B < 0 || R > 1 || G > 1 || B > 1) {\
+                       if (!gamut_warning_triggered) { /* warn on the first occurrence only */\
+                               gamut_warning_triggered = 1;\
+                               weprintf("warning: out-of-gamut colour detected\n");\
+                       }\
+                       ; /* TODO gamut */\
+                       R = CLIP(0, R, 1);\
+                       G = CLIP(0, G, 1);\
+                       B = CLIP(0, B, 1);\
+               }\
+               \
+               if (A < 0 || A > 1) {\
+                       if (!alpha_warning_triggered) { /* warn on the first occurrence only */\
+                               alpha_warning_triggered = 1;\
+                               weprintf("warning: alpha values truncated\n");\
+                       }\
+                       A = A < 0 ? 0 : 1;\
+               }\
+               \
+               colours[0] = srgb_encode##SUFFIX(R) * max + (TYPE)0.5; /* + 0.5 rounds to nearest on conversion */\
+               colours[1] = srgb_encode##SUFFIX(G) * max + (TYPE)0.5;\
+               colours[2] = srgb_encode##SUFFIX(B) * max + (TYPE)0.5;\
+               colours[3] = A * max + (TYPE)0.5; /* alpha is scaled without sRGB encoding */\
+               \
+               for (i = k = 0; i < 4; i++, k += bytes) { /* store each value in `bytes` bytes, most significant first */\
+                       for (j = 0; j < bytes; j++) {\
+                               buf[k + bm - j] = (unsigned char)(colours[i]);\
+                               colours[i] >>= 8;\
+                       }\
+               }\
+               \
+               ewriteall(STDOUT_FILENO, buf, k, "<stdout>");\
+       } while (0)
+
+#define PROCESS(TYPE, SUFFIX) /* body of process_xyza*(): convert each pixel in stream->buf with ciexyz_to_srgb##SUFFIX and write it */\
+       do {\
+               size_t i;\
+               TYPE X, Y, Z, A, R, G, B;\
+               for (i = 0; i < n; i += stream->pixel_size) { /* i is a byte offset into stream->buf */\
+                       X = ((TYPE *)(stream->buf + i))[0];\
+                       Y = ((TYPE *)(stream->buf + i))[1];\
+                       Z = ((TYPE *)(stream->buf + i))[2];\
+                       A = ((TYPE *)(stream->buf + i))[3];\
+                       \
+                       if (Y < 0 || Y > 1) {\
+                               if (!luma_warning_triggered) { /* warn on the first occurrence only */\
+                                       luma_warning_triggered = 1;\
+                                       weprintf("warning: %s colour detected\n",\
+                                                Y < 0 ? "subblack" : "superwhite");\
+                               }\
+                       }\
+                       \
+                       ciexyz_to_srgb##SUFFIX(X, Y, Z, &R, &G, &B);\
+                       write_pixel##SUFFIX(R, G, B, A);\
+               }\
+       } while (0)
+
+static void write_pixel(double R, double G, double B, double A) 
{WRITE_PIXEL(double,);} /* double variant, uses srgb_encode() */
+static void write_pixel_f(float R, float G, float B, float A) 
{WRITE_PIXEL(float, _f);} /* float variant, uses srgb_encode_f() */
+
+static void process_xyza (struct stream *stream, size_t n) {PROCESS(double,);} /* chosen by main() for "xyza" */
+static void process_xyzaf(struct stream *stream, size_t n) {PROCESS(float, 
_f);} /* chosen by main() for "xyza f" */
 
 int
 main(int argc, char *argv[])
@@ -111,6 +115,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(stream.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(stream.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
stream.pixfmt);
 
diff --git a/src/blind-to-text.c b/src/blind-to-text.c
index 7fefbfc..59ab8c3 100644
--- a/src/blind-to-text.c
+++ b/src/blind-to-text.c
@@ -8,17 +8,19 @@
 
 USAGE("")
 
-static void
-process_xyza(struct stream *stream, size_t n)
-{
-       size_t i;
-       for (i = 0; i < n; i += stream->pixel_size)
-               printf("%lf %lf %lf %lf\n",
-                      ((double *)(stream->buf + i))[0],
-                      ((double *)(stream->buf + i))[1],
-                      ((double *)(stream->buf + i))[2],
-                      ((double *)(stream->buf + i))[3]);
-}
+#define PROCESS(TYPE, CAST, FMT) /* body of process_xyza*(): print one line of four subpixel values per pixel */\
+       do {\
+               size_t i;\
+               for (i = 0; i < n; i += stream->pixel_size) /* i is a byte offset into stream->buf */\
+                       printf("%"FMT" %"FMT" %"FMT" %"FMT"\n", /* FMT is the conversion specifier without the leading % */\
+                              (CAST)(((TYPE *)(stream->buf + i))[0]), /* read as TYPE, passed to printf as CAST */\
+                              (CAST)(((TYPE *)(stream->buf + i))[1]),\
+                              (CAST)(((TYPE *)(stream->buf + i))[2]),\
+                              (CAST)(((TYPE *)(stream->buf + i))[3]));\
+       } while (0)
+
+static void process_xyza (struct stream *stream, size_t n) {PROCESS(double, 
double, "lf");} /* chosen by main() for "xyza" */
+static void process_xyzaf(struct stream *stream, size_t n) {PROCESS(float,  
double, "lf");} /* chosen by main() for "xyza f"; floats are cast to double for printf */
 
 int
 main(int argc, char *argv[])
@@ -32,6 +34,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(stream.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(stream.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
stream.pixfmt);
 
diff --git a/src/blind-to-video.c b/src/blind-to-video.c
index 2bebe9b..a98d719 100644
--- a/src/blind-to-video.c
+++ b/src/blind-to-video.c
@@ -12,53 +12,55 @@ USAGE("[-d] frame-rate ffmpeg-arguments ...")
 static int draft = 0;
 static int fd;
 
-static void
-process_xyza(struct stream *stream, size_t n)
-{
-       char *buf = stream->buf;
-       double *pixel, r, g, b;
-       uint16_t *pixels, *end;
-       uint16_t pixbuf[1024];
-       long int a, y, u, v;
-       size_t ptr;
-       pixels = pixbuf;
-       end = pixbuf + ELEMENTSOF(pixbuf);
-       if (draft) {
-               for (ptr = 0; ptr < n; ptr += 4 * sizeof(double)) {
-                       pixel = (double *)(buf + ptr);
-                       ciexyz_to_scaled_yuv(pixel[0], pixel[1], pixel[2], &r, 
&g, &b);
-                       y = (long int)r +  16L * 256L;
-                       u = (long int)g + 128L * 256L;
-                       v = (long int)b + 128L * 256L;
-                       *pixels++ = 0xFFFFU;
-                       *pixels++ = htole16((uint16_t)CLIP(0, y, 0xFFFFL));
-                       *pixels++ = htole16((uint16_t)CLIP(0, u, 0xFFFFL));
-                       *pixels++ = htole16((uint16_t)CLIP(0, v, 0xFFFFL));
-                       if (pixels == end)
-                               ewriteall(fd, pixels = pixbuf, sizeof(pixbuf), 
"<subprocess>");
-               }
-       } else {
-               for (ptr = 0; ptr < n; ptr += 4 * sizeof(double)) {
-                       pixel = (double *)(buf + ptr);
-                       a = (long int)(pixel[3] * 0xFFFFL);
-                       ciexyz_to_srgb(pixel[0], pixel[1], pixel[2], &r, &g, 
&b);
-                       r = srgb_encode(r);
-                       g = srgb_encode(g);
-                       b = srgb_encode(b);
-                       srgb_to_yuv(r, g, b, pixel + 0, pixel + 1, pixel + 2);
-                       y = (long int)(pixel[0] * 0xFFFFL) +  16L * 256L;
-                       u = (long int)(pixel[1] * 0xFFFFL) + 128L * 256L;
-                       v = (long int)(pixel[2] * 0xFFFFL) + 128L * 256L;
-                       *pixels++ = htole16((uint16_t)CLIP(0, a, 0xFFFFL));
-                       *pixels++ = htole16((uint16_t)CLIP(0, y, 0xFFFFL));
-                       *pixels++ = htole16((uint16_t)CLIP(0, u, 0xFFFFL));
-                       *pixels++ = htole16((uint16_t)CLIP(0, v, 0xFFFFL));
-                       if (pixels == end)
-                               ewriteall(fd, pixels = pixbuf, sizeof(pixbuf), 
"<subprocess>");
-               }
-       }
-       ewriteall(fd, pixbuf, (size_t)(pixels - pixbuf) * sizeof(*pixels), 
"<subprocess>");
-}
+#define PROCESS(TYPE, SUFFIX) /* body of process_xyza*(): convert pixels in stream->buf to four 16-bit values each and write them to fd */\
+       do {\
+               char *buf = stream->buf;\
+               TYPE *pixel, r, g, b;\
+               uint16_t *pixels, *end;\
+               uint16_t pixbuf[BUFSIZ / sizeof(uint16_t)]; /* flushed only when exactly full after a pixel, so its length must be a multiple of 4 */\
+               long int a, y, u, v;\
+               size_t ptr;\
+               pixels = pixbuf;\
+               end = pixbuf + ELEMENTSOF(pixbuf);\
+               if (draft) { /* draft: direct XYZ to scaled YUV conversion, alpha forced to 0xFFFF */\
+                       for (ptr = 0; ptr < n; ptr += 4 * sizeof(TYPE)) {\
+                               pixel = (TYPE *)(buf + ptr);\
+                               ciexyz_to_scaled_yuv##SUFFIX(pixel[0], 
pixel[1], pixel[2], &r, &g, &b);\
+                               y = (long int)r +  16L * 256L; /* r, g, b hold the scaled Y, U, V here */\
+                               u = (long int)g + 128L * 256L;\
+                               v = (long int)b + 128L * 256L;\
+                               *pixels++ = 0xFFFFU;\
+                               *pixels++ = htole16((uint16_t)CLIP(0, y, 
0xFFFFL));\
+                               *pixels++ = htole16((uint16_t)CLIP(0, u, 
0xFFFFL));\
+                               *pixels++ = htole16((uint16_t)CLIP(0, v, 
0xFFFFL));\
+                               if (pixels == end)\
+                                       ewriteall(fd, pixels = pixbuf, 
sizeof(pixbuf), "<subprocess>");\
+                       }\
+               } else { /* non-draft: XYZ to sRGB, sRGB encoding, then sRGB to YUV */\
+                       for (ptr = 0; ptr < n; ptr += 4 * sizeof(TYPE)) {\
+                               pixel = (TYPE *)(buf + ptr);\
+                               a = (long int)(pixel[3] * 0xFFFFL); /* alpha scaled to the 16-bit range */\
+                               ciexyz_to_srgb##SUFFIX(pixel[0], pixel[1], 
pixel[2], &r, &g, &b);\
+                               r = srgb_encode##SUFFIX(r);\
+                               g = srgb_encode##SUFFIX(g);\
+                               b = srgb_encode##SUFFIX(b);\
+                               srgb_to_yuv##SUFFIX(r, g, b, pixel + 0, pixel + 
1, pixel + 2);\
+                               y = (long int)(pixel[0] * 0xFFFFL) +  16L * 
256L;\
+                               u = (long int)(pixel[1] * 0xFFFFL) + 128L * 
256L;\
+                               v = (long int)(pixel[2] * 0xFFFFL) + 128L * 
256L;\
+                               *pixels++ = htole16((uint16_t)CLIP(0, a, 
0xFFFFL));\
+                               *pixels++ = htole16((uint16_t)CLIP(0, y, 
0xFFFFL));\
+                               *pixels++ = htole16((uint16_t)CLIP(0, u, 
0xFFFFL));\
+                               *pixels++ = htole16((uint16_t)CLIP(0, v, 
0xFFFFL));\
+                               if (pixels == end)\
+                                       ewriteall(fd, pixels = pixbuf, 
sizeof(pixbuf), "<subprocess>");\
+                       }\
+               }\
+               ewriteall(fd, pixbuf, (size_t)(pixels - pixbuf) * 
sizeof(*pixels), "<subprocess>");\
+       } while (0)
+
+static void process_xyza (struct stream *stream, size_t n) {PROCESS(double,);} /* chosen by main() for "xyza" */
+static void process_xyzaf(struct stream *stream, size_t n) {PROCESS(float, 
_f);} /* chosen by main() for "xyza f" */
 
 int
 main(int argc, char *argv[])
@@ -99,6 +101,8 @@ main(int argc, char *argv[])
 
        if (!strcmp(stream.pixfmt, "xyza"))
                process = process_xyza;
+       else if (!strcmp(stream.pixfmt, "xyza f"))
+               process = process_xyzaf;
        else
                eprintf("pixel format %s is not supported, try xyza\n", 
stream.pixfmt);
 
diff --git a/src/stream.c b/src/stream.c
index 4bb0195..6ff71c0 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -184,6 +184,39 @@ encheck_compat(int status, const struct stream *a, const 
struct stream *b)
 }
 
 
+/*
+ * Resolve the pixel format named by `specified` against `current`.
+ * When `current` is "xyza" or "xyza f": "f" and "xyza f" yield "xyza f",
+ * "!f" and "xyza !f" yield "xyza", and a bare "xyza" keeps the precision
+ * of `current`. Anything else, including any `specified` given with an
+ * unrecognised `current`, is returned unchanged for the caller to accept
+ * or reject.
+ */
+const char *
+get_pixel_format(const char *specified, const char *current)
+{
+       int single_precision;
+
+       /* Only the two xyza variants can be re-interpreted. */
+       if (!strcmp(current, "xyza f"))
+               single_precision = 1;
+       else if (!strcmp(current, "xyza"))
+               single_precision = 0;
+       else
+               return specified;
+
+       /* An explicit precision overrides the one taken from `current`. */
+       if (!strcmp(specified, "xyza f") || !strcmp(specified, "f"))
+               single_precision = 1;
+       else if (!strcmp(specified, "xyza !f") || !strcmp(specified, "!f"))
+               single_precision = 0;
+       else if (strcmp(specified, "xyza"))
+               return specified;
+
+       return single_precision ? "xyza f" : "xyza";
+}
+
+
 int
 enread_frame(int status, struct stream *stream, void *buf, size_t n)
 {
diff --git a/src/stream.h b/src/stream.h
index 41a1a7f..280dde1 100644
--- a/src/stream.h
+++ b/src/stream.h
@@ -62,6 +62,7 @@ void eninf_check_fd(int status, int fd, const char *file);
 int check_frame_size(size_t width, size_t height, size_t pixel_size);
 void encheck_frame_size(int status, size_t width, size_t height, size_t 
pixel_size, const char *prefix, const char *fname);
 void encheck_compat(int status, const struct stream *a, const struct stream 
*b);
+const char *get_pixel_format(const char *specified, const char *current);
 int enread_frame(int status, struct stream *stream, void *buf, size_t n);
 
 void nprocess_stream(int status, struct stream *stream, void (*process)(struct 
stream *stream, size_t n));
diff --git a/src/util/colour.h b/src/util/colour.h
index ae50d32..7622af0 100644
--- a/src/util/colour.h
+++ b/src/util/colour.h
@@ -15,7 +15,23 @@ srgb_encode(double t)
                t = -t;
                sign = -1;
        }
-       t = t <= 0.0031306684425217108 ? 12.92 * t : 1.055 * pow(t, 1 / 2.4) - 
0.055;
+       t = t <= 0.0031306684425217108
+               ? 12.92 * t
+               : 1.055 * pow(t, 1 / 2.4) - 0.055;
+       return t * sign;
+}
+
+static inline float
+srgb_encode_f(float t)
+{ /* float counterpart of srgb_encode(): 12.92 * t at or below the threshold, a 1/2.4 power curve above it */
+       float sign = 1;
+       if (t < 0) { /* work on the magnitude and restore the sign at the end */
+               t = -t;
+               sign = -1;
+       }
+       t = t <= (float)0.0031306684425217108
+               ? (float)12.92 * t
+               : (float)1.055 * powf(t, 1 / (float)2.4) - (float)0.055;
        return t * sign;
 }
 
@@ -27,7 +43,23 @@ srgb_decode(double t)
                t = -t;
                sign = -1;
        }
-       t = t <= 0.0031306684425217108 * 12.92 ? t / 12.92 : pow((t + 0.055) / 
1.055, 2.4);
+       t = t <= 0.0031306684425217108 * 12.92
+               ? t / 12.92
+               : pow((t + 0.055) / 1.055, 2.4);
+       return t * sign;
+}
+
+static inline float
+srgb_decode_f(float t)
+{ /* float counterpart of srgb_decode(): t / 12.92 at or below the threshold, a 2.4 power curve above it */
+       float sign = 1;
+       if (t < 0) { /* work on the magnitude and restore the sign at the end */
+               t = -t;
+               sign = -1;
+       }
+       t = t <= (float)0.0031306684425217108 * (float)12.92
+               ? t / (float)12.92
+               : powf((t + (float)0.055) / (float)1.055, (float)2.4);
        return t * sign;
 }
 
@@ -42,6 +74,16 @@ yuv_to_srgb(double y, double u, double v, double *r, double 
*g, double *b)
 }
 
 static inline void
+yuv_to_srgb_f(float y, float u, float v, float *r, float *g, float *b)
+{ /* float variant: each output is a weighted sum of y, u, v; coefficients are cast to float before multiplying */
+#define MULTIPLY(CY, CU, CV)  ((float)(CY) * y + (float)(CU) * u + (float)(CV) 
* v)
+       *r = MULTIPLY(1,  0.00028328010485821202317155420580263580632163211703, 
 1.14070449590558520291949662350816652178764343261719);
+       *g = MULTIPLY(1, -0.39630886669497211727275498560629785060882568359375, 
-0.58107364288228224857846271333983168005943298339844);
+       *b = MULTIPLY(1,  2.03990003507541306504435851820744574069976806640625, 
 0.00017179031692307700847528739718228507626918144524);
+#undef MULTIPLY
+}
+
+static inline void
 srgb_to_yuv(double r, double g, double b, double *y, double *u, double *v)
 {
 #define MULTIPLY(CR, CG, CB) ((CR) * r + (CG) * g + (CB) * b)
@@ -56,6 +98,20 @@ srgb_to_yuv(double r, double g, double b, double *y, double 
*u, double *v)
 }
 
 static inline void
+srgb_to_yuv_f(float r, float g, float b, float *y, float *u, float *v)
+{ /* float variant: each output is a weighted sum of r, g, b; coefficients are cast to float before multiplying */
+#define MULTIPLY(CR, CG, CB) ((float)(CR) * r + (float)(CG) * g + (float)(CB) 
* b)
+       *y = MULTIPLY(0.299, 0.587, 0.114);
+       *u = MULTIPLY(-0.14662756598240470062854967636667424812912940979004,
+                     -0.28771586836102963635752871596196200698614120483398,
+                      0.43434343434343436474165400795754976570606231689453);
+       *v = MULTIPLY( 0.61456892577224520035628074765554629266262054443359,
+                     -0.51452282157676354490405401520547457039356231689453,
+                     -0.10004610419548178035231700278018251992762088775635);
+#undef MULTIPLY
+}
+
+static inline void
 ciexyz_to_srgb(double x, double y, double z, double *r, double *g, double *b)
 {
 #define MULTIPLY(CX, CY, CZ)  ((CX) * x + (CY) * y + (CZ) * z)
@@ -66,6 +122,16 @@ ciexyz_to_srgb(double x, double y, double z, double *r, 
double *g, double *b)
 }
 
 static inline void
+ciexyz_to_srgb_f(float x, float y, float z, float *r, float *g, float *b)
+{ /* float variant: each output is a weighted sum of x, y, z; no sRGB transfer function is applied here */
+#define MULTIPLY(CX, CY, CZ)  ((float)(CX) * x + (float)(CY) * y + (float)(CZ) 
* z)
+       *r = MULTIPLY(3.240446254647737500675930277794, 
-1.537134761820080575134284117667, -0.498530193022728718155178739835);
+       *g = MULTIPLY(-0.969266606244679751469561779231, 
1.876011959788370209167851498933, 0.041556042214430065351304932619);
+       *b = MULTIPLY(0.055643503564352832235773149705, 
-0.204026179735960239147729566866, 1.057226567722703292062647051353);
+#undef MULTIPLY
+}
+
+static inline void
 srgb_to_ciexyz(double r, double g, double b, double *x, double *y, double *z)
 {
 #define MULTIPLY(CR, CG, CB) ((CR) * r + (CG) * g + (CB) * b)
@@ -76,6 +142,16 @@ srgb_to_ciexyz(double r, double g, double b, double *x, 
double *y, double *z)
 }
 
 static inline void
+srgb_to_ciexyz_f(float r, float g, float b, float *x, float *y, float *z)
+{ /* float variant: each output is a weighted sum of r, g, b; inputs are used as given, without decoding */
+#define MULTIPLY(CR, CG, CB) ((float)(CR) * r + (float)(CG) * g + (float)(CB) 
* b)
+       *x = MULTIPLY(0.412457445582367576708548995157, 
0.357575865245515878143578447634, 0.180437247826399665973085006954);
+       *y = MULTIPLY(0.212673370378408277403536885686, 
0.715151730491031756287156895269, 0.072174899130559869164791564344);
+       *z = MULTIPLY(0.019333942761673460208893260415, 
0.119191955081838593666354597644, 0.950302838552371742508739771438);
+#undef MULTIPLY
+}
+
+static inline void
 scaled_yuv_to_ciexyz(double y, double u, double v, double *xp, double *yp, 
double *zp)
 {
 #define MULTIPLY(CY, CU, CV) ((CY) * y + (CU) * u + (CV) * v)
@@ -92,6 +168,22 @@ scaled_yuv_to_ciexyz(double y, double u, double v, double 
*xp, double *yp, doubl
 }
 
 static inline void
+scaled_yuv_to_ciexyz_f(float y, float u, float v, float *xp, float *yp, float 
*zp)
+{ /* float variant: each output is a weighted sum of y, u, v; coefficients are cast to float before multiplying */
+#define MULTIPLY(CY, CU, CV) ((float)(CY) * y + (float)(CU) * u + (float)(CV) 
* v)
+       *xp = MULTIPLY( 0.00001450325106667098632156481796684488472237717360,
+                       0.00000345586790639342739093228633329157872822179343,
+                       0.00000400923398630552893485111398685916128670214675);
+       *yp = MULTIPLY( 0.00001525902189669641837040624243737596543724066578,
+                      -0.00000207722814409390653614547427030512238843584782,
+                      -0.00000263898607692305410302407824019166326934282552);
+       *zp = MULTIPLY( 0.00001661446153041708825425643025752719950105529279,
+                       0.00002885925752619118069149627137104374696718878113,
+                       -0.00000071781086875769179526501342566979779746816348);
+#undef MULTIPLY
+}
+
+static inline void
 ciexyz_to_scaled_yuv(double x, double y, double z, double *yp, double *up, 
double *vp)
 {
 #define MULTIPLY(CX, CY, CZ) ((CX) * x + (CY) * y + (CZ) * z)
@@ -106,3 +198,19 @@ ciexyz_to_scaled_yuv(double x, double y, double z, double 
*yp, double *up, doubl
                        -28411.65702312920984695665538311004638671875);
 #undef MULTIPLY
 }
+
+static inline void
+ciexyz_to_scaled_yuv_f(float x, float y, float z, float *yp, float *up, float 
*vp)
+{ /* float variant: each output is a weighted sum of x, y, z; coefficients are cast to float before multiplying */
+#define MULTIPLY(CX, CY, CZ) ((float)(CX) * x + (float)(CY) * y + (float)(CZ) 
* z)
+       *yp = MULTIPLY(  26625.38231027395886485464870929718017578125,
+                        40524.0090949436053051613271236419677734375,
+                         -271.5313105642117079696618020534515380859375);
+       *up = MULTIPLY( -11278.3751445417292416095733642578125,
+                       -26409.91773157499847002327442169189453125,
+                        34100.5706543184860493056476116180419921875);
+       *vp = MULTIPLY( 162829.60100012840121053159236907958984375,
+                      -123829.313212639070115983486175537109375,
+                       -28411.65702312920984695665538311004638671875);
+#undef MULTIPLY
+}

Reply via email to