Re: [Mesa-dev] [PATCH] nir: add lowering for idiv/udiv/umod

2015-04-03 Thread Eric Anholt
Rob Clark  writes:

> From: Rob Clark 
>
> Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD().
> See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an
> adaptation of the nv50 code from Ilia Mirkin).
>
> Also, including a py script that implements the same algo with numpy,
> based on something written by Ilia (and beaten on with a hammer a bit
> by me).
>
> I've tested this on i965 hacked up to insert the idiv lowering pass.

Tested-by: Eric Anholt  (vc4)

I don't think we should commit the python file, though.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add lowering for idiv/udiv/umod

2015-04-03 Thread Matt Turner
On Fri, Apr 3, 2015 at 8:21 AM, Rob Clark  wrote:
> From: Rob Clark 
>
> Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD().
> See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an
> adaptation of the nv50 code from Ilia Mirkin).
>
> Also, including a py script that implements the same algo with numpy,
> based on something written by Ilia (and beaten on with a hammer a bit
> by me).
>
> I've tested this on i965 hacked up to insert the idiv lowering pass.
>
> Signed-off-by: Rob Clark 
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/div-lowering.py  |  75 

I have no idea if it's valuable to include this file in Mesa (I sort of
doubt it is), but if it is, it needs to be included in
src/glsl/Makefile.am's EXTRA_DIST. It also needs a license header.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: add lowering for idiv/udiv/umod

2015-04-03 Thread Ilia Mirkin
On Fri, Apr 3, 2015 at 11:21 AM, Rob Clark  wrote:
> From: Rob Clark 
>
> Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD().
> See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an
> adaptation of the nv50 code from Ilia Mirkin).
>
> Also, including a py script that implements the same algo with numpy,
> based on something written by Ilia (and beaten on with a hammer a bit
> by me).
>
> I've tested this on i965 hacked up to insert the idiv lowering pass.
>
> Signed-off-by: Rob Clark 
> ---
>  src/glsl/Makefile.sources |   1 +
>  src/glsl/nir/div-lowering.py  |  75 

Python *really* hates files with - in their name. You can't import
them, so you have to use underscores. Admittedly this script isn't
designed to be imported, but let's not rule that out for the future.

>  src/glsl/nir/nir.h|   1 +
>  src/glsl/nir/nir_lower_idiv.c | 157 ++
>  4 files changed, 234 insertions(+)
>  create mode 100755 src/glsl/nir/div-lowering.py
>  create mode 100644 src/glsl/nir/nir_lower_idiv.c
>
> diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
> index ffce706..5d70e88 100644
> --- a/src/glsl/Makefile.sources
> +++ b/src/glsl/Makefile.sources
> @@ -33,6 +33,7 @@ NIR_FILES = \
> nir/nir_lower_atomics.c \
> nir/nir_lower_global_vars_to_local.c \
> nir/nir_lower_locals_to_regs.c \
> +   nir/nir_lower_idiv.c \
> nir/nir_lower_io.c \
> nir/nir_lower_phis_to_scalar.c \
> nir/nir_lower_samplers.cpp \
> diff --git a/src/glsl/nir/div-lowering.py b/src/glsl/nir/div-lowering.py
> new file mode 100755
> index 000..87db784
> --- /dev/null
> +++ b/src/glsl/nir/div-lowering.py
> @@ -0,0 +1,75 @@
> +#!/usr/bin/python

I think it's BS, but you're going to get yelled at by people who have
foolishly set up python to point to python3 (despite the *huge*
quantity of programs that will never change and assume that python ==
python2). Probably just hard-code it to python2 for now, which is a
symlink available in most, but not all, python installations.

> +
> +import numpy as np
> +import sys
> +
> +op = sys.argv[1]
> +
> +if op not in ("idiv", "udiv", "umod"):
> +   print "invalid op:", op
> +   exit(1)
> +
> +is_signed = op == "idiv"
> +
> +if is_signed:
> +   numer = np.int32(sys.argv[2])
> +   denom = np.int32(sys.argv[3])
> +else:
> +   numer = np.uint32(sys.argv[2])
> +   denom = np.uint32(sys.argv[3])
> +
> +print op, numer, denom, "\n"

print prints a newline by default, so there is no need for the "\n" --
unless there's a , at the end, in which case it skips the newline. That
is what I was doing in my version, since I wanted output like
"a / b = 5" or whatever.

> +
> +
> +if is_signed:
> +   af = np.float32(numer)
> +   bf = np.float32(denom)
> +   af = np.abs(af)
> +   bf = np.abs(bf)
> +   a = np.abs(numer).view(np.uint32)
> +   b = np.abs(denom).view(np.uint32)
> +else:
> +   af = np.float32(numer)
> +   bf = np.float32(denom)
> +   a = numer
> +   b = denom
> +
> +# get first result:
> +bf = np.reciprocal(bf)
> +bf = (bf.view(np.uint32) - np.uint32(2)).view(np.float32)
> +q = af * bf
> +
> +if is_signed:
> +   q = np.int32(q).view(np.uint32)
> +else:
> +   q = np.uint32(q).view(np.uint32)
> +
> +# get error of first result:
> +r = q * b
> +r = a - r
> +r = np.float32(r)
> +r = r * bf
> +r = np.uint32(r)
> +
> +# add quotients:
> +q = q + r
> +
> +# correction: if modulus >= divisor, add 1
> +r = q * b
> +r = a - r
> +
> +r = np.uint32(1) if r.view(np.uint32) >= b.view(np.uint32) else np.uint32(0)
> +q = q + r
> +
> +if is_signed:
> +   r = np.bitwise_xor(numer, denom)
> +   r = np.right_shift(r, 31)
> +   b = -q
> +   q = b if r else q
> +
> +if op == "umod":
> +   r = q * b
> +   q = a - r
> +
> +print "=", q.view(np.int32)
> +
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index c14c51c..20984e9 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -1605,6 +1605,7 @@ void nir_lower_samplers(nir_shader *shader,
>
>  void nir_lower_system_values(nir_shader *shader);
>  void nir_lower_tex_projector(nir_shader *shader);
> +void nir_lower_idiv(nir_shader *shader);
>
>  void nir_lower_atomics(nir_shader *shader);
>  void nir_lower_to_source_mods(nir_shader *shader);
> diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
> new file mode 100644
> index 000..c2f08df
> --- /dev/null
> +++ b/src/glsl/nir/nir_lower_idiv.c
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright © 2015 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit person

[Mesa-dev] [PATCH] nir: add lowering for idiv/udiv/umod

2015-04-03 Thread Rob Clark
From: Rob Clark 

Based on the algo from NV50LegalizeSSA::handleDIV() and handleMOD().
See also trans_idiv() in freedreno/ir3/ir3_compiler.c (which was an
adaptation of the nv50 code from Ilia Mirkin).

Also, including a py script that implements the same algo with numpy,
based on something written by Ilia (and beaten on with a hammer a bit
by me).

I've tested this on i965 hacked up to insert the idiv lowering pass.

Signed-off-by: Rob Clark 
---
 src/glsl/Makefile.sources |   1 +
 src/glsl/nir/div-lowering.py  |  75 
 src/glsl/nir/nir.h|   1 +
 src/glsl/nir/nir_lower_idiv.c | 157 ++
 4 files changed, 234 insertions(+)
 create mode 100755 src/glsl/nir/div-lowering.py
 create mode 100644 src/glsl/nir/nir_lower_idiv.c

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index ffce706..5d70e88 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -33,6 +33,7 @@ NIR_FILES = \
nir/nir_lower_atomics.c \
nir/nir_lower_global_vars_to_local.c \
nir/nir_lower_locals_to_regs.c \
+   nir/nir_lower_idiv.c \
nir/nir_lower_io.c \
nir/nir_lower_phis_to_scalar.c \
nir/nir_lower_samplers.cpp \
diff --git a/src/glsl/nir/div-lowering.py b/src/glsl/nir/div-lowering.py
new file mode 100755
index 000..87db784
--- /dev/null
+++ b/src/glsl/nir/div-lowering.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+
+import numpy as np
+import sys
+
+op = sys.argv[1]
+
+if op not in ("idiv", "udiv", "umod"):
+   print "invalid op:", op
+   exit(1)
+
+is_signed = op == "idiv"
+
+if is_signed:
+   numer = np.int32(sys.argv[2])
+   denom = np.int32(sys.argv[3])
+else:
+   numer = np.uint32(sys.argv[2])
+   denom = np.uint32(sys.argv[3])
+
+print op, numer, denom, "\n"
+
+
+if is_signed:
+   af = np.float32(numer)
+   bf = np.float32(denom)
+   af = np.abs(af)
+   bf = np.abs(bf)
+   a = np.abs(numer).view(np.uint32)
+   b = np.abs(denom).view(np.uint32)
+else:
+   af = np.float32(numer)
+   bf = np.float32(denom)
+   a = numer
+   b = denom
+
+# get first result:
+bf = np.reciprocal(bf)
+bf = (bf.view(np.uint32) - np.uint32(2)).view(np.float32)
+q = af * bf
+
+if is_signed:
+   q = np.int32(q).view(np.uint32)
+else:
+   q = np.uint32(q).view(np.uint32)
+
+# get error of first result:
+r = q * b
+r = a - r
+r = np.float32(r)
+r = r * bf
+r = np.uint32(r)
+
+# add quotients:
+q = q + r
+
+# correction: if modulus >= divisor, add 1
+r = q * b
+r = a - r
+
+r = np.uint32(1) if r.view(np.uint32) >= b.view(np.uint32) else np.uint32(0)
+q = q + r
+
+if is_signed:
+   r = np.bitwise_xor(numer, denom)
+   r = np.right_shift(r, 31)
+   b = -q
+   q = b if r else q
+
+if op == "umod":
+   r = q * b
+   q = a - r
+
+print "=", q.view(np.int32)
+
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index c14c51c..20984e9 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1605,6 +1605,7 @@ void nir_lower_samplers(nir_shader *shader,
 
 void nir_lower_system_values(nir_shader *shader);
 void nir_lower_tex_projector(nir_shader *shader);
+void nir_lower_idiv(nir_shader *shader);
 
 void nir_lower_atomics(nir_shader *shader);
 void nir_lower_to_source_mods(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
new file mode 100644
index 000..c2f08df
--- /dev/null
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Rob Clark 
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Lowers idiv/udiv/umod
+ * Based on NV50LegalizeSSA::handleDIV()
+ *
+ * Note that this is probably not enough precision for compute shaders.
+ * Perhaps we want a second higher precision (looping) version of this?
+