[theano-users] Re: Unused input error with chained OpFromGraph ops

2017-07-17 Thread nicolas . granger . m
Hello,

I still haven't managed to track the error down. Below is a shorter example 
that triggers it. It seems Theano tries to create a variable for the output 
gradient of a node through which I do not backpropagate. At some point it 
hits a DisconnectedType instance and raises an error.

import numpy as np
import theano.tensor as T
import theano


def make_ops():
    x_var = T.vector()
    m_var = T.bvector()

    r = m_var.sum().astype('floatX')
    z = x_var * m_var / r

    def grad_op1(inputs, output_gradients):
        return [
            output_gradients[0],  # computation delegated to op2
            theano.gradient.DisconnectedType()()
        ]

    op1 = theano.OpFromGraph(
        inputs=[x_var, m_var],
        outputs=[z, r],
        grad_overrides=grad_op1,
        inline=True,
        name="op1")

    return op1


op1 = make_ops()
x_var = T.vector()
m_var = T.bvector()
z, r = op1(x_var, m_var)

g = theano.grad(T.sum(z), wrt=x_var)
print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
              m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))

output:
TypeError: Cannot convert Type DisconnectedType (of Variable
<DisconnectedType>) into Type TensorType(float32, scalar). You can try to
manually convert <DisconnectedType> into a TensorType(float32, scalar).

Process finished with exit code 1
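
For what it's worth, the scalar TensorType in the error matches r, so one 
untested workaround for this minimal example might be to not expose r as an 
output at all and recompute it inside the gradient override. A rough sketch 
(the names make_op / grad_op are just for illustration, not verified against 
this Theano version):

def make_op():
    x_var = T.vector()
    m_var = T.bvector()

    r = m_var.sum().astype('floatX')
    z = x_var * m_var / r

    def grad_op(inputs, output_gradients):
        _, m_ = inputs
        # recompute r here instead of returning it as a second output
        r_ = m_.sum().astype('floatX')
        return [
            output_gradients[0] * m_ / r_,         # gradient wrt x
            theano.gradient.DisconnectedType()(),  # still no gradient wrt the mask
        ]

    return theano.OpFromGraph(
        inputs=[x_var, m_var],
        outputs=[z],
        grad_overrides=grad_op,
        inline=True,
        name="op1")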



On Thursday, July 13, 2017 at 13:03:28 UTC+2, nicolas@gmail.com wrote:
>
> Hi,
>
> Thank you for the suggestion; actually, inlining makes more sense for what 
> I am trying to do. 
>
> However, a casting issue arises when trying to compute the derivative wrt 
> the continuous input. If I understood correctly, DisconnectedType should be 
> returned as the gradient for integer inputs (or inputs wrt which I don't 
> need the derivative), right?
>
> Below is the slightly modified code which illustrates this new issue:
>
> import numpy as np
> import theano.tensor as T
> import theano
>
>
> def make_ops():
>     x_var = T.vector()
>     m_var = T.bvector()
>
>     r = m_var.sum().astype('floatX')
>     z = x_var * m_var / r
>
>     def grad_op1(inputs, output_gradients):
>         return [
>             output_gradients[0],  # computation delegated to op2
>             theano.gradient.DisconnectedType()(),
>         ]
>
>     op1 = theano.OpFromGraph(
>         inputs=[x_var, m_var],
>         outputs=[z, r],
>         grad_overrides=grad_op1,
>         inline=True)
>
>     z_var = T.vector()
>     r_var = T.scalar()
>
>     def grad_op2(inputs, output_gradients):
>         _, m_, r_ = inputs
>         return [
>             m_ * r_,
>             theano.gradient.DisconnectedType()(),
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op2 = theano.OpFromGraph(
>         inputs=[z_var, m_var, r_var],
>         outputs=[z_var],
>         grad_overrides=grad_op2,
>         inline=True)
>
>     return op1, op2
>
>
> op1, op2 = make_ops()
> x_var = T.vector()
> m_var = T.bvector()
> z_, r = op1(x_var, m_var)
> z = op2(z_, m_var, r)
>
> g = theano.grad(T.sum(z), wrt=x_var)
> print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>
>
>

[theano-users] Re: Unused input error with chained OpFromGraph ops

2017-07-13 Thread nicolas . granger . m
Hi,

Thank you for the suggestion; actually, inlining makes more sense for what I 
am trying to do. 

However, a casting issue arises when trying to compute the derivative wrt 
the continuous input. If I understood correctly, DisconnectedType should be 
returned as the gradient for integer inputs (or inputs wrt which I don't 
need the derivative), right?

Below is the slightly modified code which illustrates this new issue:

import numpy as np
import theano.tensor as T
import theano


def make_ops():
    x_var = T.vector()
    m_var = T.bvector()

    r = m_var.sum().astype('floatX')
    z = x_var * m_var / r

    def grad_op1(inputs, output_gradients):
        return [
            output_gradients[0],  # computation delegated to op2
            theano.gradient.DisconnectedType()(),
        ]

    op1 = theano.OpFromGraph(
        inputs=[x_var, m_var],
        outputs=[z, r],
        grad_overrides=grad_op1,
        inline=True)

    z_var = T.vector()
    r_var = T.scalar()

    def grad_op2(inputs, output_gradients):
        _, m_, r_ = inputs
        return [
            m_ * r_,
            theano.gradient.DisconnectedType()(),
            theano.gradient.DisconnectedType()()
        ]

    op2 = theano.OpFromGraph(
        inputs=[z_var, m_var, r_var],
        outputs=[z_var],
        grad_overrides=grad_op2,
        inline=True)

    return op1, op2


op1, op2 = make_ops()
x_var = T.vector()
m_var = T.bvector()
z_, r = op1(x_var, m_var)
z = op2(z_, m_var, r)

g = theano.grad(T.sum(z), wrt=x_var)
print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
              m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))



On Tuesday, July 11, 2017 at 11:32:50 UTC+2, nicolas@gmail.com wrote:
>
> Hi,
>
> I am trying to split a computation across two ops in order to avoid 
> spurious computations when computing the gradient.
> My current attempt uses a first op which returns the desired result for 
> the forward part plus extra intermediate results. The second op just 
> forwards the desired result, but its grad is overridden to compute the 
> gradient based on the intermediate results.
>
> In this configuration, Theano complains about unused inputs in the forward 
> computation because the intermediate results are not used by the forward 
> method of the second op.
>
> Is this an expected behaviour or a bug?
>
> 
>
> import numpy as np
> import theano.tensor as T
> import theano
>
>
> def make_ops():
>     x_var = T.vector()
>     m_var = T.bvector()
>
>     r = m_var.sum().astype('floatX')
>     z = x_var * m_var / r
>
>     def grad_op1(inputs, output_gradients):
>         return [
>             output_gradients[0],  # computation delegated to op2
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op1 = theano.OpFromGraph(
>         inputs=[x_var, m_var],
>         outputs=[z, r],
>         grad_overrides=grad_op1)
>
>     z_var = T.vector()
>     r_var = T.scalar()
>
>     def grad_op2(inputs, output_gradients):
>         _, m_, r_ = inputs
>         return [
>             m_ * r_,
>             theano.gradient.DisconnectedType()(),
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op2 = theano.OpFromGraph(
>         inputs=[z_var, m_var, r_var],
>         outputs=[z_var],
>         grad_overrides=grad_op2)
>
>     return op1, op2
>
>
> op1, op2 = make_ops()
> x_var = T.vector()
> m_var = T.bvector()
> z_, r = op1(x_var, m_var)
> z = op2(z_, m_var, r)
>
> print(z_.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>                m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>
> f = theano.function([x_var, m_var], [z], on_unused_input='ignore')  # raises anyway
>
> print(f(np.array([1., .3, .0, .2], dtype=np.float32),
>         np.array([1, 0, 1, 1], dtype=np.int8)))
>
> # g = theano.grad(T.sum(z), wrt=x_var)
> # print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
> #               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>
>
>


[theano-users] Re: Unused input error with chained OpFromGraph ops

2017-07-11 Thread Pascal Lamblin
That error actually comes from the function compilation that happens inside 
op2.
One option is to add "inline=True" to the call to OpFromGraph returning 
op2. In that case the gradient still gets overridden, but you will have the 
inlined subgraph in the global one, and OpFromGraph will not build an inner 
function.
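A minimal sketch of that first option, keeping the names from the quoted 
code below:

op2 = theano.OpFromGraph(
    inputs=[z_var, m_var, r_var],
    outputs=[z_var],
    grad_overrides=grad_op2,
    inline=True)
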
The other option would be to add "on_unused_input='ignore'" to the call to 
OpFromGraph:

op2 = theano.OpFromGraph(
    inputs=[z_var, m_var, r_var],
    outputs=[z_var],
    grad_overrides=grad_op2,
    on_unused_input='ignore')

With it, the program worked, but I still got an error in g.eval(...):
TypeError: Cannot convert Type DisconnectedType (of Variable
<DisconnectedType>) into Type TensorType(float64, scalar). You can try to
manually convert <DisconnectedType> into a TensorType(float64, scalar).
I'm not sure where that comes from; please let us know if you do not figure 
it out.
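
Since the failing type in that error is a scalar matching r, one untested 
guess is that the DisconnectedType returned for r_var in grad_op2 is the 
variable being converted. A sketch of something that might be worth trying 
(not verified) is to return an explicit zero gradient for that input instead:

def grad_op2(inputs, output_gradients):
    _, m_, r_ = inputs
    return [
        m_ * r_,
        theano.gradient.DisconnectedType()(),
        T.zeros_like(r_),  # explicit zero instead of a disconnected gradient
    ]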

On Tuesday, July 11, 2017 at 5:32:50 AM UTC-4, nicolas.grange...@gmail.com 
wrote:
>
> Hi,
>
> I am trying to split a computation across two ops in order to avoid 
> spurious computations when computing the gradient.
> My current attempt uses a first op which returns the desired result for 
> the forward part plus extra intermediate results. The second op just 
> forwards the desired result, but its grad is overridden to compute the 
> gradient based on the intermediate results.
>
> In this configuration, Theano complains about unused inputs in the forward 
> computation because the intermediate results are not used by the forward 
> method of the second op.
>
> Is this an expected behaviour or a bug?
>
> 
>
> import numpy as np
> import theano.tensor as T
> import theano
>
>
> def make_ops():
>     x_var = T.vector()
>     m_var = T.bvector()
>
>     r = m_var.sum().astype('floatX')
>     z = x_var * m_var / r
>
>     def grad_op1(inputs, output_gradients):
>         return [
>             output_gradients[0],  # computation delegated to op2
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op1 = theano.OpFromGraph(
>         inputs=[x_var, m_var],
>         outputs=[z, r],
>         grad_overrides=grad_op1)
>
>     z_var = T.vector()
>     r_var = T.scalar()
>
>     def grad_op2(inputs, output_gradients):
>         _, m_, r_ = inputs
>         return [
>             m_ * r_,
>             theano.gradient.DisconnectedType()(),
>             theano.gradient.DisconnectedType()()
>         ]
>
>     op2 = theano.OpFromGraph(
>         inputs=[z_var, m_var, r_var],
>         outputs=[z_var],
>         grad_overrides=grad_op2)
>
>     return op1, op2
>
>
> op1, op2 = make_ops()
> x_var = T.vector()
> m_var = T.bvector()
> z_, r = op1(x_var, m_var)
> z = op2(z_, m_var, r)
>
> print(z_.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
>                m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>
> f = theano.function([x_var, m_var], [z], on_unused_input='ignore')  # raises anyway
>
> print(f(np.array([1., .3, .0, .2], dtype=np.float32),
>         np.array([1, 0, 1, 1], dtype=np.int8)))
>
> # g = theano.grad(T.sum(z), wrt=x_var)
> # print(g.eval({x_var: np.array([1., .3, .0, .2], dtype=np.float32),
> #               m_var: np.array([1, 0, 1, 1], dtype=np.int8)}))
>
>
>

-- 

--- 
You received this message because you are subscribed to the Google Groups 
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to theano-users+unsubscr...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.