1 year ago
#108163
GPrathap
How to get the symbolic gradient in Tensorflow 2.x
I want to convert https://web.casadi.org/blog/tensorflow/ , which was written in Tensorflow 1 with casadi, using Tensorflow 2. I have changed the code yet tf.disable_v2_behavior()
had to be done to get it working.
import casadi as ca
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
class TensorFlowEvaluator(ca.Callback):
def __init__(self,t_in,t_out,session, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
session: a tensorflow session
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.session = session
self.refs = []
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self,i):
return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())
def get_sparsity_out(self,i):
return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())
def eval(self,arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
d = dict((v,arg[i].toarray()) for i,v in enumerate(self.t_in))
# Evaluate the tensorflow expressions
ret = self.session.run(self.t_out,feed_dict=d)
return ret
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self,nadj,name,inames,onames,opts):
# Construct tensorflow placeholders for the reverse seeds
adj_seed = [tf.placeholder(shape=self.sparsity_out(i).shape,dtype=tf.float64) for i in range(self.n_out())]
# Construct the reverse tensorflow graph through 'gradients'
grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in+adj_seed,grad,self.session)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name,nominal_in+nominal_out+adj_seed,callback.call(nominal_in+adj_seed),inames,onames)
if __name__=="__main__":
a = tf.placeholder(shape=(2,2),dtype=tf.float64)
b = tf.placeholder(shape=(2,1),dtype=tf.float64)
y = tf.matmul(tf.sin(a), b)
with tf.Session() as session:
f_tf = TensorFlowEvaluator([a,b], [y], session)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Now I want to write this purely using Tensorflow 2.x. Eager execution is enabled by default I was thinking to use @tf.function
to calculate the gradient,
@tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
Here is the update the code at the moment,
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
class TensorFlowEvaluator(ca.Callback):
def __init__(self, t_in, t_out, model, opts={}):
"""
t_in: list of inputs (tensorflow placeholders)
t_out: list of outputs (tensors dependent on those placeholders)
"""
ca.Callback.__init__(self)
assert isinstance(t_in,list)
self.t_in = t_in
assert isinstance(t_out,list)
self.t_out = t_out
self.construct("TensorFlowEvaluator", opts)
self.refs = []
self.model = model
def get_n_in(self): return len(self.t_in)
def get_n_out(self): return len(self.t_out)
def get_sparsity_in(self, i):
tesnor_shape = self.t_in[i].get_shape().as_list()
return Sparsity.dense(tesnor_shape[0], tesnor_shape[1])
# return Sparsity.dense(4, 1)
def get_sparsity_out(self, i):
return Sparsity.dense(2, 1)
def eval(self, arg):
# Associate each tensorflow input with the numerical argument passed by CasADi
print(arg)
# d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
updated_t = []
for i,v in enumerate(self.t_in):
updated_t.append(tf.Variable(arg[i].toarray()))
# Evaluate the tensorflow expressions
if not tf.is_tensor(self.t_out[0]):
ret = self.t_out[0](updated_t)[0].numpy()
else:
ret = self.t_out[0](updated_t).numpy()
return [ca.DM(ret)]
# Vanilla tensorflow offers just the reverse mode AD
def has_reverse(self,nadj): return nadj==1
def get_reverse(self, nadj, name, inames, onames, opts):
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
tf.config.run_functions_eagerly(False)
print("=============== self.t_in========", self.t_out)
print("=============== self.t_out========", self.t_in)
# grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
out_, grad = self.t_out[0](self.t_in)
print("============== grad========", grad)
# Create another TensorFlowEvaluator object
callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
# Make sure you keep a reference to it
self.refs.append(callback)
# Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
nominal_in = self.mx_in()
nominal_out = self.mx_out()
adj_seed = self.mx_out()
return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)
if __name__=="__main__":
initializer = tf.random_normal_initializer(mean=1., stddev=2.)
a = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
b = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))
@tf.function
def f_k(input_dat):
y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
grads = tf.gradients([y], input_dat)
# grads = tape.gradient([y], input_dat)
tf.print('tf >>', grads)
print('print >>', grads)
return y, grads
f_tf = TensorFlowEvaluator([a,b], [f_k], None)
a = ca.MX.sym("a",2,2)
b = ca.MX.sym("a",2,1)
y = f_tf(a,b)
yref = ca.mtimes(ca.sin(a),b)
f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
fref = ca.Function('f',[a,b],[ca.jacobian(yref,a)])
print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
Problem:
In the get_reverse
method, when calculating the gradient, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed)
, I get symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>]
in Tensorflow 1.
However, in Tensorflow 2, I always get numerical results. I can access the graph but those are not callable. self.t_out[0].get_concrete_function(self.t_in).graph
similar to here
What would be the better way to get the symbolic gradient like in Tensorflow 1?
Expected Behaviour:
out_, grad = self.t_out[0](self.t_in)
grad
should return symbolic form of the gradient rather than numerical evaluation
python
tensorflow
tensorflow2.0
casadi
0 Answers
Your Answer