How to improve the execution time of gradient estimation using tf.GradientTape in tensorflow 2.x
Asked 1 year ago · GPrathap · #120168
The objective is to implement https://web.casadi.org/blog/tensorflow/, which was originally written with TensorFlow 1.x and GPflow 1.x, using TensorFlow 2.x and GPflow 2.x.
I have implemented it in TensorFlow 2.x (the full code is attached at the end of the question). However, it is still not as fast as the original implementation with TensorFlow 1.x and GPflow 1.x.
Here are the execution times:
Using TensorFlow 1.x and GPflow 1.x:
EXIT: Optimal Solution Found.
solver      :   t_proc      (avg)     t_wall      (avg)    n_eval
nlp_f       |  50.62ms (  1.63ms)   15.51ms (500.32us)        31
nlp_g       |   4.16ms (134.16us)    4.15ms (133.90us)        31
nlp_grad_f  | 216.76ms (  6.77ms)  153.84ms (  4.81ms)        32
nlp_jac_g   |  21.29ms (665.25us)   21.46ms (670.63us)        32
total       | 332.16ms (332.16ms)  233.83ms (233.83ms)         1
Ncalls 63
Total time [s] 0.08702826499938965
Using TensorFlow 2.x and GPflow 2.x:
EXIT: Optimal Solution Found.
solver      :   t_proc      (avg)     t_wall      (avg)    n_eval
nlp_f       |  14.11ms (  1.76ms)    5.78ms (722.38us)         8
nlp_g       |   1.03ms (129.00us)    1.05ms (130.77us)         8
nlp_grad_f  |   1.23 s (136.79ms)    1.21 s (134.41ms)         9
nlp_jac_g   |   6.03ms (670.00us)    6.03ms (670.34us)         9
total       |   1.26 s (  1.26 s)    1.23 s (  1.23 s)         1
Ncalls 17
Total time [s] 0.8965554237365723
According to these execution times, the cost is dominated by the gradient evaluation nlp_grad_f. TensorFlow 1.x computes the gradient through a static graph, which appears to be considerably faster than TensorFlow 2.x with tf.GradientTape.
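A quick way to see the raw tf.GradientTape cost outside of CasADi/IPOPT is to time a single gradient evaluation in isolation. This is only a rough sketch; it reuses model and nd from ocp.py below, so the shapes are assumptions:

# Rough sketch (reuses model and nd from ocp.py below): time one gradient
# evaluation on its own, outside of CasADi/IPOPT, to see the raw GradientTape cost.
import time
import numpy as np
import tensorflow as tf

x_test = tf.constant(np.random.normal(size=(1, nd)), dtype=tf.float64)
seed = tf.ones((1, 1), dtype=tf.float64)  # adjoint seed for the scalar mean output

t0 = time.time()
with tf.GradientTape() as tape:
    tape.watch(x_test)
    mean, _ = model.predict_y(x_test)
grad = tape.gradient(mean, x_test, output_gradients=seed)
print("single gradient evaluation [s]:", time.time() - t0)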
My question is as follows: what is the proper way to implement the gradient evaluation in TensorFlow 2.x, or how can this code be improved further? I have read https://www.tensorflow.org/api_docs/python/tf/function and applied what I could to make it faster, but I would like to bring the timings down to what I get with TensorFlow 1.x.
@tf.function
def f_k(input_dat, get_grad_val=None):
    xf_tensor = input_dat
    if get_grad_val is not None:
        with tf.GradientTape(persistent=True) as tape:
            tape.watch(xf_tensor)
            mean, _ = model.predict_y(xf_tensor)
        grad_mean = tape.gradient(mean, xf_tensor, output_gradients=get_grad_val)
    else:
        mean, _ = model.predict_y(xf_tensor)
        grad_mean = None
    return mean, grad_mean
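One variant I am unsure about is fixing the input signature so that tf.function traces each graph only once and never retraces between calls. A rough sketch of what I mean (the [1, nd] and [1, 1] float64 shapes are my assumptions, based on opts["output_dim"] and opts["grad_dim"] in ocp.py below):

# Sketch only (not the code used above): split f_k into two tf.functions with a
# fixed input_signature so each graph is traced exactly once.
x_spec = tf.TensorSpec(shape=[1, nd], dtype=tf.float64)
seed_spec = tf.TensorSpec(shape=[1, 1], dtype=tf.float64)

@tf.function(input_signature=[x_spec])
def predict_mean(x):
    # Forward pass only: GP posterior mean at x
    mean, _ = model.predict_y(x)
    return mean

@tf.function(input_signature=[x_spec, seed_spec])
def predict_mean_grad(x, adj_seed):
    # Forward pass plus reverse-mode gradient, seeded with the CasADi adjoint seed
    with tf.GradientTape() as tape:
        tape.watch(x)
        mean, _ = model.predict_y(x)
    grad = tape.gradient(mean, x, output_gradients=adj_seed)
    return mean, grad

I do not know whether this actually removes the overhead, which is part of what I am asking.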
For reference, I include the full code below, in case someone wants to run it and reproduce the timings.
ocp.py
from casadi import *
T = 10. # Time horizon
N = 20 # number of control intervals
# Declare model variables
x1 = MX.sym('x1')
x2 = MX.sym('x2')
x = vertcat(x1, x2)
u = MX.sym('u')
# Model equations
xdot = vertcat((1-x2**2)*x1 - x2 + u, x1)
# Formulate discrete time dynamics
if False:
    # CVODES from the SUNDIALS suite
    dae = {'x': x, 'p': u, 'ode': xdot}
    opts = {'tf': T/N}
    F = integrator('F', 'cvodes', dae, opts)
else:
    # Fixed-step Runge-Kutta 4 integrator
    M = 4  # RK4 steps per interval
    DT = T/N/M
    f = Function('f', [x, u], [xdot])
    X0 = MX.sym('X0', 2)
    U = MX.sym('U')
    X = X0
    Q = 0
    for j in range(M):
        k1 = f(X, U)
        k2 = f(X + DT/2 * k1, U)
        k3 = f(X + DT/2 * k2, U)
        k4 = f(X + DT * k3, U)
        X = X + DT/6*(k1 + 2*k2 + 2*k3 + k4)
    F = Function('F', [X0, U], [X], ['x0', 'p'], ['xf'])
# Start with an empty NLP
w=[]
w0 = []
lbw = []
ubw = []
g=[]
lbg = []
ubg = []
# "Lift" initial conditions
Xk = MX.sym('X0', 2)
w += [Xk]
lbw += [0, 1]
ubw += [0, 1]
w0 += [0, 1]
# Formulate the NLP
for k in range(N):
    # New NLP variable for the control
    Uk = MX.sym('U_' + str(k))
    w += [Uk]
    lbw += [-1]
    ubw += [1]
    w0 += [0]
    # Integrate till the end of the interval
    Fk = F(x0=Xk, p=Uk)
    Xk_end = Fk['xf']
    # New NLP variable for state at end of interval
    Xk = MX.sym('X_' + str(k+1), 2)
    w += [Xk]
    lbw += [-0.25, -inf]
    ubw += [inf, inf]
    w0 += [0, 0]
    # Add equality constraint
    g += [Xk_end - Xk]
    lbg += [0, 0]
    ubg += [0, 0]
nd = N+1
import gpflow
import time
import tensorflow as tf
from tensorflow_casadi_mod_ocp import TensorFlowEvaluator
class GPR(TensorFlowEvaluator):
    def __init__(self, model, opts={}):
        initializer = tf.random_normal_initializer(mean=1., stddev=2.)
        X = tf.Variable(initializer(shape=[1, nd], dtype=tf.float64), trainable=False)

        @tf.function
        def f_k(input_dat, get_grad_val=None):
            xf_tensor = input_dat
            if get_grad_val is not None:
                with tf.GradientTape(persistent=True) as tape:
                    tape.watch(xf_tensor)
                    mean, _ = model.predict_y(xf_tensor)
                grad_mean = tape.gradient(mean, xf_tensor, output_gradients=get_grad_val)
            else:
                mean, _ = model.predict_y(xf_tensor)
                grad_mean = None
            return mean, grad_mean

        TensorFlowEvaluator.__init__(self, [X], [f_k], model, opts=opts)
        self.counter = 0
        self.time = 0

    def eval(self, arg):
        self.counter += 1
        t0 = time.time()
        ret = TensorFlowEvaluator.eval(self, arg)
        self.time += time.time() - t0
        return [ret]
# Create the training data and fit the GP model
np.random.seed(0)
data = np.random.normal(loc=0.5,scale=1,size=(N,nd))
value = np.random.random((N,1))
model = gpflow.models.GPR(data=(data, value), kernel=gpflow.kernels.Constant(nd) + gpflow.kernels.Linear(nd) + gpflow.kernels.White(nd) + gpflow.kernels.RBF(nd))
optimizer = gpflow.optimizers.Scipy()
optimizer.minimize(
    model.training_loss,
    variables=model.trainable_variables,
    options=dict(disp=True, maxiter=100),
)
import tensorflow as tf
opts = {}
opts["output_dim"] = [1, 1]
opts["grad_dim"] = [1, nd]
GPR = GPR(model, opts=opts)
w = vertcat(*w)
# Create an NLP solver
prob = {'f': GPR(w[0::3]), 'x': w, 'g': vertcat(*g)}
options = {"ipopt": {"hessian_approximation": "limited-memory"}}
solver = nlpsol('solver', 'ipopt', prob, options)
sol = solver(x0=w0, lbx=lbw, ubx=ubw, lbg=lbg, ubg=ubg)
print("Ncalls",GPR.counter)
print("Total time [s]",GPR.time)
w_opt = sol['x'].full().flatten()
# Plot the solution
x1_opt = w_opt[0::3]
x2_opt = w_opt[1::3]
u_opt = w_opt[2::3]
tgrid = [T/N*k for k in range(N+1)]
import matplotlib.pyplot as plt
plt.figure(1)
plt.clf()
plt.plot(tgrid, x1_opt, '--')
plt.plot(tgrid, x2_opt, '-')
plt.step(tgrid, vertcat(DM.nan(1), u_opt), '-.')
plt.xlabel('t')
plt.legend(['x1','x2','u'])
plt.grid()
plt.show()
tensorflow_casadi_mod_ocp.py
import casadi as ca
import tensorflow as tf
from casadi import Sparsity
import gpflow
import numpy as np
class TensorFlowEvaluator(ca.Callback):
    def __init__(self, t_in, t_out, model, set_init=False, opts={}):
        self.set_init = set_init
        self.opts = opts
        ca.Callback.__init__(self)
        assert isinstance(t_in, list)
        self.t_in = t_in
        assert isinstance(t_out, list)
        self.t_out = t_out
        self.output_shapes = []
        self.construct("TensorFlowEvaluator", {})
        self.refs = []
        self.model = model

    def get_n_in(self): return len(self.t_in)
    def get_n_out(self): return len(self.t_out)

    def get_sparsity_in(self, i):
        tensor_shape = self.t_in[i].shape
        return Sparsity.dense(tensor_shape[0], tensor_shape[1])

    def get_sparsity_out(self, i):
        if i == 0 and self.set_init is False:
            tensor_shape = [self.opts["output_dim"][0], self.opts["output_dim"][1]]
        elif i == 0 and self.set_init is True:
            tensor_shape = [self.opts["grad_dim"][0], self.opts["grad_dim"][1]]
        else:
            tensor_shape = [self.opts["output_dim"][0], self.opts["output_dim"][1]]
        return Sparsity.dense(tensor_shape[0], tensor_shape[1])

    def eval(self, arg):
        updated_t = []
        for i, v in enumerate(self.t_in):
            updated_t.append(tf.Variable(arg[i].toarray()))
        if len(updated_t) == 1:
            out_, grad_estimate = self.t_out[0](tf.convert_to_tensor(updated_t[0]))
        else:
            out_, grad_estimate = self.t_out[0](tf.convert_to_tensor(updated_t[0]), tf.convert_to_tensor(updated_t[1]))
        if len(updated_t) == 1:
            selected_set = out_.numpy()
        else:
            selected_set = grad_estimate.numpy()
        return [selected_set]

    # Vanilla TensorFlow offers just reverse-mode AD
    def has_reverse(self, nadj): return nadj == 1

    def get_reverse(self, nadj, name, inames, onames, opts):
        initializer = tf.random_normal_initializer(mean=1., stddev=2.)
        adj_seed = [tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
        callback = TensorFlowEvaluator(self.t_in + adj_seed, [self.t_out[0]], self.model, set_init=True, opts=self.opts)
        self.refs.append(callback)
        nominal_in = self.mx_in()
        nominal_out = self.mx_out()
        adj_seed = self.mx_out()
        casadi_bal = callback.call(nominal_in + adj_seed)
        return ca.Function(name, nominal_in + nominal_out + adj_seed, casadi_bal, inames, onames)
Tags: python, tensorflow, optimization, tensorflow2.0, casadi