How to get the symbolic gradient in Tensorflow 2.x

2 years ago

#108163

GPrathap

I want to port the example at https://web.casadi.org/blog/tensorflow/, which was written for TensorFlow 1 with CasADi, to TensorFlow 2. I have adapted the code, but I still had to call tf.disable_v2_behavior() to get it working.

  import casadi as ca
  import tensorflow.compat.v1 as tf
  tf.disable_v2_behavior()

  class TensorFlowEvaluator(ca.Callback):
    """CasADi callback that evaluates TensorFlow graph tensors through a session.

    The callback exposes ``len(t_in)`` inputs and ``len(t_out)`` outputs to
    CasADi, each with a dense sparsity pattern taken from the static shape of
    the corresponding tensor. Reverse-mode derivatives are provided by
    building a second ``TensorFlowEvaluator`` around ``tf.gradients``.
    """

    def __init__(self, t_in, t_out, session, opts=None):
      """
        t_in: list of inputs (tensorflow placeholders)
        t_out: list of outputs (tensors dependent on those placeholders)
        session: a tensorflow session
        opts: optional dict of options forwarded to ``construct``;
              ``None`` (the default) means no options
      """
      ca.Callback.__init__(self)
      assert isinstance(t_in,list)
      self.t_in = t_in
      assert isinstance(t_out,list)
      self.t_out = t_out
      # A fresh dict per call avoids sharing one mutable default between instances.
      self.construct("TensorFlowEvaluator", {} if opts is None else opts)
      self.session = session
      # Holds the derivative callbacks created in get_reverse so they stay alive.
      self.refs = []

    def get_n_in(self):
      """Return the number of callback inputs (one per entry of ``t_in``)."""
      return len(self.t_in)

    def get_n_out(self):
      """Return the number of callback outputs (one per entry of ``t_out``)."""
      return len(self.t_out)

    def get_sparsity_in(self, i):
      """Return a dense sparsity pattern with the static shape of input ``i``."""
      return ca.Sparsity.dense(*self.t_in[i].get_shape().as_list())

    def get_sparsity_out(self, i):
      """Return a dense sparsity pattern with the static shape of output ``i``."""
      return ca.Sparsity.dense(*self.t_out[i].get_shape().as_list())

    def eval(self, arg):
      """Run the session on ``t_out`` with ``arg`` fed into ``t_in``.

      arg: sequence of CasADi numerical arguments, one per input, in the same
           order as ``t_in``.
      Returns the list produced by ``session.run`` (one entry per output).
      """
      # Associate each tensorflow input with the numerical argument passed by CasADi
      feed = {tensor: arg[i].toarray() for i, tensor in enumerate(self.t_in)}
      # Evaluate the tensorflow expressions
      return self.session.run(self.t_out, feed_dict=feed)

    # Vanilla tensorflow offers just the reverse mode AD
    def has_reverse(self, nadj):
      """Report reverse-mode support; only a single adjoint direction is handled."""
      return nadj==1

    def get_reverse(self, nadj, name, inames, onames, opts):
      """Build the CasADi function that computes the reverse-mode derivative.

      nadj, opts: accepted for the CasADi callback interface; not used here.
      name, inames, onames: forwarded unchanged to ``ca.Function``.
      Returns a ``ca.Function`` taking nominal inputs, nominal outputs and
      adjoint seeds, and returning the outputs of the gradient callback.
      """
      # Construct tensorflow placeholders for the reverse seeds
      seed_placeholders = [
          tf.placeholder(shape=self.sparsity_out(i).shape, dtype=tf.float64)
          for i in range(self.n_out())
      ]
      # Construct the reverse tensorflow graph through 'gradients'
      grad = tf.gradients(self.t_out, self.t_in, grad_ys=seed_placeholders)
      # Create another TensorFlowEvaluator object
      callback = TensorFlowEvaluator(self.t_in + seed_placeholders, grad, self.session)
      # Make sure you keep a reference to it
      self.refs.append(callback)

      # Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
      nominal_in = self.mx_in()
      nominal_out = self.mx_out()
      # The symbolic adjoint seeds have the same shapes as the outputs.
      adj_seed = self.mx_out()
      return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in+adj_seed), inames, onames)


  if __name__=="__main__":

    # TensorFlow side: placeholders and the expression to be wrapped.
    # They get their own names so they are not shadowed by the CasADi symbols below.
    a_tf = tf.placeholder(shape=(2,2),dtype=tf.float64)
    b_tf = tf.placeholder(shape=(2,1),dtype=tf.float64)

    y_tf = tf.matmul(tf.sin(a_tf), b_tf)

    with tf.Session() as session:
      f_tf = TensorFlowEvaluator([a_tf,b_tf], [y_tf], session)

      # CasADi side: symbolic inputs, each with its own label.
      a = ca.MX.sym("a",2,2)
      b = ca.MX.sym("b",2,1)
      y = f_tf(a,b)
      # Pure-CasADi reference for the same expression, used to compare Jacobians.
      yref = ca.mtimes(ca.sin(a),b)

      f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
      fref = ca.Function('fref',[a,b],[ca.jacobian(yref,a)])
      # Both printed Jacobians are expected to agree.
      print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
      print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))

Now I want to write this purely using TensorFlow 2.x. Since eager execution is enabled by default, I was thinking of using @tf.function to calculate the gradient:

  @tf.function
  def f_k(input_dat):
      """Compute sin(input_dat[0]) @ input_dat[1] together with its gradients.

      input_dat: indexable collection whose first two entries are the
                 matrix operands; it is also the list the gradients are
                 taken with respect to.
      Returns a pair (product, gradients).
      """
      sin_of_first = tf.sin(input_dat[0])
      product = tf.matmul(sin_of_first, input_dat[1])
      # Alternative that was tried: tape.gradient([y], input_dat)
      gradients = tf.gradients([product], input_dat)
      # Emit the gradients through both TensorFlow's and Python's print.
      tf.print('tf >>', gradients)
      print('print >>', gradients)
      return product, gradients

Here is the updated code at the moment:

  import casadi as ca
  import tensorflow as tf
  from casadi import Sparsity

  class TensorFlowEvaluator(ca.Callback):
    """CasADi callback wrapping a TensorFlow 2 callable (work in progress).

    Only ``t_out[0]`` is ever evaluated, and the output sparsity is
    hard-coded, so this version is tied to the 2x2 / 2x1 example it is used
    with.
    """
    def __init__(self, t_in, t_out, model, opts={}):
      """
        t_in: list of inputs; each entry must provide get_shape() (used for
              the input sparsity) and is passed to t_out[0] in get_reverse
        t_out: list of outputs; only t_out[0] is used. eval calls it with a
               list of tf.Variable objects
        model: stored on the instance and handed on to the callback created
               in get_reverse; not otherwise used in this class
        opts: options forwarded to construct
      """
      ca.Callback.__init__(self)
      assert isinstance(t_in,list)
      self.t_in = t_in
      assert isinstance(t_out,list)
      self.t_out = t_out
      self.construct("TensorFlowEvaluator", opts)
      # Keeps the derivative callbacks created in get_reverse referenced.
      self.refs = []
      self.model = model

    # Number of callback inputs / outputs: one per list entry.
    def get_n_in(self): return len(self.t_in)
    def get_n_out(self): return len(self.t_out)

    def get_sparsity_in(self, i):
        """Return a dense pattern built from the first two dimensions of input i."""
        tesnor_shape = self.t_in[i].get_shape().as_list()
        return Sparsity.dense(tesnor_shape[0], tesnor_shape[1])
        # return Sparsity.dense(4, 1)

    def get_sparsity_out(self, i):
        """Return a dense 2x1 pattern for every output index."""
        # NOTE(review): the shape is hard-coded and ignores i. The callback
        # created in get_reverse reports 2x1 as well, which presumably does
        # not match a gradient with respect to a 2x2 input -- confirm.
        return Sparsity.dense(2, 1)

    def eval(self, arg):
      """Evaluate t_out[0] on the CasADi arguments and return [ca.DM(result)]."""
      # Associate each tensorflow input with the numerical argument passed by CasADi
      print(arg)
      # d = dict((v, arg[i].toarray()) for i,v in enumerate(self.t_in))
      # Each CasADi argument is wrapped in a fresh tf.Variable; t_in is only
      # used here to know how many arguments to convert.
      updated_t = []
      for i,v in enumerate(self.t_in):
          updated_t.append(tf.Variable(arg[i].toarray()))
      # Evaluate the tensorflow expressions
      if not tf.is_tensor(self.t_out[0]):
          # t_out[0] is treated as a callable returning a sequence; only its
          # first element is converted.
          ret = self.t_out[0](updated_t)[0].numpy()
      else: 
          # NOTE(review): this branch calls a tensor like a function, which
          # presumably fails -- it is reached for the callback built in
          # get_reverse when grad holds tensors. Confirm.
          ret = self.t_out[0](updated_t).numpy()
      return [ca.DM(ret)]

    # Vanilla tensorflow offers just the reverse mode AD
    def has_reverse(self,nadj): return nadj==1
    def get_reverse(self, nadj, name, inames, onames, opts):
      """Build the ca.Function CasADi uses for the reverse-mode derivative.

      name, inames and onames are forwarded to ca.Function; nadj and opts are
      not used in the body.
      """
      # Randomly initialised variables standing in for the adjoint seeds.
      # NOTE(review): they are appended to the inputs of the new callback but
      # never enter the gradient computation below.
      initializer = tf.random_normal_initializer(mean=1., stddev=2.)
      adj_seed = [ tf.Variable(initializer(shape=self.sparsity_out(i).shape, dtype=tf.float64)) for i in range(self.n_out())]
      # Global TensorFlow setting, changed as a side effect of this call.
      tf.config.run_functions_eagerly(False)
      # NOTE(review): the two labels are swapped relative to the values printed.
      print("=============== self.t_in========", self.t_out)
      print("=============== self.t_out========", self.t_in)
      # grad = tape.gradient(mean, self.t_in, output_gradients=adj_seed)
      # Calls t_out[0] on the stored inputs; the second element of the result
      # is used as the output list of the derivative callback.
      out_, grad = self.t_out[0](self.t_in)
      print("============== grad========", grad)
      # Create another TensorFlowEvaluator object
      callback = TensorFlowEvaluator(self.t_in + adj_seed, grad, self.model)
      # Make sure you keep a reference to it
      self.refs.append(callback)

      # Package it in the nominal_in+nominal_out+adj_seed form that CasADi expects
      nominal_in = self.mx_in()
      nominal_out = self.mx_out()
      # adj_seed is rebound here to CasADi symbols shaped like the outputs.
      adj_seed = self.mx_out()
      return ca.Function(name, nominal_in+nominal_out+adj_seed, callback.call(nominal_in + adj_seed), inames, onames)


  if __name__=="__main__":

    # TensorFlow side: randomly initialised variables that fix the input shapes.
    # They get their own names so they are not shadowed by the CasADi symbols below.
    initializer = tf.random_normal_initializer(mean=1., stddev=2.)
    a_tf = tf.Variable(initializer(shape=(2,2), dtype=tf.float64))
    b_tf = tf.Variable(initializer(shape=(2,1), dtype=tf.float64))

    @tf.function
    def f_k(input_dat):
        """Return sin(input_dat[0]) @ input_dat[1] and its gradients w.r.t. input_dat."""
        y = tf.matmul(tf.sin(input_dat[0]), input_dat[1])
        grads = tf.gradients([y], input_dat)
        # grads = tape.gradient([y], input_dat)
        tf.print('tf >>', grads)
        print('print >>', grads)
        return y, grads

    f_tf = TensorFlowEvaluator([a_tf,b_tf], [f_k], None)

    # CasADi side: symbolic inputs, each with its own label.
    a = ca.MX.sym("a",2,2)
    b = ca.MX.sym("b",2,1)
    y = f_tf(a,b)

    # Pure-CasADi reference for the same expression, used to compare Jacobians.
    yref = ca.mtimes(ca.sin(a),b)

    f = ca.Function('f',[a,b],[ca.jacobian(y,a)])
    fref = ca.Function('fref',[a,b],[ca.jacobian(yref,a)])
    # Reference Jacobian first, then the one obtained through the callback.
    print(fref(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))
    print(f(ca.DM([[1,2],[3,4]]),ca.DM([[1],[3]])))

Problem: In TensorFlow 1, when the gradient is calculated in the get_reverse method, i.e., grad = tf.gradients(self.t_out, self.t_in,grad_ys=adj_seed), I get a symbolic form, i.e., [<tf.Tensor 'gradients/Sin_grad/mul:0' shape=(2, 2) dtype=float32>, <tf.Tensor 'gradients/MatMul_grad/MatMul_1:0' shape=(2, 1) dtype=float32>].

However, in TensorFlow 2, I always get numerical results. I can access the graph, e.g. via self.t_out[0].get_concrete_function(self.t_in).graph (similar to here), but the tensors in it are not callable.

What would be a better way to get the symbolic gradient, as in TensorFlow 1?

Expected Behaviour:

 out_, grad = self.t_out[0](self.t_in)

grad should be the symbolic form of the gradient rather than its numerical evaluation.

python

tensorflow

tensorflow2.0

casadi

0 Answers

Your Answer

Posts

Questions

Blogs

Jobs