# Example: linear transformation [3,4] -> [3,2]
# logits = x @ w + b, with x: [3,4], w: [4,2], b: [2]
# labels y: [3], one-hot encoded with depth = 2
# ------------------------ TensorFlow -----------------------------
import tensorflow as tf

x = tf.random.uniform([3, 4])
w = tf.random.uniform([4, 2])
b = tf.zeros([2])
y = tf.constant([0, 1, 1])
with tf.GradientTape() as tape:
    # w and b are plain tensors, not tf.Variables,
    # so the tape must be told to track them explicitly
    tape.watch([w, b])
    logits = x @ w + b
    probs = tf.nn.softmax(logits)
    y_true = tf.one_hot(y, depth=2)
    losses = tf.losses.MSE(y_true, probs)
    loss = tf.reduce_mean(losses)
grads = tape.gradient(loss, [w, b])
grads_w = grads[0]
grads_b = grads[1]

print(loss)
print(grads_w)
print(grads_b)
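# A sketch of the tf.Variable alternative (the names w_var/b_var are
# illustrative): variables are tracked by the tape automatically,
# so tape.watch is unnecessary.
# w_var = tf.Variable(tf.random.uniform([4, 2]))
# b_var = tf.Variable(tf.zeros([2]))
# with tf.GradientTape() as tape:
#     logits = x @ w_var + b_var
#     probs = tf.nn.softmax(logits)
#     loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y, depth=2), probs))
# grads = tape.gradient(loss, [w_var, b_var])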
# ------------------------ PyTorch ---------------------------------
import torch
import torch.nn.functional as F

def one_hot(label, depth):
    # Scatter 1s into a zero tensor at the positions given by the labels
    out = torch.zeros(label.size(0), depth)
    idx = label.long().view(-1, 1)
    out.scatter_(dim=1, index=idx, value=1)
    return out
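# Note: recent PyTorch (>= 1.1) also ships this as a built-in; a drop-in
# alternative would be:
# y_true = F.one_hot(y, num_classes=2).float()  # one_hot returns int64, MSE needs float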
x = torch.rand(3, 4)
w = torch.rand([4, 2], requires_grad=True)
b = torch.zeros([2], requires_grad=True)
y = torch.LongTensor([0, 1, 1])
# if "requires_grad=Flase" # w.requires_grad_() # b.requires_grad_()
logits = x @ w + b
probs = F.softmax(logits, dim=1)
y_true = one_hot(y, depth=2)
loss = F.mse_loss(probs, y_true)

# Backprop and read the gradients, mirroring the TensorFlow example above
loss.backward()
print(loss)
print(w.grad)
print(b.grad)
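# Both optimization loops below call himmelblau() without defining it.
# A minimal sketch of Himmelblau's function,
#     f(x, y) = (x**2 + y - 11)**2 + (x + y**2 - 7)**2,
# written with plain arithmetic so the same definition works on both
# TensorFlow and PyTorch tensors:
def himmelblau(x):
    return (x[0] ** 2 + x[1] - 11) ** 2 + (x[0] + x[1] ** 2 - 7) ** 2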
# ------------------------ TensorFlow -----------------------------
x = tf.Variable([0., 0.])

lr = 0.001
optimizer = tf.optimizers.SGD(lr)
for step in range(30000):
    with tf.GradientTape() as tape:
        pred = himmelblau(x)
    grads = tape.gradient(pred, [x])
    optimizer.apply_gradients(grads_and_vars=zip(grads, [x]))

    if step % 2000 == 0:
        print('step {}: x = {}, pred = {}'.format(step, x.numpy(), pred.numpy()))
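# Equivalently (a sketch, not part of the original loop), the SGD update can be
# applied by hand with tf.Variable.assign_sub instead of an optimizer:
# x.assign_sub(lr * grads[0])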
# ------------------------ PyTorch ---------------------------------
x = torch.tensor([0., 0.], requires_grad=True)

lr = 0.001
optimizer = torch.optim.SGD([x], lr=lr)
for step in range(30000):
    pred = himmelblau(x)

    # Clear stale gradients, backprop, then take one SGD step
    optimizer.zero_grad()
    pred.backward()
    optimizer.step()

    if step % 2000 == 0:
        print('step {}: x = {}, pred = {}'.format(step, x.tolist(), pred.item()))
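# Himmelblau's function has four global minima, all with value 0:
# (3.0, 2.0), (-2.805118, 3.131312), (-3.779310, -3.283186), (3.584428, -1.848126)
# Which minimum either loop converges to depends on the starting point; trying e.g.
# x = torch.tensor([4., 0.], requires_grad=True) may land in a different basin.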