MSE
import torch
import torch.nn as nn

x = torch.FloatTensor([[1, 1],
                       [2, 2]])
x_hat = torch.FloatTensor([[0, 0],
                           [0, 0]])

mse_loss = nn.MSELoss()
mse_loss(x_hat, x)
-------------------------------
tensor(2.5000)
import torch.nn.functional as F
F.mse_loss(x_hat, x)
----------------------------
tensor(2.5000)
F.mse_loss(x_hat, x, reduction='sum')
------------------------------
tensor(10.)
F.mse_loss(x_hat, x, reduction='none')
------------------------------
tensor([[1., 1.],
        [4., 4.]])
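As a quick sanity check, the same value can be computed directly from the definition of MSE (mean of the element-wise squared differences), using only the tensors defined above:

(x - x_hat).pow(2).mean()
------------------------------
tensor(2.5000)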
Gradient descent
x = torch.FloatTensor([[1, 2],
                       [3, 4]]).requires_grad_(True)
x3 = x**2 - 4
y = x3.sum()
y.backward()   # compute dy/dx and store it in x.grad
print(x.grad)  # dy/dx = 2x
----------------------
tensor([[2., 4.],
        [6., 8.]])
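With the gradient stored in x.grad, one manual gradient descent step simply moves x against the gradient. A minimal sketch (the learning rate of 0.1 is an arbitrary choice for illustration):

learning_rate = 0.1
with torch.no_grad():
    x -= learning_rate * x.grad  # step against the gradient
    x.grad.zero_()               # clear the accumulated gradient
print(x)
----------------------
tensor([[0.8000, 1.6000],
        [2.4000, 3.2000]], requires_grad=True)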
# Instead of implementing the gradient update equations ourselves,
# we can use an optimizer class from torch.optim to update model parameters automatically.
import torch.nn.functional as F
import torch.optim as optim

optimizer = optim.SGD(model.parameters(),
                      lr=learning_rate)
# Adam has a default learning rate (1e-3), so we can often omit the lr argument.
# optimizer = optim.Adam(model.parameters())
for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat, y)
    
    # Reset the accumulated gradients
    optimizer.zero_grad()
    # Compute the gradients
    loss.backward()

    # Update the parameters
    optimizer.step()
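To make the template above concrete, here is a minimal, self-contained sketch. The linear model, the synthetic data (y = 2x + 1), and the hyper-parameter values are illustrative assumptions, not part of the original example:

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Toy regression data: y = 2x + 1
x = torch.rand(100, 1)
y = 2 * x + 1

model = nn.Linear(1, 1)
learning_rate = 1e-1
n_epochs = 1000

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(model.weight.item(), model.bias.item())  # should approach 2 and 1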
