MSE
import torch

x = torch.FloatTensor([[1, 1],
                       [2, 2]])
x_hat = torch.FloatTensor([[0, 0],
                           [0, 0]])
import torch.nn as nn
mse_loss = nn.MSELoss()
mse_loss(x_hat, x)
-------------------------------
tensor(2.5000)
import torch.nn.functional as F
F.mse_loss(x_hat, x)
----------------------------
tensor(2.5000)
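As a quick sanity check, the same value follows directly from the definition of MSE, the mean of the element-wise squared differences:
# Manual computation of MSE: mean of (x - x_hat)^2 over all elements.
((x - x_hat)**2).mean()
------------------------------
tensor(2.5000)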
F.mse_loss(x_hat, x, reduction='sum')
------------------------------
tensor(10.)
F.mse_loss(x_hat, x, reduction='none')
------------------------------
tensor([[1., 1.],
        [4., 4.]])
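For reference, the three reduction modes are consistent with one another: averaging or summing the element-wise result above reproduces the default 'mean' and the 'sum' outputs.
elementwise = F.mse_loss(x_hat, x, reduction='none')
elementwise.mean()   # same as the default reduction='mean' -> tensor(2.5000)
elementwise.sum()    # same as reduction='sum'              -> tensor(10.)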
Gradient descent
x = torch.FloatTensor([[1, 2],
                       [3, 4]]).requires_grad_(True)
x3 = x**2 - 4
y = x3.sum()
y.backward()
print(x.grad)
----------------------
tensor([[2., 4.],
        [6., 8.]])
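Since y = (x**2 - 4).sum(), the analytic gradient is dy/dx = 2x, which is exactly what was printed; a quick check:
# The gradient of y = sum(x**2 - 4) with respect to x is 2 * x.
print(torch.equal(x.grad, 2 * x.detach()))
----------------------
True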
# Instead of implementing the gradient update equation ourselves,
# we can use an optimizer class from torch.optim to update the model parameters automatically.
import torch.nn.functional as F
import torch.optim as optim
optimizer = optim.SGD(model.parameters(),
                      lr=learning_rate)
# Adam has a sensible default learning rate, so the lr argument can be omitted.
# optimizer = optim.Adam(model.parameters())
for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat, y)
    # Reset the gradients accumulated from the previous step.
    optimizer.zero_grad()
    # Compute gradients of the loss with respect to the parameters.
    loss.backward()
    # Update the parameters.
    optimizer.step()
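The loop above refers to model, x, y, learning_rate, and n_epochs that are defined elsewhere. A minimal self-contained sketch, assuming a toy nn.Linear regression model and made-up data and hyper-parameters, could look like this:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Hypothetical toy data and model, for illustration only.
x = torch.randn(100, 3)     # 100 samples, 3 features
y = torch.randn(100, 1)     # 100 regression targets
model = nn.Linear(3, 1)     # simple linear regression model

learning_rate = 1e-2        # assumed hyper-parameter
n_epochs = 1000             # assumed hyper-parameter

optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for i in range(n_epochs):
    y_hat = model(x)
    loss = F.mse_loss(y_hat, y)

    optimizer.zero_grad()   # reset accumulated gradients
    loss.backward()         # compute gradients
    optimizer.step()        # update parameters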