1 year ago
#276515
HennyKo
Why does the calculated loss differ from the logged loss in pytorch_lightning?
I am training a model and want to create a confusion matrix every time the validation loss improves. So, in validation_epoch_end
I check if the loss of this epoch is better than any previous loss. I realized that sometimes the loss I calculate (mean of all losses of validation_step
) is not equal to the loss logged in tensorboard.
I created a little toy example below.
Could this just be a rounding error? Is there a better way to know the exact loss in validation_epoch_end
?
import os
import torch
from pytorch_lightning import seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import pytorch_lightning as pl
class LitAutoEncoder(pl.LightningModule):
    """Toy linear autoencoder used to compare epoch-loss computations.

    Inputs are flattened to 28 * 28 features, squeezed through a single
    latent unit and reconstructed; the training loss is the MSE between the
    (scaled) input and its reconstruction.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(28 * 28, 1)
        self.decoder = nn.Linear(1, 28 * 28)

    def forward(self, x):
        """Return the latent embedding of ``x`` (encoder only)."""
        return self.encoder(x)

    def training_step(self, batch, batch_idx):
        """Run one reconstruction step and log its loss.

        Returns a dict with the ``loss`` to optimise plus the flattened input
        ``x`` and its reconstruction ``x_hat`` for the epoch-end diagnostics.
        """
        x, _ = batch
        x = x.view(x.size(0), -1)
        # Scale out-of-place: ``view`` shares storage with the batch tensor,
        # so an in-place ``*=`` would silently modify the caller's batch.
        x = x * 42  # to get more extreme loss values in this example
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log("train_loss", loss, on_step=True, on_epoch=True)
        # ``x_hat`` is only inspected at epoch end; detach it so the autograd
        # graph of every batch is not kept alive for the whole epoch.
        return {'loss': loss, "x": x, "x_hat": x_hat.detach()}

    def training_epoch_end(self, train_step_outputs) -> None:
        """Print the epoch loss computed three ways for comparison.

        * unweighted mean of the per-batch losses,
        * batch-size-weighted mean of the per-batch losses,
        * MSE over all samples of the epoch at once.

        The unweighted mean treats every batch equally, so it drifts from the
        other two whenever batches differ in size (e.g. a smaller final batch
        because the dataset size is not a multiple of the batch size).
        Weighting each batch loss by its number of samples removes that bias
        and matches the all-sample MSE up to floating-point rounding.
        """
        batch_losses = torch.stack([out['loss'] for out in train_step_outputs])

        # Original computation: biased when batch sizes are unequal.
        unweighted_loss = batch_losses.mean()

        # Every sample has the same number of features, so weighting each
        # batch mean by its sample count reproduces the mean over all elements.
        batch_sizes = batch_losses.new_tensor(
            [out['x'].size(0) for out in train_step_outputs]
        )
        train_loss = (batch_losses * batch_sizes).sum() / batch_sizes.sum()

        x_s = torch.cat([out['x'] for out in train_step_outputs])
        x_hat_s = torch.cat([out['x_hat'] for out in train_step_outputs])
        loss_calc = F.mse_loss(x_hat_s, x_s)  # value as logged in tensorboard

        unweighted_float = float(unweighted_loss.detach().cpu())
        weighted_float = float(train_loss.detach().cpu())
        loss_calc_float = float(loss_calc.detach().cpu())

        print()
        # Unweighted mean vs. all-sample MSE: shows the discrepancy.
        print(unweighted_loss, loss_calc, unweighted_loss - loss_calc)
        print(unweighted_float, loss_calc_float, unweighted_float - loss_calc_float)
        # Weighted mean vs. all-sample MSE: differs only by rounding.
        print(weighted_float, loss_calc_float, weighted_float - loss_calc_float)

    def configure_optimizers(self):
        """Return an Adam optimiser over all parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)
if __name__ == '__main__':
    # Seed every RNG first so the split, weight init and shuffling repeat.
    seed_everything(42)

    mnist = MNIST(os.getcwd(), download=True, transform=transforms.ToTensor())
    # Train on a 1000-sample subset; the 59000-sample remainder is not used.
    train_subset, _remainder = random_split(mnist, [1000, 59000])

    model = LitAutoEncoder()
    tb_logger = TensorBoardLogger(save_dir='lightning_logs')
    trainer = pl.Trainer(max_epochs=42, logger=tb_logger)

    train_loader = DataLoader(train_subset, batch_size=256, shuffle=True)
    trainer.fit(model, train_loader)
pytorch
rounding-error
pytorch-lightning
0 Answers
Your Answer