Why does the calculated loss differ from the logged loss in pytorch_lightning? - Enhance your coding expertise with HennyKo on @onlycoders.net

2 years ago

#276515

HennyKo

Why does the calculated loss differ from the logged loss in pytorch_lightning?

I am training a model and want to create a confusion matrix every time the validation loss improves. So, in validation_epoch_end, I check whether the loss of this epoch is better than any previous loss. I realized that sometimes the loss I calculate (the mean of all losses returned by validation_step) is not equal to the loss logged in TensorBoard.

I created a little toy example below. Could this just be a rounding error? Is there a better way to know the exact loss in validation_epoch_end?

import os
import torch
from pytorch_lightning import seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import pytorch_lightning as pl


class LitAutoEncoder(pl.LightningModule):
    """Toy autoencoder with a one-unit bottleneck, used to compare loss values.

    The module logs ``train_loss`` per step and per epoch, and at the end of
    every epoch prints three ways of computing the epoch loss:

    * the plain mean of the per-batch losses,
    * the MSE over all samples of the epoch concatenated together,
    * the per-batch losses averaged with batch-size weights.

    The plain mean gives every batch the same weight, so it drifts away from
    the other two whenever the batches are not all the same size (typically
    a smaller last batch). The epoch-level value produced by ``self.log`` is
    expected to match the batch-size-weighted one -- see the Lightning
    logging documentation for the exact reduction.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Linear(28 * 28, 1)
        self.decoder = nn.Linear(1, 28 * 28)

    def forward(self, x):
        """Return the encoder output (the embedding) for ``x``."""
        return self.encoder(x)

    def training_step(self, batch, batch_idx):
        """Run one reconstruction step and log its MSE loss.

        Returns a dict with the step ``loss`` plus the flattened, scaled
        input ``x`` and its reconstruction ``x_hat`` so that the epoch-end
        hook can recompute the loss over the whole epoch.
        """
        x, _ = batch
        x = x.view(x.size(0), -1)

        # Scale out of place: ``view`` may share storage with the batch
        # tensor, so an in-place ``*=`` would also modify the caller's batch.
        x = x * 42  # to get more extreme loss values in this example

        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log("train_loss", loss, on_step=True, on_epoch=True)
        # ``x_hat`` is only needed for its values at epoch end; detach it so
        # the stored output does not keep this step's autograd graph alive.
        return {'loss': loss, "x": x, "x_hat": x_hat.detach()}

    def training_epoch_end(self, train_step_outputs) -> None:
        """Print the epoch loss computed in three different ways.

        ``train_step_outputs`` is the list of dicts returned by
        ``training_step`` during the epoch. Nothing is printed when the
        list is empty.
        """
        if not train_step_outputs:
            # ``torch.stack`` / ``torch.cat`` raise on an empty list.
            return

        step_losses = torch.stack([x['loss'] for x in train_step_outputs])
        train_loss = step_losses.mean()  # unweighted: differs from the logged value
        x_s = torch.cat([x['x'] for x in train_step_outputs])
        x_hat_s = torch.cat([x['x_hat'] for x in train_step_outputs])

        loss_mean = float(train_loss.detach().cpu())
        loss_calc = F.mse_loss(x_hat_s, x_s)  # value as logged in tensorboard
        loss_calc_float = float(loss_calc.detach().cpu())

        # Weight every batch mean by its number of samples. All samples have
        # the same number of features, so this equals the MSE over the whole
        # epoch up to floating-point rounding.
        batch_sizes = torch.tensor(
            [x['x'].size(0) for x in train_step_outputs],
            dtype=step_losses.dtype,
            device=step_losses.device,
        )
        loss_weighted = (step_losses.detach() * batch_sizes).sum() / batch_sizes.sum()
        loss_weighted_float = float(loss_weighted.cpu())

        print()
        print(train_loss, loss_calc, train_loss - loss_calc)
        print(loss_mean, loss_calc_float, loss_mean - loss_calc_float)
        print(loss_weighted_float, loss_calc_float, loss_weighted_float - loss_calc_float)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)


if __name__ == "__main__":
    # Fix all RNG seeds first so the split, the weight initialisation and the
    # shuffling are reproducible from run to run.
    seed_everything(42)

    # MNIST is downloaded into the current working directory if missing.
    mnist = MNIST(
        os.getcwd(),
        download=True,
        transform=transforms.ToTensor(),
    )
    # Only the 1000-sample subset is used for training; the remaining
    # 59000 samples are split off and left unused.
    train, val = random_split(mnist, [1000, 59000])

    autoencoder = LitAutoEncoder()

    tb_logger = TensorBoardLogger(save_dir='lightning_logs')
    trainer = pl.Trainer(max_epochs=42, logger=tb_logger)

    train_loader = DataLoader(train, batch_size=256, shuffle=True)
    trainer.fit(autoencoder, train_loader)

pytorch

rounding-error

pytorch-lightning

0 Answers

Your Answer

Posts

Questions

Blogs

Jobs