How to correctly test the model by using multiple test data loaders ?

# How to correctly test the model by using multiple test data loaders ?
My benchmark has two test datasets, so I want to evaluate both of them in a single test epoch. However, I don't know how to correctly use those two data loaders in the `test_*` functions. Here is my code:
## Code

### DataModule
```python
class MNISTData(pl.LightningDataModule):
    """Data module exposing MNIST as one train, one val and two test loaders.

    The official training split is divided 55000/5000 into train/val, and the
    official test split is divided 5000/5000 into two test subsets (A and B)
    that are returned as a list of two dataloaders.

    Args:
        data_dir: Directory the MNIST files are downloaded to / read from.
        batch_size: Batch size used by every dataloader.
        num_workers: Worker processes per dataloader (previously a
            hard-coded 16, which is kept as the default).

    Note:
        Downloading happens in ``prepare_data`` rather than in the
        constructor, so that building the datamodule is free of I/O and
        several processes do not download concurrently. A ``Trainer`` calls
        ``prepare_data`` automatically; when driving the datamodule by hand,
        call ``prepare_data()`` before ``setup()``.
    """

    def __init__(self, data_dir="./", batch_size=100, num_workers=16):
        super().__init__()

        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            # presumably the usual MNIST mean / std -- confirm if the data changes
            transforms.Normalize((0.1307,), (0.3081,)),
        ])

    def prepare_data(self):
        """Download both MNIST splits to ``data_dir`` if not already present.

        Only triggers the download; no state is assigned here.
        """
        MNIST(self.data_dir, train=True, download=True)
        MNIST(self.data_dir, train=False, download=True)

    def setup(self, stage=None):
        """Build the datasets needed for ``stage``.

        Args:
            stage: ``'fit'`` builds train/val, ``'test'`` builds the two test
                subsets, ``None`` builds everything.
        """
        # Assign train/val datasets for use in dataloaders
        if stage in (None, 'fit'):
            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

        # Assign test datasets for use in dataloader(s)
        if stage in (None, 'test'):
            mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)
            # NOTE: random_split is unseeded here, so A/B membership can
            # differ between runs.
            self.mnist_testA, self.mnist_testB = random_split(mnist_test, [5000, 5000])

    def _make_loader(self, dataset, shuffle=False):
        """Return a DataLoader over ``dataset`` using the shared settings."""
        return DataLoader(
            dataset,
            self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
            pin_memory=True,
        )

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return self._make_loader(self.mnist_train, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader."""
        return self._make_loader(self.mnist_val)

    def test_dataloader(self):
        """Return the two test dataloaders as ``[loader_A, loader_B]``."""
        return [
            self._make_loader(self.mnist_testA),
            self._make_loader(self.mnist_testB),
        ]
```
### Module
```python
class MNISTModel(pl.LightningModule):
    """Small convolutional classifier with per-phase loss/accuracy trackers.

    Each ``*_step`` hook only runs the forward pass and hands
    ``(logits, targets, loader_index)`` to the matching ``*_step_end`` hook,
    which feeds the tracker objects. The ``*_epoch_end`` hooks call
    ``compute()`` on the trackers and log the results.

    ``MeanAccuracy`` and ``MeanLoss`` are defined outside this snippet; they
    are assumed to be callable as ``tracker(logits, targets)`` and to expose
    ``compute()`` -- confirm against their definitions.

    Args:
        data_dir: Accepted but not used by this class.
        batch_size: Accepted but not used by this class.
        n_train: Stored as ``self.n_train``.
        n_val: Stored as ``self.n_val``.
        n_test: Number of test dataloaders; one accuracy tracker and one loss
            tracker is created per test dataloader.
    """

    def __init__(self, data_dir="./", batch_size=100, n_train=1, n_val=1, n_test=2):
        super().__init__()

        # --- training trackers ---
        self.n_train = n_train
        self.train_accu = MeanAccuracy()
        self.train_loss = MeanLoss()

        # --- validation trackers ---
        self.n_val = n_val
        self.val_accu = MeanAccuracy()
        self.val_loss = MeanLoss()

        # --- test trackers: one pair per test dataloader ---
        self.n_test = n_test
        self.test_accu_list = nn.ModuleList([MeanAccuracy() for _ in range(n_test)])
        self.test_loss_list = nn.ModuleList([MeanLoss() for _ in range(n_test)])

        # --- network: two 3x3 conv blocks, then a 1x1 conv + global pooling ---
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(),
        )
        self.head = nn.Sequential(
            nn.Conv2d(512, 10, kernel_size=1, stride=1, padding=0, bias=False),
            nn.AdaptiveAvgPool2d(1),
        )

    def forward(self, x):
        """Return the network output for ``x`` reshaped to ``(x.shape[0], 10)``."""
        features = self.conv2(self.conv1(x))
        pooled = self.head(features)
        return pooled.view(features.shape[0], 10)

    # ------------------------------------------------------------------ train
    def training_step(self, batch, batch_idx, train_idx=0):
        """Run the forward pass; return ``(logits, targets, train_idx)``."""
        inputs, targets = batch
        return self(inputs), targets, train_idx

    def training_step_end(self, res):
        """Update the training trackers and return the loss tracker's output."""
        logits, targets, _ = res
        step_loss = self.train_loss(logits, targets)
        self.train_accu(logits, targets)
        return step_loss

    def training_epoch_end(self, outs):
        """Log the values computed by the training trackers."""
        self.log_dict(
            {
                "train_loss": self.train_loss.compute(),
                "train_accu": self.train_accu.compute(),
            },
            prog_bar=True,
            on_step=False,
            on_epoch=True,
        )

    # ------------------------------------------------------------- validation
    def validation_step(self, batch, batch_idx, val_idx=0):
        """Run the forward pass; return ``(logits, targets, val_idx)``."""
        inputs, targets = batch
        return self(inputs), targets, val_idx

    def validation_step_end(self, res):
        """Update the validation trackers with one step's output."""
        logits, targets, _ = res
        self.val_loss(logits, targets)
        self.val_accu(logits, targets)

    def validation_epoch_end(self, losses):
        """Log the values computed by the validation trackers."""
        self.log_dict(
            {
                "val_loss": self.val_loss.compute(),
                "val_accu": self.val_accu.compute(),
            },
            prog_bar=True,
            on_step=False,
            on_epoch=True,
        )

    # ------------------------------------------------------------------- test
    def test_step(self, batch, batch_idx, test_idx=0):
        """Run the forward pass; return ``(logits, targets, test_idx)``."""
        inputs, targets = batch
        return self(inputs), targets, test_idx

    def test_step_end(self, res):
        """Update the tracker pair selected by the index carried in ``res``."""
        logits, targets, loader_idx = res
        self.test_loss_list[loader_idx](logits, targets)
        self.test_accu_list[loader_idx](logits, targets)

    def test_epoch_end(self, losses):
        """Log ``test{i}_loss`` / ``test{i}_accu`` for every test tracker pair."""
        for idx in range(self.n_test):
            self.log_dict(
                {
                    f"test{idx}_loss": self.test_loss_list[idx].compute(),
                    f"test{idx}_accu": self.test_accu_list[idx].compute(),
                },
                prog_bar=True,
                on_step=False,
                on_epoch=True,
            )

    # -------------------------------------------------------------- optimizer
    def configure_optimizers(self):
        """Return Adam (lr=0.05) with an exponential LR schedule (gamma=0.98)."""
        adam = Adam(self.parameters(), lr=0.05)
        scheduler = ExponentialLR(adam, gamma=0.98)
        return [adam], [scheduler]
```



## What have you tried?
After testing the model, I got output like this:
```
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test0_accu': tensor([0.5268], device='cuda:0'),
 'test0_loss': tensor([1.2401], device='cuda:0'),
 'test1_accu': tensor([0.5278], device='cuda:0'),
 'test1_loss': tensor([1.2341], device='cuda:0')}
--------------------------------------------------------------------------------
DATALOADER:1 TEST RESULTS
{'test0_accu': tensor([0.5268], device='cuda:0'),
 'test0_loss': tensor([1.2401], device='cuda:0'),
 'test1_accu': tensor([0.5278], device='cuda:0'),
 'test1_loss': tensor([1.2341], device='cuda:0')}
--------------------------------------------------------------------------------
```
So, what can I do to change the output format to look like this:
```
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test0_accu': tensor([0.5268], device='cuda:0'),
 'test0_loss': tensor([1.2401], device='cuda:0')}
--------------------------------------------------------------------------------
DATALOADER:1 TEST RESULTS
{'test1_accu': tensor([0.5278], device='cuda:0'),
 'test1_loss': tensor([1.2341], device='cuda:0')}
--------------------------------------------------------------------------------
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

How to correctly test the model by using multiple test data loaders ? #5710

How to correctly test the model by using multiple test data loaders ?

Code

DataModule

Module

What have you tried?

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

How to correctly test the model by using multiple test data loaders ? #5710

Description

How to correctly test the model by using multiple test data loaders ?

Code

DataModule

Module

What have you tried?

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions