prep cnn

bartzbeielstein · bartzbeielstein · commit 4c705aba987f · 2023-07-24T23:41:53.000+02:00
diff --git a/src/spotPython/light/cifar10/cifar10datamodule.py b/src/spotPython/light/cifar10/cifar10datamodule.py
@@ -9,6 +9,12 @@ class CIFAR10DataModule(pl.LightningDataModule):
     """
     A LightningDataModule for handling CIFAR10 data.
 
+    Note: Torchvision provides many built-in datasets in the torchvision.datasets module,
+        as well as utility classes for building your own datasets. All datasets are subclasses
+        of torch.utils.data.Dataset, i.e., they have __getitem__ and __len__ methods implemented.
+        Hence, they can all be passed to a torch.utils.data.DataLoader which can load multiple
+        samples in parallel using torch.multiprocessing workers, see [1].
+
     Args:
         batch_size (int): The size of the batch.
         data_dir (str): The directory where the data is stored. Defaults to "./data".
@@ -18,6 +24,9 @@ class CIFAR10DataModule(pl.LightningDataModule):
         data_train (Dataset): The training dataset.
         data_val (Dataset): The validation dataset.
         data_test (Dataset): The test dataset.
+
+    References:
+        [1] [https://pytorch.org/vision/stable/datasets.html](https://pytorch.org/vision/stable/datasets.html)
     """
 
     def __init__(self, batch_size: int, data_dir: str = "./data", num_workers: int = 0):
@@ -40,22 +49,21 @@ def setup(self, stage: Optional[str] = None) -> None:
             stage (Optional[str]): The current stage. Defaults to None.
 
         """
+        # Assign appropriate data transforms, see
+        # https://lightning.ai/docs/pytorch/latest/notebooks/course_UvA-DL/04-inception-resnet-densenet.html
+        DATA_MEANS = (0.49139968, 0.48215841, 0.44653091)
+        DATA_STDS = (0.24703223, 0.24348513, 0.26158784)
+        transform = transforms.Compose(
+                [transforms.ToTensor(), transforms.Normalize(DATA_MEANS, DATA_STDS)]
+            )
         # Assign train/val datasets for use in dataloaders
         if stage == "fit" or stage is None:
-            transform = transforms.Compose(
-                [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
-            )
             data_full = CIFAR10(root=self.data_dir, train=True, transform=transform)
-            # self.data_train, self.data_val = random_split(daata_full, [45000, 5000])
             test_abs = int(len(data_full) * 0.6)
-            print("dm.setup(): test_abs", test_abs)
             self.data_train, self.data_val = random_split(data_full, [test_abs, len(data_full) - test_abs])
 
         # Assign test dataset for use in dataloader(s)
         if stage == "test" or stage is None:
-            transform = transforms.Compose(
-                [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
-            )
             self.data_test = CIFAR10(root=self.data_dir, train=False, transform=transform)
 
     def train_dataloader(self) -> DataLoader:
diff --git a/src/spotPython/light/cnn/netcnnbase.py b/src/spotPython/light/cnn/netcnnbase.py
@@ -32,13 +32,6 @@
 from torchvision import transforms
 from torchvision.datasets import CIFAR10
 
-matplotlib_inline.backend_inline.set_matplotlib_formats("svg", "pdf")  # For export
-matplotlib.rcParams["lines.linewidth"] = 2.0
-sns.reset_orig()
-
-# PyTorch
-# Torchvision
-
 
 class NetCNNBase(L.LightningModule):
     def __init__(self, model_name, model_hparams, optimizer_name, optimizer_hparams):
diff --git a/src/spotPython/light/traintest_NEW.py b/src/spotPython/light/traintest_NEW.py
@@ -1,5 +1,5 @@
 import lightning as L
-from spotPython.light.cifar10datamodule import CIFAR10DataModule
+from spotPython.light.cifar10.cifar10datamodule import CIFAR10DataModule
 from spotPython.light.crossvalidationdatamodule import CrossValidationDataModule
 from spotPython.utils.eda import generate_config_id
 from pytorch_lightning.loggers import TensorBoardLogger